From ad05a5b7c67d792f74d7af739a851e69a54f12eb Mon Sep 17 00:00:00 2001 From: Waylan Limberg Date: Thu, 13 Dec 2007 05:32:03 +0000 Subject: Adjustments to match unicode policy as discussed on list and other minor cleanup in preparation for release 1.7. --- markdown.py | 62 ++++++++++++++++++++++++++++++++----------------------------- 1 file changed, 33 insertions(+), 29 deletions(-) (limited to 'markdown.py') diff --git a/markdown.py b/markdown.py index cc657c8..b463ef4 100644 --- a/markdown.py +++ b/markdown.py @@ -1,7 +1,7 @@ #!/usr/bin/env python -version = "1.6b" -version_info = (1,6,2,"rc-2") +version = "1.7" +version_info = (1,7,0,"rc-1") __revision__ = "$Rev$" """ @@ -20,9 +20,11 @@ script. (You might want to read that before you try modifying this file.) Started by [Manfred Stienstra](http://www.dwerg.net/). Continued and -maintained by [Yuri Takhteyev](http://www.freewisdom.org). +maintained by [Yuri Takhteyev](http://www.freewisdom.org) and [Waylan +Limberg](http://achinghead.com/). Contact: yuri [at] freewisdom.org + waylan [at] gmail.com License: GPL 2 (http://www.gnu.org/copyleft/gpl.html) or BSD @@ -1111,19 +1113,21 @@ class Markdown: Markdown text """ - def __init__(self, source=None, # deprecated + def __init__(self, source=None, # depreciated extensions=[], extension_configs=None, - encoding="utf-8", safe_mode = False): """Creates a new Markdown instance. - @param source: The text in Markdown format. - @param encoding: The character encoding of . """ + @param source: The text in Markdown format. Depreciated! + @param extensions: A list if extensions. + @param extension-configs: Configuration setting for extensions. + @param safe_mode: Disallow raw html. """ - self.safeMode = safe_mode - self.encoding = encoding self.source = source + if source is not None: + message(WARN, "The `source` arg of Markdown.__init__() is depreciated and will be removed in the future. Use `instance.convert(source)` instead.") + self.safeMode = safe_mode self.blockGuru = BlockGuru() self.registeredExtensions = [] self.stripTopLevelTags = 1 @@ -1155,8 +1159,8 @@ class Markdown: LINK_ANGLED_PATTERN, LINK_PATTERN, IMAGE_LINK_PATTERN, - IMAGE_REFERENCE_PATTERN, - AUTOLINK_PATTERN, + IMAGE_REFERENCE_PATTERN, + AUTOLINK_PATTERN, AUTOMAIL_PATTERN, LINE_BREAK_PATTERN_2, LINE_BREAK_PATTERN, @@ -1684,15 +1688,18 @@ class Markdown: """Return the document in XHTML format. @returns: A serialized XHTML body.""" - #try : if source is not None: #Allow blank string self.source = source if not self.source: - return "" + return u"" - self.source = removeBOM(self.source, self.encoding) + try: + self.source = unicode(self.source) + except UnicodeDecodeError: + message(CRITICAL, 'UnicodeDecodeError: Markdown only accepts unicode or ascii input.') + return u"" for pp in self.textPreprocessors: self.source = pp.run(self.source) @@ -1700,8 +1707,6 @@ class Markdown: doc = self._transform() xml = doc.toxml() - #finally: - # doc.unlink() # Return everything but the top level tag @@ -1714,19 +1719,17 @@ class Markdown: return (self.docType + xml).strip() - __str__ = convert # deprecated - will be changed in 1.7 to report - # information about the MD instance - - toString = __str__ # toString() method is deprecated - - - def __unicode__(self): - """Return the document in XHTML format as a Unicode object. - """ - return str(self)#.decode(self.encoding) + def __str__(self): + ''' Report info about instance. Markdown always returns unicode. ''' + if self.source is None: + status = 'in which no source text has been assinged.' + else: + status = 'which contains %d chars and %d line(s) of source.'%\ + (len(self.source), self.source.count('\n')+1) + return 'An instance of "%s" %s'% (self.__class__, status) + __unicode__ = convert # markdown should always return a unicode string - toUnicode = __unicode__ # deprecated - will be removed in 1.7 @@ -1752,7 +1755,9 @@ def markdownFromFile(input = None, text = input_file.read() input_file.close() - new_text = markdown(text, extensions, encoding, safe_mode = safe) + text = removeBOM(text, encoding) + + new_text = markdown(text, extensions, safe_mode = safe) if output: output_file = codecs.open(output, "w", encoding=encoding) @@ -1764,7 +1769,6 @@ def markdownFromFile(input = None, def markdown(text, extensions = [], - encoding = None, safe_mode = False): message(DEBUG, "in markdown.markdown(), received text:\n%s" % text) -- cgit v1.2.3