From ad05a5b7c67d792f74d7af739a851e69a54f12eb Mon Sep 17 00:00:00 2001 From: Waylan Limberg Date: Thu, 13 Dec 2007 05:32:03 +0000 Subject: Adjustments to match unicode policy as discussed on list and other minor cleanup in preparation for release 1.7. --- markdown.py | 62 ++++++++++---------- setup.py | 2 +- tests/markdown-test/benchmark.dat.tmp | 38 ++++++------- tests/misc/benchmark.dat.tmp | 103 +++++++++++++++++----------------- tests/misc/utfbom.html | 7 --- tests/misc/utfbom.txt | 4 -- 6 files changed, 104 insertions(+), 112 deletions(-) delete mode 100644 tests/misc/utfbom.html delete mode 100644 tests/misc/utfbom.txt diff --git a/markdown.py b/markdown.py index cc657c8..b463ef4 100644 --- a/markdown.py +++ b/markdown.py @@ -1,7 +1,7 @@ #!/usr/bin/env python -version = "1.6b" -version_info = (1,6,2,"rc-2") +version = "1.7" +version_info = (1,7,0,"rc-1") __revision__ = "$Rev$" """ @@ -20,9 +20,11 @@ script. (You might want to read that before you try modifying this file.) Started by [Manfred Stienstra](http://www.dwerg.net/). Continued and -maintained by [Yuri Takhteyev](http://www.freewisdom.org). +maintained by [Yuri Takhteyev](http://www.freewisdom.org) and [Waylan +Limberg](http://achinghead.com/). Contact: yuri [at] freewisdom.org + waylan [at] gmail.com License: GPL 2 (http://www.gnu.org/copyleft/gpl.html) or BSD @@ -1111,19 +1113,21 @@ class Markdown: Markdown text """ - def __init__(self, source=None, # deprecated + def __init__(self, source=None, # depreciated extensions=[], extension_configs=None, - encoding="utf-8", safe_mode = False): """Creates a new Markdown instance. - @param source: The text in Markdown format. - @param encoding: The character encoding of . """ + @param source: The text in Markdown format. Depreciated! + @param extensions: A list if extensions. + @param extension-configs: Configuration setting for extensions. + @param safe_mode: Disallow raw html. """ - self.safeMode = safe_mode - self.encoding = encoding self.source = source + if source is not None: + message(WARN, "The `source` arg of Markdown.__init__() is depreciated and will be removed in the future. Use `instance.convert(source)` instead.") + self.safeMode = safe_mode self.blockGuru = BlockGuru() self.registeredExtensions = [] self.stripTopLevelTags = 1 @@ -1155,8 +1159,8 @@ class Markdown: LINK_ANGLED_PATTERN, LINK_PATTERN, IMAGE_LINK_PATTERN, - IMAGE_REFERENCE_PATTERN, - AUTOLINK_PATTERN, + IMAGE_REFERENCE_PATTERN, + AUTOLINK_PATTERN, AUTOMAIL_PATTERN, LINE_BREAK_PATTERN_2, LINE_BREAK_PATTERN, @@ -1684,15 +1688,18 @@ class Markdown: """Return the document in XHTML format. @returns: A serialized XHTML body.""" - #try : if source is not None: #Allow blank string self.source = source if not self.source: - return "" + return u"" - self.source = removeBOM(self.source, self.encoding) + try: + self.source = unicode(self.source) + except UnicodeDecodeError: + message(CRITICAL, 'UnicodeDecodeError: Markdown only accepts unicode or ascii input.') + return u"" for pp in self.textPreprocessors: self.source = pp.run(self.source) @@ -1700,8 +1707,6 @@ class Markdown: doc = self._transform() xml = doc.toxml() - #finally: - # doc.unlink() # Return everything but the top level tag @@ -1714,19 +1719,17 @@ class Markdown: return (self.docType + xml).strip() - __str__ = convert # deprecated - will be changed in 1.7 to report - # information about the MD instance - - toString = __str__ # toString() method is deprecated - - - def __unicode__(self): - """Return the document in XHTML format as a Unicode object. - """ - return str(self)#.decode(self.encoding) + def __str__(self): + ''' Report info about instance. Markdown always returns unicode. ''' + if self.source is None: + status = 'in which no source text has been assinged.' + else: + status = 'which contains %d chars and %d line(s) of source.'%\ + (len(self.source), self.source.count('\n')+1) + return 'An instance of "%s" %s'% (self.__class__, status) + __unicode__ = convert # markdown should always return a unicode string - toUnicode = __unicode__ # deprecated - will be removed in 1.7 @@ -1752,7 +1755,9 @@ def markdownFromFile(input = None, text = input_file.read() input_file.close() - new_text = markdown(text, extensions, encoding, safe_mode = safe) + text = removeBOM(text, encoding) + + new_text = markdown(text, extensions, safe_mode = safe) if output: output_file = codecs.open(output, "w", encoding=encoding) @@ -1764,7 +1769,6 @@ def markdownFromFile(input = None, def markdown(text, extensions = [], - encoding = None, safe_mode = False): message(DEBUG, "in markdown.markdown(), received text:\n%s" % text) diff --git a/setup.py b/setup.py index de4b195..dad6811 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ from distutils.core import setup setup( name = 'markdown', - version = '1.3', + version = '1.7', description = "Python implementation of Markdown.", author = "Manfred Stienstra and Yuri takhteyev", maintainer = "Yuri Takhteyev", diff --git a/tests/markdown-test/benchmark.dat.tmp b/tests/markdown-test/benchmark.dat.tmp index 5828f9a..e8ca4f4 100644 --- a/tests/markdown-test/benchmark.dat.tmp +++ b/tests/markdown-test/benchmark.dat.tmp @@ -1,20 +1,20 @@ construction:0.000000:0.000000 -amps-and-angle-encoding:0.020000:0.000000 -auto-links:0.020000:0.000000 -backlash-escapes:0.110000:131072.000000 -blockquotes-with-dode-blocks:0.000000:0.000000 -hard-wrapped:0.000000:0.000000 -horizontal-rules:0.070000:0.000000 -inline-html-advanced:0.020000:0.000000 -inline-html-comments:0.030000:0.000000 -inline-html-simple:0.100000:0.000000 -links-inline:0.050000:0.000000 -links-reference:0.090000:0.000000 -literal-quotes:0.040000:0.000000 -markdown-documentation-basics:0.350000:770048.000000 -markdown-syntax:1.790000:1540096.000000 -nested-blockquotes:0.040000:0.000000 -ordered-and-unordered-list:0.240000:-16384.000000 -strong-and-em-together:0.050000:0.000000 -tabs:0.060000:0.000000 -tidyness:0.050000:0.000000 +amps-and-angle-encoding:0.250000:266240.000000 +auto-links:0.230000:0.000000 +backlash-escapes:0.950000:245760.000000 +blockquotes-with-dode-blocks:0.070000:0.000000 +hard-wrapped:0.060000:0.000000 +horizontal-rules:0.700000:0.000000 +inline-html-advanced:0.280000:0.000000 +inline-html-comments:0.340000:0.000000 +inline-html-simple:0.970000:0.000000 +links-inline:0.500000:0.000000 +links-reference:0.650000:0.000000 +literal-quotes:0.390000:0.000000 +markdown-documentation-basics:3.390000:1437696.000000 +markdown-syntax:14.780000:2035712.000000 +nested-blockquotes:0.520000:-110592.000000 +ordered-and-unordered-list:2.210000:0.000000 +strong-and-em-together:0.640000:0.000000 +tabs:0.690000:0.000000 +tidyness:0.590000:0.000000 diff --git a/tests/misc/benchmark.dat.tmp b/tests/misc/benchmark.dat.tmp index 4fecb5c..b155e1c 100644 --- a/tests/misc/benchmark.dat.tmp +++ b/tests/misc/benchmark.dat.tmp @@ -1,54 +1,53 @@ construction:0.000000:0.000000 -adjacent-headers:0.010000:0.000000 -amp-in-url:0.010000:0.000000 -ampersand:0.000000:0.000000 -arabic:0.040000:0.000000 -attributes2:0.010000:0.000000 -bidi:0.080000:0.000000 +adjacent-headers:0.050000:0.000000 +amp-in-url:0.040000:0.000000 +ampersand:0.040000:0.000000 +arabic:0.330000:0.000000 +attributes2:0.090000:0.000000 +bidi:0.910000:0.000000 blank:0.000000:0.000000 -blank-block-quote:0.000000:0.000000 -blockquote-hr:0.020000:0.000000 -br:0.020000:0.000000 -bracket_re:1.230000:0.000000 -code-first-line:0.010000:0.000000 -comments:0.010000:0.000000 -div:0.010000:0.000000 -email:0.010000:0.000000 -funky-list:0.020000:0.000000 -h1:0.010000:0.000000 -hash:0.020000:0.000000 -headers:0.020000:0.000000 -hline:0.020000:0.000000 -html:0.030000:0.000000 -image:0.010000:0.000000 -image-2:0.010000:0.000000 -image_in_links:0.020000:0.000000 -inside_html:0.020000:0.000000 -japanese:0.050000:0.000000 -lazy-block-quote:0.020000:0.000000 -lists:0.050000:0.000000 -lists2:0.020000:0.000000 -lists3:0.010000:0.000000 -lists4:0.020000:0.000000 -lists5:0.010000:0.000000 -markup-inside-p:0.020000:0.000000 -mismatched-tags:0.020000:0.000000 -more_comments:0.010000:0.000000 -multi-line-tags:0.030000:0.000000 -multi-paragraph-block-quote:0.020000:0.000000 -multi-test:0.050000:0.000000 -multiline-comments:0.020000:0.000000 -normalize:0.020000:0.000000 -numeric-entity:0.030000:0.000000 -php:0.030000:0.000000 -pre:0.030000:0.000000 -russian:0.070000:-12288.000000 -some-test:0.090000:0.000000 -span:0.040000:0.000000 -stronintags:0.050000:0.000000 -tabs-in-lists:0.070000:0.000000 -two-spaces:0.060000:0.000000 -uche:0.050000:0.000000 -underscores:0.040000:0.000000 -url_spaces:0.030000:0.000000 -utfbom:0.020000:0.000000 +blank-block-quote:0.050000:0.000000 +blockquote-hr:0.210000:0.000000 +br:0.170000:0.000000 +bracket_re:21.910000:0.000000 +code-first-line:0.050000:0.000000 +comments:0.110000:0.000000 +div:0.130000:0.000000 +email:0.130000:0.000000 +funky-list:0.220000:0.000000 +h1:0.110000:0.000000 +hash:0.160000:0.000000 +headers:0.180000:0.000000 +hline:0.110000:0.000000 +html:0.310000:0.000000 +image:0.150000:0.000000 +image-2:0.220000:0.000000 +image_in_links:0.190000:0.000000 +inside_html:0.180000:0.000000 +japanese:0.540000:0.000000 +lazy-block-quote:0.190000:0.000000 +lists:0.450000:0.000000 +lists2:0.170000:0.000000 +lists3:0.170000:0.000000 +lists4:0.210000:0.000000 +lists5:0.260000:0.000000 +markup-inside-p:0.270000:0.000000 +mismatched-tags:0.180000:0.000000 +more_comments:0.210000:0.000000 +multi-line-tags:0.260000:0.000000 +multi-paragraph-block-quote:0.280000:0.000000 +multi-test:0.540000:0.000000 +multiline-comments:0.340000:0.000000 +normalize:0.270000:0.000000 +numeric-entity:0.310000:0.000000 +php:0.350000:0.000000 +pre:0.310000:0.000000 +russian:0.760000:-172032.000000 +some-test:0.850000:0.000000 +span:0.500000:0.000000 +stronintags:0.500000:0.000000 +tabs-in-lists:0.670000:0.000000 +two-spaces:0.550000:0.000000 +uche:0.540000:0.000000 +underscores:0.490000:0.000000 +url_spaces:0.420000:0.000000 diff --git a/tests/misc/utfbom.html b/tests/misc/utfbom.html deleted file mode 100644 index a3ef50d..0000000 --- a/tests/misc/utfbom.html +++ /dev/null @@ -1,7 +0,0 @@ - - -

A heading.

-

text text text text text text. -

- - diff --git a/tests/misc/utfbom.txt b/tests/misc/utfbom.txt deleted file mode 100644 index 1c88258..0000000 --- a/tests/misc/utfbom.txt +++ /dev/null @@ -1,4 +0,0 @@ -A heading. -========== - -text text text text text text. \ No newline at end of file -- cgit v1.2.3