about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Waylan Limberg <waylan@gmail.com>, 2007-12-13 05:32:03 +0000
committer: Waylan Limberg <waylan@gmail.com>, 2007-12-13 05:32:03 +0000
commit: ad05a5b7c67d792f74d7af739a851e69a54f12eb (patch)
tree: 5fd3e8d028ead29f792fd05160925e6139731788
parent: 158b8ee673be7aea09e15c3830092bcab62dbfbe (diff)
download: markdown-ad05a5b7c67d792f74d7af739a851e69a54f12eb.tar.gz
download: markdown-ad05a5b7c67d792f74d7af739a851e69a54f12eb.tar.bz2
download: markdown-ad05a5b7c67d792f74d7af739a851e69a54f12eb.zip
Adjustments to match unicode policy as discussed on list and other minor
cleanup in preparation for release 1.7.
-rw-r--r-- markdown.py 62
-rw-r--r-- setup.py 2
-rw-r--r-- tests/markdown-test/benchmark.dat.tmp 38
-rw-r--r-- tests/misc/benchmark.dat.tmp 103
-rw-r--r-- tests/misc/utfbom.html 7
-rw-r--r-- tests/misc/utfbom.txt 4
6 files changed, 104 insertions, 112 deletions
diff --git a/markdown.py b/markdown.py
index cc657c8..b463ef4 100644
--- a/markdown.py
+++ b/markdown.py
@@ -1,7 +1,7 @@
#!/usr/bin/env python
-version = "1.6b"
-version_info = (1,6,2,"rc-2")
+version = "1.7"
+version_info = (1,7,0,"rc-1")
__revision__ = "$Rev$"
"""
@@ -20,9 +20,11 @@ script. (You might want to read that before you try modifying this
file.)
Started by [Manfred Stienstra](http://www.dwerg.net/). Continued and
-maintained by [Yuri Takhteyev](http://www.freewisdom.org).
+maintained by [Yuri Takhteyev](http://www.freewisdom.org) and [Waylan
+Limberg](http://achinghead.com/).
Contact: yuri [at] freewisdom.org
+ waylan [at] gmail.com
License: GPL 2 (http://www.gnu.org/copyleft/gpl.html) or BSD
@@ -1111,19 +1113,21 @@ class Markdown:
Markdown text """
- def __init__(self, source=None, # deprecated
+ def __init__(self, source=None, # depreciated
extensions=[],
extension_configs=None,
- encoding="utf-8",
safe_mode = False):
"""Creates a new Markdown instance.
- @param source: The text in Markdown format.
- @param encoding: The character encoding of <text>. """
+ @param source: The text in Markdown format. Deprecated!
+ @param extensions: A list of extensions.
+ @param extension_configs: Configuration settings for extensions.
+ @param safe_mode: Disallow raw html. """
- self.safeMode = safe_mode
- self.encoding = encoding
self.source = source
+ if source is not None:
+ message(WARN, "The `source` arg of Markdown.__init__() is depreciated and will be removed in the future. Use `instance.convert(source)` instead.")
+ self.safeMode = safe_mode
self.blockGuru = BlockGuru()
self.registeredExtensions = []
self.stripTopLevelTags = 1
@@ -1155,8 +1159,8 @@ class Markdown:
LINK_ANGLED_PATTERN,
LINK_PATTERN,
IMAGE_LINK_PATTERN,
- IMAGE_REFERENCE_PATTERN,
- AUTOLINK_PATTERN,
+ IMAGE_REFERENCE_PATTERN,
+ AUTOLINK_PATTERN,
AUTOMAIL_PATTERN,
LINE_BREAK_PATTERN_2,
LINE_BREAK_PATTERN,
@@ -1684,15 +1688,18 @@ class Markdown:
"""Return the document in XHTML format.
@returns: A serialized XHTML body."""
- #try :
if source is not None: #Allow blank string
self.source = source
if not self.source:
- return ""
+ return u""
- self.source = removeBOM(self.source, self.encoding)
+ try:
+ self.source = unicode(self.source)
+ except UnicodeDecodeError:
+ message(CRITICAL, 'UnicodeDecodeError: Markdown only accepts unicode or ascii input.')
+ return u""
for pp in self.textPreprocessors:
self.source = pp.run(self.source)
@@ -1700,8 +1707,6 @@ class Markdown:
doc = self._transform()
xml = doc.toxml()
- #finally:
- # doc.unlink()
# Return everything but the top level tag
@@ -1714,19 +1719,17 @@ class Markdown:
return (self.docType + xml).strip()
- __str__ = convert # deprecated - will be changed in 1.7 to report
- # information about the MD instance
-
- toString = __str__ # toString() method is deprecated
-
-
- def __unicode__(self):
- """Return the document in XHTML format as a Unicode object.
- """
- return str(self)#.decode(self.encoding)
+ def __str__(self):
+ ''' Report info about instance. Markdown always returns unicode. '''
+ if self.source is None:
+ status = 'in which no source text has been assinged.'
+ else:
+ status = 'which contains %d chars and %d line(s) of source.'%\
+ (len(self.source), self.source.count('\n')+1)
+ return 'An instance of "%s" %s'% (self.__class__, status)
+ __unicode__ = convert # markdown should always return a unicode string
- toUnicode = __unicode__ # deprecated - will be removed in 1.7
@@ -1752,7 +1755,9 @@ def markdownFromFile(input = None,
text = input_file.read()
input_file.close()
- new_text = markdown(text, extensions, encoding, safe_mode = safe)
+ text = removeBOM(text, encoding)
+
+ new_text = markdown(text, extensions, safe_mode = safe)
if output:
output_file = codecs.open(output, "w", encoding=encoding)
@@ -1764,7 +1769,6 @@ def markdownFromFile(input = None,
def markdown(text,
extensions = [],
- encoding = None,
safe_mode = False):
message(DEBUG, "in markdown.markdown(), received text:\n%s" % text)
diff --git a/setup.py b/setup.py
index de4b195..dad6811 100644
--- a/setup.py
+++ b/setup.py
@@ -2,7 +2,7 @@ from distutils.core import setup
setup(
name = 'markdown',
- version = '1.3',
+ version = '1.7',
description = "Python implementation of Markdown.",
author = "Manfred Stienstra and Yuri takhteyev",
maintainer = "Yuri Takhteyev",
diff --git a/tests/markdown-test/benchmark.dat.tmp b/tests/markdown-test/benchmark.dat.tmp
index 5828f9a..e8ca4f4 100644
--- a/tests/markdown-test/benchmark.dat.tmp
+++ b/tests/markdown-test/benchmark.dat.tmp
@@ -1,20 +1,20 @@
construction:0.000000:0.000000
-amps-and-angle-encoding:0.020000:0.000000
-auto-links:0.020000:0.000000
-backlash-escapes:0.110000:131072.000000
-blockquotes-with-dode-blocks:0.000000:0.000000
-hard-wrapped:0.000000:0.000000
-horizontal-rules:0.070000:0.000000
-inline-html-advanced:0.020000:0.000000
-inline-html-comments:0.030000:0.000000
-inline-html-simple:0.100000:0.000000
-links-inline:0.050000:0.000000
-links-reference:0.090000:0.000000
-literal-quotes:0.040000:0.000000
-markdown-documentation-basics:0.350000:770048.000000
-markdown-syntax:1.790000:1540096.000000
-nested-blockquotes:0.040000:0.000000
-ordered-and-unordered-list:0.240000:-16384.000000
-strong-and-em-together:0.050000:0.000000
-tabs:0.060000:0.000000
-tidyness:0.050000:0.000000
+amps-and-angle-encoding:0.250000:266240.000000
+auto-links:0.230000:0.000000
+backlash-escapes:0.950000:245760.000000
+blockquotes-with-dode-blocks:0.070000:0.000000
+hard-wrapped:0.060000:0.000000
+horizontal-rules:0.700000:0.000000
+inline-html-advanced:0.280000:0.000000
+inline-html-comments:0.340000:0.000000
+inline-html-simple:0.970000:0.000000
+links-inline:0.500000:0.000000
+links-reference:0.650000:0.000000
+literal-quotes:0.390000:0.000000
+markdown-documentation-basics:3.390000:1437696.000000
+markdown-syntax:14.780000:2035712.000000
+nested-blockquotes:0.520000:-110592.000000
+ordered-and-unordered-list:2.210000:0.000000
+strong-and-em-together:0.640000:0.000000
+tabs:0.690000:0.000000
+tidyness:0.590000:0.000000
diff --git a/tests/misc/benchmark.dat.tmp b/tests/misc/benchmark.dat.tmp
index 4fecb5c..b155e1c 100644
--- a/tests/misc/benchmark.dat.tmp
+++ b/tests/misc/benchmark.dat.tmp
@@ -1,54 +1,53 @@
construction:0.000000:0.000000
-adjacent-headers:0.010000:0.000000
-amp-in-url:0.010000:0.000000
-ampersand:0.000000:0.000000
-arabic:0.040000:0.000000
-attributes2:0.010000:0.000000
-bidi:0.080000:0.000000
+adjacent-headers:0.050000:0.000000
+amp-in-url:0.040000:0.000000
+ampersand:0.040000:0.000000
+arabic:0.330000:0.000000
+attributes2:0.090000:0.000000
+bidi:0.910000:0.000000
blank:0.000000:0.000000
-blank-block-quote:0.000000:0.000000
-blockquote-hr:0.020000:0.000000
-br:0.020000:0.000000
-bracket_re:1.230000:0.000000
-code-first-line:0.010000:0.000000
-comments:0.010000:0.000000
-div:0.010000:0.000000
-email:0.010000:0.000000
-funky-list:0.020000:0.000000
-h1:0.010000:0.000000
-hash:0.020000:0.000000
-headers:0.020000:0.000000
-hline:0.020000:0.000000
-html:0.030000:0.000000
-image:0.010000:0.000000
-image-2:0.010000:0.000000
-image_in_links:0.020000:0.000000
-inside_html:0.020000:0.000000
-japanese:0.050000:0.000000
-lazy-block-quote:0.020000:0.000000
-lists:0.050000:0.000000
-lists2:0.020000:0.000000
-lists3:0.010000:0.000000
-lists4:0.020000:0.000000
-lists5:0.010000:0.000000
-markup-inside-p:0.020000:0.000000
-mismatched-tags:0.020000:0.000000
-more_comments:0.010000:0.000000
-multi-line-tags:0.030000:0.000000
-multi-paragraph-block-quote:0.020000:0.000000
-multi-test:0.050000:0.000000
-multiline-comments:0.020000:0.000000
-normalize:0.020000:0.000000
-numeric-entity:0.030000:0.000000
-php:0.030000:0.000000
-pre:0.030000:0.000000
-russian:0.070000:-12288.000000
-some-test:0.090000:0.000000
-span:0.040000:0.000000
-stronintags:0.050000:0.000000
-tabs-in-lists:0.070000:0.000000
-two-spaces:0.060000:0.000000
-uche:0.050000:0.000000
-underscores:0.040000:0.000000
-url_spaces:0.030000:0.000000
-utfbom:0.020000:0.000000
+blank-block-quote:0.050000:0.000000
+blockquote-hr:0.210000:0.000000
+br:0.170000:0.000000
+bracket_re:21.910000:0.000000
+code-first-line:0.050000:0.000000
+comments:0.110000:0.000000
+div:0.130000:0.000000
+email:0.130000:0.000000
+funky-list:0.220000:0.000000
+h1:0.110000:0.000000
+hash:0.160000:0.000000
+headers:0.180000:0.000000
+hline:0.110000:0.000000
+html:0.310000:0.000000
+image:0.150000:0.000000
+image-2:0.220000:0.000000
+image_in_links:0.190000:0.000000
+inside_html:0.180000:0.000000
+japanese:0.540000:0.000000
+lazy-block-quote:0.190000:0.000000
+lists:0.450000:0.000000
+lists2:0.170000:0.000000
+lists3:0.170000:0.000000
+lists4:0.210000:0.000000
+lists5:0.260000:0.000000
+markup-inside-p:0.270000:0.000000
+mismatched-tags:0.180000:0.000000
+more_comments:0.210000:0.000000
+multi-line-tags:0.260000:0.000000
+multi-paragraph-block-quote:0.280000:0.000000
+multi-test:0.540000:0.000000
+multiline-comments:0.340000:0.000000
+normalize:0.270000:0.000000
+numeric-entity:0.310000:0.000000
+php:0.350000:0.000000
+pre:0.310000:0.000000
+russian:0.760000:-172032.000000
+some-test:0.850000:0.000000
+span:0.500000:0.000000
+stronintags:0.500000:0.000000
+tabs-in-lists:0.670000:0.000000
+two-spaces:0.550000:0.000000
+uche:0.540000:0.000000
+underscores:0.490000:0.000000
+url_spaces:0.420000:0.000000
diff --git a/tests/misc/utfbom.html b/tests/misc/utfbom.html
deleted file mode 100644
index a3ef50d..0000000
--- a/tests/misc/utfbom.html
+++ /dev/null
@@ -1,7 +0,0 @@
-
-
-<h1>A heading.</h1>
-<p>text text text text text text.
-</p>
-
-
diff --git a/tests/misc/utfbom.txt b/tests/misc/utfbom.txt
deleted file mode 100644
index 1c88258..0000000
--- a/tests/misc/utfbom.txt
+++ /dev/null
@@ -1,4 +0,0 @@
-A heading.
-==========
-
-text text text text text text.
\ No newline at end of file