diff options
author | Waylan Limberg <waylan@gmail.com> | 2007-10-30 01:19:22 +0000 |
---|---|---|
committer | Waylan Limberg <waylan@gmail.com> | 2007-10-30 01:19:22 +0000 |
commit | 262228bd6afdab5602e960be9b2cc772cd6aad19 (patch) | |
tree | 1f47904c774cdd744a31cd632194ed3cd45ccc5c | |
parent | 0c59b31f02906e6c1c35928afa41bab115cf4ec0 (diff) | |
download | markdown-262228bd6afdab5602e960be9b2cc772cd6aad19.tar.gz markdown-262228bd6afdab5602e960be9b2cc772cd6aad19.tar.bz2 markdown-262228bd6afdab5602e960be9b2cc772cd6aad19.zip |
Fixed BOM removal with non-ASCII input [1817528] - Thanks for the patch, Malcolm Tredinnick.
-rw-r--r-- | markdown.py | 12 |
1 file changed, 6 insertions, 6 deletions
diff --git a/markdown.py b/markdown.py index d09f1b2..e563866 100644 --- a/markdown.py +++ b/markdown.py @@ -62,15 +62,15 @@ RTL_BIDI_RANGES = ( (u'\u0590', u'\u07FF'), # 0780-07BF - Thaana # 07C0-07FF - Nko -BOMS = { 'utf-8' : (unicode(codecs.BOM_UTF8, "utf-8"), ), - 'utf-16' : (unicode(codecs.BOM_UTF16_LE, "utf-16"), - unicode(codecs.BOM_UTF16_BE, "utf-16")), - #'utf-32' : (unicode(codecs.BOM_UTF32_LE, "utf-32"), - # unicode(codecs.BOM_UTF32_BE, "utf-32")), +BOMS = { 'utf-8' : (codecs.BOM_UTF8, ), + 'utf-16' : (codecs.BOM_UTF16_LE, codecs.BOM_UTF16_BE), + #'utf-32' : (codecs.BOM_UTF32_LE, codecs.BOM_UTF32_BE) } def removeBOM(text, encoding): + convert = isinstance(text, unicode) for bom in BOMS[encoding]: + bom = convert and bom.decode(encoding) or bom if text.startswith(bom): return text.lstrip(bom) return text @@ -454,7 +454,7 @@ class LinePreprocessor (Preprocessor): return lines def _isLine(self, block) : - """Determines if a block should be replaced with an <HR>""" + """Determines if a block should be replaced with an <:wHR>""" if block.startswith(" ") : return 0 # a code block text = "".join([x for x in block if not x.isspace()]) if len(text) <= 2 : |