From 262228bd6afdab5602e960be9b2cc772cd6aad19 Mon Sep 17 00:00:00 2001 From: Waylan Limberg Date: Tue, 30 Oct 2007 01:19:22 +0000 Subject: Fixed BOM removal with non-ASCII input [1817528] - Thanks for the patch Malcolm Tredinnick --- markdown.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'markdown.py') diff --git a/markdown.py b/markdown.py index d09f1b2..e563866 100644 --- a/markdown.py +++ b/markdown.py @@ -62,15 +62,15 @@ RTL_BIDI_RANGES = ( (u'\u0590', u'\u07FF'), # 0780-07BF - Thaana # 07C0-07FF - Nko -BOMS = { 'utf-8' : (unicode(codecs.BOM_UTF8, "utf-8"), ), - 'utf-16' : (unicode(codecs.BOM_UTF16_LE, "utf-16"), - unicode(codecs.BOM_UTF16_BE, "utf-16")), - #'utf-32' : (unicode(codecs.BOM_UTF32_LE, "utf-32"), - # unicode(codecs.BOM_UTF32_BE, "utf-32")), +BOMS = { 'utf-8' : (codecs.BOM_UTF8, ), + 'utf-16' : (codecs.BOM_UTF16_LE, codecs.BOM_UTF16_BE), + #'utf-32' : (codecs.BOM_UTF32_LE, codecs.BOM_UTF32_BE) } def removeBOM(text, encoding): + convert = isinstance(text, unicode) for bom in BOMS[encoding]: + bom = convert and bom.decode(encoding) or bom if text.startswith(bom): return text.lstrip(bom) return text @@ -454,7 +454,7 @@ class LinePreprocessor (Preprocessor): return lines def _isLine(self, block) : - """Determines if a block should be replaced with an <HR>
""" + """Determines if a block should be replaced with an <:wHR>""" if block.startswith(" ") : return 0 # a code block text = "".join([x for x in block if not x.isspace()]) if len(text) <= 2 : -- cgit v1.2.3