about | summary | refs | log | tree | commit | diff | stats
path: root/markdown.py
diff options
context:
space:
mode:
author Waylan Limberg <waylan@gmail.com> 2007-10-30 01:19:22 +0000
committer Waylan Limberg <waylan@gmail.com> 2007-10-30 01:19:22 +0000
commit 262228bd6afdab5602e960be9b2cc772cd6aad19 (patch)
tree 1f47904c774cdd744a31cd632194ed3cd45ccc5c /markdown.py
parent 0c59b31f02906e6c1c35928afa41bab115cf4ec0 (diff)
download markdown-262228bd6afdab5602e960be9b2cc772cd6aad19.tar.gz
markdown-262228bd6afdab5602e960be9b2cc772cd6aad19.tar.bz2
markdown-262228bd6afdab5602e960be9b2cc772cd6aad19.zip
Fixed BOM removal with non-ASCII input [1817528] - Thanks for the patch Malcolm Tredinnick
Diffstat (limited to 'markdown.py')
-rw-r--r-- markdown.py 12
1 files changed, 6 insertions, 6 deletions
diff --git a/markdown.py b/markdown.py
index d09f1b2..e563866 100644
--- a/markdown.py
+++ b/markdown.py
@@ -62,15 +62,15 @@ RTL_BIDI_RANGES = ( (u'\u0590', u'\u07FF'),
# 0780-07BF - Thaana
# 07C0-07FF - Nko
-BOMS = { 'utf-8' : (unicode(codecs.BOM_UTF8, "utf-8"), ),
- 'utf-16' : (unicode(codecs.BOM_UTF16_LE, "utf-16"),
- unicode(codecs.BOM_UTF16_BE, "utf-16")),
- #'utf-32' : (unicode(codecs.BOM_UTF32_LE, "utf-32"),
- # unicode(codecs.BOM_UTF32_BE, "utf-32")),
+BOMS = { 'utf-8' : (codecs.BOM_UTF8, ),
+ 'utf-16' : (codecs.BOM_UTF16_LE, codecs.BOM_UTF16_BE),
+ #'utf-32' : (codecs.BOM_UTF32_LE, codecs.BOM_UTF32_BE)
}
def removeBOM(text, encoding):
+ convert = isinstance(text, unicode)
for bom in BOMS[encoding]:
+ bom = convert and bom.decode(encoding) or bom
if text.startswith(bom):
return text.lstrip(bom)
return text
@@ -454,7 +454,7 @@ class LinePreprocessor (Preprocessor):
return lines
def _isLine(self, block) :
- """Determines if a block should be replaced with an <HR>"""
+ """Determines if a block should be replaced with an <HR>"""
if block.startswith(" ") : return 0 # a code block
text = "".join([x for x in block if not x.isspace()])
if len(text) <= 2 :