aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Waylan Limberg <waylan@gmail.com>, 2011-07-14 07:26:45 -0400
committer: Waylan Limberg <waylan@gmail.com>, 2011-07-14 07:26:45 -0400
commit: 0415e4c489786fc60fa9cbd180378e7202c94dc5 (patch)
tree: 35aba52bd35799f44c499085237a395bfe8a7e3b
parent: 158b3d2d99b186a1b1cba91ef06dd30462ad5c58 (diff)
download: markdown-0415e4c489786fc60fa9cbd180378e7202c94dc5.tar.gz
markdown-0415e4c489786fc60fa9cbd180378e7202c94dc5.tar.bz2
markdown-0415e4c489786fc60fa9cbd180378e7202c94dc5.zip
Fixed #34. Better support for Unicode text in the html_tidy extension: force Tidy's input and output encoding to UTF-8, encode the text before passing it to Tidy, and decode Tidy's output afterwards.
-rw-r--r-- markdown/extensions/html_tidy.py 7
1 files changed, 5 insertions, 2 deletions
diff --git a/markdown/extensions/html_tidy.py b/markdown/extensions/html_tidy.py
index e1736eb..6aee083 100644
--- a/markdown/extensions/html_tidy.py
+++ b/markdown/extensions/html_tidy.py
@@ -40,6 +40,7 @@ class TidyExtension(markdown.Extension):
# Set defaults to match typical markdown behavior.
self.config = dict(output_xhtml=1,
show_body_only=1,
+ char_encoding='utf8'
)
# Merge in user-defined configs, overriding any present if necessary.
for c in configs:
@@ -58,8 +59,10 @@ class TidyProcessor(markdown.postprocessors.Postprocessor):
def run(self, text):
# Pass text to Tidy. As Tidy does not accept unicode we need to encode
# it and decode its return value.
- return unicode(tidy.parseString(text.encode('utf-8'),
- **self.markdown.tidy_options))
+ enc = self.markdown.tidy_options.get('char_encoding', 'utf8')
+ return unicode(tidy.parseString(text.encode(enc),
+ **self.markdown.tidy_options),
+ encoding=enc)
def makeExtension(configs=None):