diff options
author | Waylan Limberg <waylan@gmail.com> | 2011-07-14 07:26:45 -0400 |
---|---|---|
committer | Waylan Limberg <waylan@gmail.com> | 2011-07-14 07:26:45 -0400 |
commit | 0415e4c489786fc60fa9cbd180378e7202c94dc5 (patch) | |
tree | 35aba52bd35799f44c499085237a395bfe8a7e3b | |
parent | 158b3d2d99b186a1b1cba91ef06dd30462ad5c58 (diff) | |
download | markdown-0415e4c489786fc60fa9cbd180378e7202c94dc5.tar.gz markdown-0415e4c489786fc60fa9cbd180378e7202c94dc5.tar.bz2 markdown-0415e4c489786fc60fa9cbd180378e7202c94dc5.zip |
Fixed #34. Better support for unicode text with the html_tidy extension. Force input and output of tidy to use UTF-8 and encode before and decode after passing in the text.
-rw-r--r-- | markdown/extensions/html_tidy.py | 7 |
1 file changed, 5 insertions, 2 deletions
```diff
diff --git a/markdown/extensions/html_tidy.py b/markdown/extensions/html_tidy.py
index e1736eb..6aee083 100644
--- a/markdown/extensions/html_tidy.py
+++ b/markdown/extensions/html_tidy.py
@@ -40,6 +40,7 @@ class TidyExtension(markdown.Extension):
         # Set defaults to match typical markdown behavior.
         self.config = dict(output_xhtml=1,
                            show_body_only=1,
+                           char_encoding='utf8'
                           )
         # Merge in user defined configs overriding any present if nessecary.
         for c in configs:
@@ -58,8 +59,10 @@ class TidyProcessor(markdown.postprocessors.Postprocessor):
     def run(self, text):
         # Pass text to Tidy. As Tidy does not accept unicode we need to encode
         # it and decode its return value.
-        return unicode(tidy.parseString(text.encode('utf-8'),
-                       **self.markdown.tidy_options))
+        enc = self.markdown.tidy_options.get('char_encoding', 'utf8')
+        return unicode(tidy.parseString(text.encode(enc),
+                       **self.markdown.tidy_options),
+                       encoding=enc)
 
 
 def makeExtension(configs=None):
```