All internal encoding of output now uses the 'xmlcharrefreplace' error handler. Also added a note to the docs. Anyone doing their own encoding of output should use this error handler as well.

author: Waylan Limberg <waylan@gmail.com> 2011-07-28 10:02:38 -0400
committer: Waylan Limberg <waylan@gmail.com> 2011-07-28 10:02:38 -0400
commit: 80f6ac599f79546512b522566cb421acea1aca19 (patch)
tree: b60ff3b1680a8d79e74fe87d9f12099fc970e089
parent: 5ad27f85db3e5c0a0e6f05ab099bfbfc8b0d0dd5 (diff)
download: markdown-80f6ac599f79546512b522566cb421acea1aca19.tar.gz
markdown-80f6ac599f79546512b522566cb421acea1aca19.tar.bz2
markdown-80f6ac599f79546512b522566cb421acea1aca19.zip
3 files changed, 13 insertions, 6 deletions
diff --git a/docs/using_as_module.txt b/docs/using_as_module.txt
index 343fee0..f50a0ec 100644
--- a/docs/using_as_module.txt
+++ b/docs/using_as_module.txt
@@ -39,7 +39,10 @@ The following options are available on the `markdown.markdown` function:
 
     If you want to write the output to disk, you must encode it yourself:
 
-        output_file = codecs.open("some_file.html", "w", encoding="utf-8")
+        output_file = codecs.open("some_file.html", "w", 
+                                  encoding="utf-8", 
+                                  errors="xmlcharrefreplace"
+        )
         output_file.write(html)
 
 * `extensions`: A list of extensions.
@@ -178,7 +181,8 @@ the following required options:
     * or `None` (default) which will write to `stdout`.
 
 * `encoding`: The encoding of the source text file. Defaults to 
-  "utf-8". The same encoding will always be used for the output file.
+  "utf-8". The same encoding will always be used for the output file. 
+  The 'xmlcharrefreplace' error handler is used when encoding the output.
 
     **Note:** This is the only place that decoding and encoding of unicode
     takes place in Python-Markdown. If this rather naive solution does not
diff --git a/markdown/__init__.py b/markdown/__init__.py
index 562ee5f..630198e 100644
--- a/markdown/__init__.py
+++ b/markdown/__init__.py
@@ -311,7 +311,8 @@ class Markdown:
         Decodes the file using the provided encoding (defaults to utf-8),
         passes the file content to markdown, and outputs the html to either
         the provided stream or the file with provided name, using the same
-        encoding as the source file.
+        encoding as the source file. The 'xmlcharrefreplace' error handler is
+        used when encoding the output.
 
         **Note:** This is the only place that decoding and encoding of unicode
         takes place in Python-Markdown.  (All other code is unicode-in /
@@ -341,11 +342,13 @@ class Markdown:
 
         # Write to file or stdout
         if isinstance(output, (str, unicode)):
-            output_file = codecs.open(output, "w", encoding=encoding)
+            output_file = codecs.open(output, "w", 
+                                      encoding=encoding, 
+                                      errors="xmlcharrefreplace")
             output_file.write(html)
             output_file.close()
         else:
-            output.write(html.encode(encoding))
+            output.write(html.encode(encoding, errors="xmlcharrefreplace"))
 
         return self
 
diff --git a/tests/__init__.py b/tests/__init__.py
index 8fbc5d9..b274b1e 100644
--- a/tests/__init__.py
+++ b/tests/__init__.py
@@ -51,7 +51,7 @@ def get_args(file, config):
 
 def normalize(text):
     """ Normalize whitespace for a string of html using tidy. """
-    return str(tidy.parseString(text.encode('utf-8'), 
+    return str(tidy.parseString(text.encode('utf-8', errors='xmlcharrefreplace'), 
                                     drop_empty_paras=0,
                                     fix_backslash=0,
                                     fix_bad_comments=0,
author	Waylan Limberg <waylan@gmail.com>	2011-07-28 10:02:38 -0400
committer	Waylan Limberg <waylan@gmail.com>	2011-07-28 10:02:38 -0400
commit	80f6ac599f79546512b522566cb421acea1aca19 (patch)
tree	b60ff3b1680a8d79e74fe87d9f12099fc970e089
parent	5ad27f85db3e5c0a0e6f05ab099bfbfc8b0d0dd5 (diff)
download	markdown-80f6ac599f79546512b522566cb421acea1aca19.tar.gz markdown-80f6ac599f79546512b522566cb421acea1aca19.tar.bz2 markdown-80f6ac599f79546512b522566cb421acea1aca19.zip