From b688bf59dee2ed1552c0b232a43066ba18393ab0 Mon Sep 17 00:00:00 2001 From: Waylan Limberg Date: Sun, 23 Aug 2009 14:42:52 -0400 Subject: Fixed Ticket 43. Apparently ElementTree does not recognize 'utf8' as an alias of 'utf-8' and outputs invalid xml. We never noticed as stripTopLevelTags removes the offending fragment. However, there are legitimate uses for turning off stripTopLevelTags. Therefore, from now on we will be using 'utf-8' internally. Thanks to Mark Eichin for the report. --- docs/using_as_module.txt | 8 ++++---- markdown/__init__.py | 2 +- test-markdown.py | 8 ++++---- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/docs/using_as_module.txt b/docs/using_as_module.txt index cfeb88d..130d0a7 100644 --- a/docs/using_as_module.txt +++ b/docs/using_as_module.txt @@ -20,13 +20,13 @@ string should work) and returns output as Unicode. Do not pass encoded strings If your input is encoded, e.g. as UTF-8, it is your responsibility to decode it. E.g.: - input_file = codecs.open("some_file.txt", mode="r", encoding="utf8") + input_file = codecs.open("some_file.txt", mode="r", encoding="utf-8") text = input_file.read() html = markdown.markdown(text, extensions) If you later want to write it to disk, you should encode it yourself: - output_file = codecs.open("some_file.html", "w", encoding="utf8") + output_file = codecs.open("some_file.html", "w", encoding="utf-8") output_file.write(html) More Options @@ -61,7 +61,7 @@ The ``Markdown`` class has the method ``convertFile`` which reads in a file and writes out to a file-like-object: md = markdown.Markdown() - md.convertFile(input="in.txt", output="out.html", encoding="utf8") + md.convertFile(input="in.txt", output="out.html", encoding="utf-8") The markdown module also includes a shortcut function ``markdownFromFile`` that wraps the above method. @@ -69,7 +69,7 @@ wraps the above method. 
markdown.markdownFromFile(input="in.txt", output="out.html", extensions=[], - encoding="utf8", + encoding="utf-8", safe=False) In either case, if the ``output`` keyword is passed a file name (i.e.: diff --git a/markdown/__init__.py b/markdown/__init__.py index 086fde9..6010715 100644 --- a/markdown/__init__.py +++ b/markdown/__init__.py @@ -395,7 +395,7 @@ class Markdown: root = newRoot # Serialize _properly_. Strip top-level tags. - output, length = codecs.utf_8_decode(self.serializer(root, encoding="utf8")) + output, length = codecs.utf_8_decode(self.serializer(root, encoding="utf-8")) if self.stripTopLevelTags: try: start = output.index('<%s>'%DOC_TAG)+len(DOC_TAG)+2 diff --git a/test-markdown.py b/test-markdown.py index 95914c4..e5dd870 100755 --- a/test-markdown.py +++ b/test-markdown.py @@ -160,7 +160,7 @@ class TestRunner : if not os.path.exists(TMP_DIR): os.mkdir(TMP_DIR) - def test_directory(self, dir, measure_time=False, safe_mode=False, encoding="utf8", output_format='xhtml1') : + def test_directory(self, dir, measure_time=False, safe_mode=False, encoding="utf-8", output_format='xhtml1') : self.encoding = encoding benchmark_file_name = os.path.join(dir, "benchmark.dat") self.saved_benchmarks = {} @@ -209,7 +209,7 @@ class TestRunner : self.html_diff_file.write("") if sys.version < "3.0": - self.html_diff_file.write(self.diffs_buffer.decode("utf8")) + self.html_diff_file.write(self.diffs_buffer.decode("utf-8")) self.html_diff_file.write(FOOTER) self.html_diff_file.close() @@ -249,8 +249,8 @@ class TestRunner : conversion_mem = memory(mem) self.md.reset() - expected_lines = [x.encode("utf8") for x in smart_split(expected_output)] - actual_lines = [x.encode("utf8") for x in smart_split(actual_output)] + expected_lines = [x.encode("utf-8") for x in smart_split(expected_output)] + actual_lines = [x.encode("utf-8") for x in smart_split(actual_output)] #diff = difflib.ndiff(expected_output.split("\n"), # actual_output.split("\n")) -- cgit v1.2.3