Added our own XHTML serializer. We no longer use an XML serializer to output XHTML. This fixes #9, among other bugs. The test suite even had bad tests that should have been failing; they have also been corrected.

author: Waylan Limberg <waylan@gmail.com> 2011-06-16 08:31:02 -0400
committer: Waylan Limberg <waylan@gmail.com> 2011-06-16 08:31:02 -0400
commit: 1a42fa8602a99e0a8807f517ac44ecda77d86c22 (patch)
tree: 115faa3db53a6f74551413491f0ef2cd1e220566
parent: 619613e2d65cc339e3950483916ce29ed675b4fc (diff)
download: markdown-1a42fa8602a99e0a8807f517ac44ecda77d86c22.tar.gz
markdown-1a42fa8602a99e0a8807f517ac44ecda77d86c22.tar.bz2
markdown-1a42fa8602a99e0a8807f517ac44ecda77d86c22.zip
6 files changed, 52 insertions, 37 deletions
diff --git a/markdown/__init__.py b/markdown/__init__.py
index 9b964b7..762da31 100644
--- a/markdown/__init__.py
+++ b/markdown/__init__.py
@@ -43,7 +43,7 @@ from treeprocessors import build_treeprocessors
 from inlinepatterns import build_inlinepatterns
 from postprocessors import build_postprocessors
 from extensions import Extension
-import html4
+from searializers import to_html_string, to_xhtml_string
 
 # For backwards compatibility in the 2.0.x series
 # The things defined in these modules started off in __init__.py so third
@@ -67,10 +67,10 @@ class Markdown:
     }
     
     output_formats = {
-        'html'  : html4.to_html_string,
-        'html4' : html4.to_html_string,
-        'xhtml' : util.etree.tostring,
-        'xhtml1': util.etree.tostring,
+        'html'  : to_html_string,
+        'html4' : to_html_string,
+        'xhtml' : to_xhtml_string,
+        'xhtml1': to_xhtml_string,
     }
 
     def __init__(self, extensions=[], **kwargs):
diff --git a/markdown/html4.py b/markdown/searializers.py
index 611e628..39dcb56 100644
--- a/markdown/html4.py
+++ b/markdown/searializers.py
@@ -119,6 +119,8 @@ def _escape_attrib_html(text, encoding):
     try:
         if "&" in text:
             text = text.replace("&", "&amp;")
+        if "<" in text:
+            text = text.replace("<", "&lt;")
         if ">" in text:
             text = text.replace(">", "&gt;")
         if "\"" in text:
@@ -128,7 +130,7 @@ def _escape_attrib_html(text, encoding):
         _raise_serialization_error(text)
 
 
-def _serialize_html(write, elem, encoding, qnames, namespaces):
+def _serialize_html(write, elem, encoding, qnames, namespaces, format):
     tag = elem.tag
     text = elem.text
     if tag is Comment:
@@ -141,7 +143,7 @@ def _serialize_html(write, elem, encoding, qnames, namespaces):
             if text:
                 write(_escape_cdata(text, encoding))
             for e in elem:
-                _serialize_html(write, e, encoding, qnames, None)
+                _serialize_html(write, e, encoding, qnames, None, format)
         else:
             write("<" + tag)
             items = elem.items()
@@ -166,24 +168,28 @@ def _serialize_html(write, elem, encoding, qnames, namespaces):
                             k.encode(encoding),
                             _escape_attrib(v, encoding)
                             ))
-            write(">")
-            tag = tag.lower()
-            if text:
-                if tag == "script" or tag == "style":
-                    write(_encode(text, encoding))
-                else:
-                    write(_escape_cdata(text, encoding))
-            for e in elem:
-                _serialize_html(write, e, encoding, qnames, None)
-            if tag not in HTML_EMPTY:
-                write("</" + tag + ">")
+            if format == "xhtml" and tag in HTML_EMPTY:
+                write(" />")
+            else:
+                write(">")
+                tag = tag.lower()
+                if text:
+                    if tag == "script" or tag == "style":
+                        write(_encode(text, encoding))
+                    else:
+                        write(_escape_cdata(text, encoding))
+                for e in elem:
+                    _serialize_html(write, e, encoding, qnames, None, format)
+                if tag not in HTML_EMPTY:
+                    write("</" + tag + ">")
     if elem.tail:
         write(_escape_cdata(elem.tail, encoding))
 
 def write_html(root, f,
           # keyword arguments
           encoding="us-ascii",
-          default_namespace=None):
+          default_namespace=None,
+          format="html"):
     assert root is not None
     if not hasattr(f, "write"):
         f = open(f, "wb")
@@ -194,7 +200,7 @@ def write_html(root, f,
             root, encoding, default_namespace
             )
     _serialize_html(
-                write, root, encoding, qnames, namespaces
+                write, root, encoding, qnames, namespaces, format
                 )
 
 # --------------------------------------------------------------------
@@ -273,5 +279,14 @@ def to_html_string(element, encoding=None):
     data = []
     file = dummy()
     file.write = data.append
-    write_html(ElementTree(element).getroot(),file,encoding)
+    write_html(ElementTree(element).getroot(), file, encoding, format="html")
+    return "".join(data)
+
+def to_xhtml_string(element, encoding=None):
+    class dummy:
+        pass
+    data = []
+    file = dummy()
+    file.write = data.append
+    write_html(ElementTree(element).getroot(), file, encoding, format="xhtml")
     return "".join(data)
diff --git a/tests/extensions/extra/tables.html b/tests/extensions/extra/tables.html
index c931e6a..1d626da 100644
--- a/tests/extensions/extra/tables.html
+++ b/tests/extensions/extra/tables.html
@@ -85,13 +85,13 @@
 </thead>
 <tbody>
 <tr>
-<td align="left" />
+<td align="left"></td>
 <td align="center">Q</td>
-<td align="right" />
+<td align="right"></td>
 </tr>
 <tr>
 <td align="left">W</td>
-<td align="center" />
+<td align="center"></td>
 <td align="right">W</td>
 </tr>
 </tbody>
@@ -106,13 +106,13 @@
 </thead>
 <tbody>
 <tr>
-<td />
+<td></td>
 <td>Q</td>
-<td />
+<td></td>
 </tr>
 <tr>
 <td>W</td>
-<td />
+<td></td>
 <td>W</td>
 </tr>
 </tbody>
diff --git a/tests/misc/attributes2.html b/tests/misc/attributes2.html
index 5971cc8..b78fee0 100644
--- a/tests/misc/attributes2.html
+++ b/tests/misc/attributes2.html
@@ -1,6 +1,6 @@
-<p id="TABLE.OF.CONTENTS" />
+<p id="TABLE.OF.CONTENTS"></p>
 <ul>
-<li id="TABLEOFCONTENTS" />
+<li id="TABLEOFCONTENTS"></li>
 </ul>
 <p id="TABLEOFCONTENTS">Or in the middle of the text </p>
-<p id="tableofcontents" />
-\ No newline at end of file
+<p id="tableofcontents"></p>
+\ No newline at end of file
diff --git a/tests/misc/blank-block-quote.html b/tests/misc/blank-block-quote.html
index 23df17a..966078c 100644
--- a/tests/misc/blank-block-quote.html
+++ b/tests/misc/blank-block-quote.html
@@ -1,3 +1,3 @@
 <p>aaaaaaaaaaa</p>
-<blockquote />
+<blockquote></blockquote>
 <p>bbbbbbbbbbb</p>
 \ No newline at end of file
diff --git a/tests/test_apis.py b/tests/test_apis.py
index eecfbf6..0de897a 100644
--- a/tests/test_apis.py
+++ b/tests/test_apis.py
@@ -277,7 +277,7 @@ class testETreeComments(unittest.TestCase):
     Test that ElementTree Comments work.
 
     These tests should only be a concern when using cElementTree with third
-    party serializers (including markdown's html4 serializer). While markdown
+    party serializers (including markdown's (x)html serializer). While markdown
     doesn't use ElementTree.Comment itself, we should certainly support any
     third party extensions which may. Therefore, these tests are included to
     ensure such support is maintained.
@@ -301,14 +301,14 @@ class testETreeComments(unittest.TestCase):
 
     def testCommentSerialization(self):
         """ Test that an ElementTree Comment serializes properly. """
-        self.assertEqual(markdown.html4.to_html_string(self.comment),
+        self.assertEqual(markdown.searializers.to_html_string(self.comment),
                     '<!--foo-->')
 
     def testCommentPrettify(self):
         """ Test that an ElementTree Comment is prettified properly. """
         pretty = markdown.treeprocessors.PrettifyTreeprocessor()
         pretty.run(self.comment)
-        self.assertEqual(markdown.html4.to_html_string(self.comment),
+        self.assertEqual(markdown.searializers.to_html_string(self.comment),
                     '<!--foo-->\n')
 
 
@@ -325,7 +325,7 @@ class testAtomicString(unittest.TestCase):
         p = markdown.util.etree.SubElement(tree, 'p')
         p.text = u'some *text*'
         new = self.inlineprocessor.run(tree)
-        self.assertEqual(markdown.html4.to_html_string(new), 
+        self.assertEqual(markdown.searializers.to_html_string(new), 
                     '<div><p>some <em>text</em></p></div>')
 
     def testSimpleAtomicString(self):
@@ -334,7 +334,7 @@ class testAtomicString(unittest.TestCase):
         p = markdown.util.etree.SubElement(tree, 'p')
         p.text = markdown.util.AtomicString(u'some *text*')
         new = self.inlineprocessor.run(tree)
-        self.assertEqual(markdown.html4.to_html_string(new), 
+        self.assertEqual(markdown.searializers.to_html_string(new), 
                     '<div><p>some *text*</p></div>')
 
     def testNestedAtomicString(self):
@@ -352,6 +352,6 @@ class testAtomicString(unittest.TestCase):
         span2.tail = markdown.util.AtomicString(u' *test*')
         span1.tail = markdown.util.AtomicString(u' *with*')
         new = self.inlineprocessor.run(tree)
-        self.assertEqual(markdown.html4.to_html_string(new), 
+        self.assertEqual(markdown.searializers.to_html_string(new), 
             '<div><p>*some* <span>*more* <span>*text* <span>*here*</span> *to*</span> *test*</span> *with*</p></div>')
author	Waylan Limberg <waylan@gmail.com>	2011-06-16 08:31:02 -0400
committer	Waylan Limberg <waylan@gmail.com>	2011-06-16 08:31:02 -0400
commit	1a42fa8602a99e0a8807f517ac44ecda77d86c22 (patch)
tree	115faa3db53a6f74551413491f0ef2cd1e220566
parent	619613e2d65cc339e3950483916ce29ed675b4fc (diff)
download	markdown-1a42fa8602a99e0a8807f517ac44ecda77d86c22.tar.gz markdown-1a42fa8602a99e0a8807f517ac44ecda77d86c22.tar.bz2 markdown-1a42fa8602a99e0a8807f517ac44ecda77d86c22.zip