aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Waylan Limberg <waylan@gmail.com> 2011-06-16 08:31:02 -0400
committer Waylan Limberg <waylan@gmail.com> 2011-06-16 08:31:02 -0400
commit 1a42fa8602a99e0a8807f517ac44ecda77d86c22 (patch)
tree 115faa3db53a6f74551413491f0ef2cd1e220566
parent 619613e2d65cc339e3950483916ce29ed675b4fc (diff)
download markdown-1a42fa8602a99e0a8807f517ac44ecda77d86c22.tar.gz
markdown-1a42fa8602a99e0a8807f517ac44ecda77d86c22.tar.bz2
markdown-1a42fa8602a99e0a8807f517ac44ecda77d86c22.zip
Added our own XHTML serializer. We no longer use an XML serializer to output XHTML. This fixes #9 among other bugs. The test suite even had bad tests that should have been failing; they have also been corrected.
-rw-r--r-- markdown/__init__.py 10
-rw-r--r-- markdown/searializers.py (renamed from markdown/html4.py) 47
-rw-r--r-- tests/extensions/extra/tables.html 12
-rw-r--r-- tests/misc/attributes2.html 6
-rw-r--r-- tests/misc/blank-block-quote.html 2
-rw-r--r-- tests/test_apis.py 12
6 files changed, 52 insertions, 37 deletions
diff --git a/markdown/__init__.py b/markdown/__init__.py
index 9b964b7..762da31 100644
--- a/markdown/__init__.py
+++ b/markdown/__init__.py
@@ -43,7 +43,7 @@ from treeprocessors import build_treeprocessors
from inlinepatterns import build_inlinepatterns
from postprocessors import build_postprocessors
from extensions import Extension
-import html4
+from searializers import to_html_string, to_xhtml_string
# For backwards compatibility in the 2.0.x series
# The things defined in these modules started off in __init__.py so third
@@ -67,10 +67,10 @@ class Markdown:
}
output_formats = {
- 'html' : html4.to_html_string,
- 'html4' : html4.to_html_string,
- 'xhtml' : util.etree.tostring,
- 'xhtml1': util.etree.tostring,
+ 'html' : to_html_string,
+ 'html4' : to_html_string,
+ 'xhtml' : to_xhtml_string,
+ 'xhtml1': to_xhtml_string,
}
def __init__(self, extensions=[], **kwargs):
diff --git a/markdown/html4.py b/markdown/searializers.py
index 611e628..39dcb56 100644
--- a/markdown/html4.py
+++ b/markdown/searializers.py
@@ -119,6 +119,8 @@ def _escape_attrib_html(text, encoding):
try:
if "&" in text:
text = text.replace("&", "&amp;")
+ if "<" in text:
+ text = text.replace("<", "&lt;")
if ">" in text:
text = text.replace(">", "&gt;")
if "\"" in text:
@@ -128,7 +130,7 @@ def _escape_attrib_html(text, encoding):
_raise_serialization_error(text)
-def _serialize_html(write, elem, encoding, qnames, namespaces):
+def _serialize_html(write, elem, encoding, qnames, namespaces, format):
tag = elem.tag
text = elem.text
if tag is Comment:
@@ -141,7 +143,7 @@ def _serialize_html(write, elem, encoding, qnames, namespaces):
if text:
write(_escape_cdata(text, encoding))
for e in elem:
- _serialize_html(write, e, encoding, qnames, None)
+ _serialize_html(write, e, encoding, qnames, None, format)
else:
write("<" + tag)
items = elem.items()
@@ -166,24 +168,28 @@ def _serialize_html(write, elem, encoding, qnames, namespaces):
k.encode(encoding),
_escape_attrib(v, encoding)
))
- write(">")
- tag = tag.lower()
- if text:
- if tag == "script" or tag == "style":
- write(_encode(text, encoding))
- else:
- write(_escape_cdata(text, encoding))
- for e in elem:
- _serialize_html(write, e, encoding, qnames, None)
- if tag not in HTML_EMPTY:
- write("</" + tag + ">")
+ if format == "xhtml" and tag in HTML_EMPTY:
+ write(" />")
+ else:
+ write(">")
+ tag = tag.lower()
+ if text:
+ if tag == "script" or tag == "style":
+ write(_encode(text, encoding))
+ else:
+ write(_escape_cdata(text, encoding))
+ for e in elem:
+ _serialize_html(write, e, encoding, qnames, None, format)
+ if tag not in HTML_EMPTY:
+ write("</" + tag + ">")
if elem.tail:
write(_escape_cdata(elem.tail, encoding))
def write_html(root, f,
# keyword arguments
encoding="us-ascii",
- default_namespace=None):
+ default_namespace=None,
+ format="html"):
assert root is not None
if not hasattr(f, "write"):
f = open(f, "wb")
@@ -194,7 +200,7 @@ def write_html(root, f,
root, encoding, default_namespace
)
_serialize_html(
- write, root, encoding, qnames, namespaces
+ write, root, encoding, qnames, namespaces, format
)
# --------------------------------------------------------------------
@@ -273,5 +279,14 @@ def to_html_string(element, encoding=None):
data = []
file = dummy()
file.write = data.append
- write_html(ElementTree(element).getroot(),file,encoding)
+ write_html(ElementTree(element).getroot(), file, encoding, format="html")
+ return "".join(data)
+
+def to_xhtml_string(element, encoding=None):
+ class dummy:
+ pass
+ data = []
+ file = dummy()
+ file.write = data.append
+ write_html(ElementTree(element).getroot(), file, encoding, format="xhtml")
return "".join(data)
diff --git a/tests/extensions/extra/tables.html b/tests/extensions/extra/tables.html
index c931e6a..1d626da 100644
--- a/tests/extensions/extra/tables.html
+++ b/tests/extensions/extra/tables.html
@@ -85,13 +85,13 @@
</thead>
<tbody>
<tr>
-<td align="left" />
+<td align="left"></td>
<td align="center">Q</td>
-<td align="right" />
+<td align="right"></td>
</tr>
<tr>
<td align="left">W</td>
-<td align="center" />
+<td align="center"></td>
<td align="right">W</td>
</tr>
</tbody>
@@ -106,13 +106,13 @@
</thead>
<tbody>
<tr>
-<td />
+<td></td>
<td>Q</td>
-<td />
+<td></td>
</tr>
<tr>
<td>W</td>
-<td />
+<td></td>
<td>W</td>
</tr>
</tbody>
diff --git a/tests/misc/attributes2.html b/tests/misc/attributes2.html
index 5971cc8..b78fee0 100644
--- a/tests/misc/attributes2.html
+++ b/tests/misc/attributes2.html
@@ -1,6 +1,6 @@
-<p id="TABLE.OF.CONTENTS" />
+<p id="TABLE.OF.CONTENTS"></p>
<ul>
-<li id="TABLEOFCONTENTS" />
+<li id="TABLEOFCONTENTS"></li>
</ul>
<p id="TABLEOFCONTENTS">Or in the middle of the text </p>
-<p id="tableofcontents" /> \ No newline at end of file
+<p id="tableofcontents"></p> \ No newline at end of file
diff --git a/tests/misc/blank-block-quote.html b/tests/misc/blank-block-quote.html
index 23df17a..966078c 100644
--- a/tests/misc/blank-block-quote.html
+++ b/tests/misc/blank-block-quote.html
@@ -1,3 +1,3 @@
<p>aaaaaaaaaaa</p>
-<blockquote />
+<blockquote></blockquote>
<p>bbbbbbbbbbb</p> \ No newline at end of file
diff --git a/tests/test_apis.py b/tests/test_apis.py
index eecfbf6..0de897a 100644
--- a/tests/test_apis.py
+++ b/tests/test_apis.py
@@ -277,7 +277,7 @@ class testETreeComments(unittest.TestCase):
Test that ElementTree Comments work.
These tests should only be a concern when using cElementTree with third
- party serializers (including markdown's html4 serializer). While markdown
+ party serializers (including markdown's (x)html serializer). While markdown
doesn't use ElementTree.Comment itself, we should certainly support any
third party extensions which may. Therefore, these tests are included to
ensure such support is maintained.
@@ -301,14 +301,14 @@ class testETreeComments(unittest.TestCase):
def testCommentSerialization(self):
""" Test that an ElementTree Comment serializes properly. """
- self.assertEqual(markdown.html4.to_html_string(self.comment),
+ self.assertEqual(markdown.searializers.to_html_string(self.comment),
'<!--foo-->')
def testCommentPrettify(self):
""" Test that an ElementTree Comment is prettified properly. """
pretty = markdown.treeprocessors.PrettifyTreeprocessor()
pretty.run(self.comment)
- self.assertEqual(markdown.html4.to_html_string(self.comment),
+ self.assertEqual(markdown.searializers.to_html_string(self.comment),
'<!--foo-->\n')
@@ -325,7 +325,7 @@ class testAtomicString(unittest.TestCase):
p = markdown.util.etree.SubElement(tree, 'p')
p.text = u'some *text*'
new = self.inlineprocessor.run(tree)
- self.assertEqual(markdown.html4.to_html_string(new),
+ self.assertEqual(markdown.searializers.to_html_string(new),
'<div><p>some <em>text</em></p></div>')
def testSimpleAtomicString(self):
@@ -334,7 +334,7 @@ class testAtomicString(unittest.TestCase):
p = markdown.util.etree.SubElement(tree, 'p')
p.text = markdown.util.AtomicString(u'some *text*')
new = self.inlineprocessor.run(tree)
- self.assertEqual(markdown.html4.to_html_string(new),
+ self.assertEqual(markdown.searializers.to_html_string(new),
'<div><p>some *text*</p></div>')
def testNestedAtomicString(self):
@@ -352,6 +352,6 @@ class testAtomicString(unittest.TestCase):
span2.tail = markdown.util.AtomicString(u' *test*')
span1.tail = markdown.util.AtomicString(u' *with*')
new = self.inlineprocessor.run(tree)
- self.assertEqual(markdown.html4.to_html_string(new),
+ self.assertEqual(markdown.searializers.to_html_string(new),
'<div><p>*some* <span>*more* <span>*text* <span>*here*</span> *to*</span> *test*</span> *with*</p></div>')