aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--markdown/__init__.py2
-rw-r--r--markdown/searializers.py84
2 files changed, 38 insertions, 48 deletions
diff --git a/markdown/__init__.py b/markdown/__init__.py
index 7ca6cd1..562ee5f 100644
--- a/markdown/__init__.py
+++ b/markdown/__init__.py
@@ -285,7 +285,7 @@ class Markdown:
root = newRoot
# Serialize _properly_. Strip top-level tags.
- output, length = codecs.utf_8_decode(self.serializer(root, encoding="utf-8"))
+ output = self.serializer(root)
if self.stripTopLevelTags:
try:
start = output.index('<%s>'%self.doc_tag)+len(self.doc_tag)+2
diff --git a/markdown/searializers.py b/markdown/searializers.py
index 2de7b0f..22a83d4 100644
--- a/markdown/searializers.py
+++ b/markdown/searializers.py
@@ -1,6 +1,6 @@
-# markdown/html4.py
+# markdown/searializers.py
#
-# Add html4 serialization to older versions of Elementree
+# Add x/html serialization to Elementree
# Taken from ElementTree 1.3 preview with slight modifications
#
# Copyright (c) 1999-2007 by Fredrik Lundh. All rights reserved.
@@ -82,7 +82,7 @@ def _encode(text, encoding):
except (TypeError, AttributeError):
_raise_serialization_error(text)
-def _escape_cdata(text, encoding):
+def _escape_cdata(text):
# escape character data
try:
# it's worth avoiding do-nothing calls for strings that are
@@ -94,12 +94,12 @@ def _escape_cdata(text, encoding):
text = text.replace("<", "&lt;")
if ">" in text:
text = text.replace(">", "&gt;")
- return text.encode(encoding, "xmlcharrefreplace")
+ return text
except (TypeError, AttributeError):
_raise_serialization_error(text)
-def _escape_attrib(text, encoding):
+def _escape_attrib(text):
# escape attribute value
try:
if "&" in text:
@@ -112,11 +112,11 @@ def _escape_attrib(text, encoding):
text = text.replace("\"", "&quot;")
if "\n" in text:
text = text.replace("\n", "&#10;")
- return text.encode(encoding, "xmlcharrefreplace")
+ return text
except (TypeError, AttributeError):
_raise_serialization_error(text)
-def _escape_attrib_html(text, encoding):
+def _escape_attrib_html(text):
# escape attribute value
try:
if "&" in text:
@@ -127,25 +127,25 @@ def _escape_attrib_html(text, encoding):
text = text.replace(">", "&gt;")
if "\"" in text:
text = text.replace("\"", "&quot;")
- return text.encode(encoding, "xmlcharrefreplace")
+ return text
except (TypeError, AttributeError):
_raise_serialization_error(text)
-def _serialize_html(write, elem, encoding, qnames, namespaces, format):
+def _serialize_html(write, elem, qnames, namespaces, format):
tag = elem.tag
text = elem.text
if tag is Comment:
- write("<!--%s-->" % _escape_cdata(text, encoding))
+ write("<!--%s-->" % _escape_cdata(text))
elif tag is ProcessingInstruction:
- write("<?%s?>" % _escape_cdata(text, encoding))
+ write("<?%s?>" % _escape_cdata(text))
else:
tag = qnames[tag]
if tag is None:
if text:
- write(_escape_cdata(text, encoding))
+ write(_escape_cdata(text))
for e in elem:
- _serialize_html(write, e, encoding, qnames, None, format)
+ _serialize_html(write, e, qnames, None, format)
else:
write("<" + tag)
items = elem.items()
@@ -157,7 +157,7 @@ def _serialize_html(write, elem, encoding, qnames, namespaces, format):
if isinstance(v, QName):
v = qnames[v.text]
else:
- v = _escape_attrib_html(v, encoding)
+ v = _escape_attrib_html(v)
if qnames[k] == v and format == 'html':
# handle boolean attributes
write(" %s" % v)
@@ -169,10 +169,7 @@ def _serialize_html(write, elem, encoding, qnames, namespaces, format):
for v, k in items:
if k:
k = ":" + k
- write(" xmlns%s=\"%s\"" % (
- k.encode(encoding),
- _escape_attrib(v, encoding)
- ))
+ write(" xmlns%s=\"%s\"" % (k, _escape_attrib(v)))
if format == "xhtml" and tag in HTML_EMPTY:
write(" />")
else:
@@ -180,38 +177,35 @@ def _serialize_html(write, elem, encoding, qnames, namespaces, format):
tag = tag.lower()
if text:
if tag == "script" or tag == "style":
- write(_encode(text, encoding))
+ write(text)
else:
- write(_escape_cdata(text, encoding))
+ write(_escape_cdata(text))
for e in elem:
- _serialize_html(write, e, encoding, qnames, None, format)
+ _serialize_html(write, e, qnames, None, format)
if tag not in HTML_EMPTY:
write("</" + tag + ">")
if elem.tail:
- write(_escape_cdata(elem.tail, encoding))
+ write(_escape_cdata(elem.tail))
def _write_html(root,
- # keyword arguments
- encoding="utf-8",
- default_namespace=None,
- format="html"):
+ encoding=None,
+ default_namespace=None,
+ format="html"):
assert root is not None
data = []
write = data.append
- if not encoding:
- encoding = "utf-8"
- qnames, namespaces = _namespaces(
- root, encoding, default_namespace
- )
- _serialize_html(
- write, root, encoding, qnames, namespaces, format
- )
- return "".join(data)
+ qnames, namespaces = _namespaces(root, default_namespace)
+ _serialize_html(write, root, qnames, namespaces, format)
+ if encoding is None:
+ return "".join(data)
+ else:
+ return _encode("".join(data))
+
# --------------------------------------------------------------------
# serialization support
-def _namespaces(elem, encoding, default_namespace=None):
+def _namespaces(elem, default_namespace=None):
# identify namespaces used in this tree
# maps qnames to *encoded* prefix:local names
@@ -222,9 +216,6 @@ def _namespaces(elem, encoding, default_namespace=None):
if default_namespace:
namespaces[default_namespace] = ""
- def encode(text):
- return text.encode(encoding)
-
def add_qname(qname):
# calculate serialized qname representation
try:
@@ -238,17 +229,16 @@ def _namespaces(elem, encoding, default_namespace=None):
if prefix != "xml":
namespaces[uri] = prefix
if prefix:
- qnames[qname] = encode("%s:%s" % (prefix, tag))
+ qnames[qname] = "%s:%s" % (prefix, tag)
else:
- qnames[qname] = encode(tag) # default element
+ qnames[qname] = tag # default element
else:
if default_namespace:
- # FIXME: can this be handled in XML 1.0?
raise ValueError(
"cannot use non-qualified names with "
"default_namespace option"
)
- qnames[qname] = encode(qname)
+ qnames[qname] = qname
except TypeError:
_raise_serialization_error(qname)
@@ -278,8 +268,8 @@ def _namespaces(elem, encoding, default_namespace=None):
add_qname(text.text)
return qnames, namespaces
-def to_html_string(element, encoding=None):
- return _write_html(ElementTree(element).getroot(), encoding, format="html")
+def to_html_string(element):
+ return _write_html(ElementTree(element).getroot(), format="html")
-def to_xhtml_string(element, encoding=None):
- return _write_html(ElementTree(element).getroot(), encoding, format="xhtml")
+def to_xhtml_string(element):
+ return _write_html(ElementTree(element).getroot(), format="xhtml")