diff options
-rw-r--r-- | markdown/__init__.py | 2 | ||||
-rw-r--r-- | markdown/searializers.py | 84 |
2 files changed, 38 insertions, 48 deletions
diff --git a/markdown/__init__.py b/markdown/__init__.py index 7ca6cd1..562ee5f 100644 --- a/markdown/__init__.py +++ b/markdown/__init__.py @@ -285,7 +285,7 @@ class Markdown: root = newRoot # Serialize _properly_. Strip top-level tags. - output, length = codecs.utf_8_decode(self.serializer(root, encoding="utf-8")) + output = self.serializer(root) if self.stripTopLevelTags: try: start = output.index('<%s>'%self.doc_tag)+len(self.doc_tag)+2 diff --git a/markdown/searializers.py b/markdown/searializers.py index 2de7b0f..22a83d4 100644 --- a/markdown/searializers.py +++ b/markdown/searializers.py @@ -1,6 +1,6 @@ -# markdown/html4.py +# markdown/searializers.py # -# Add html4 serialization to older versions of Elementree +# Add x/html serialization to Elementree # Taken from ElementTree 1.3 preview with slight modifications # # Copyright (c) 1999-2007 by Fredrik Lundh. All rights reserved. @@ -82,7 +82,7 @@ def _encode(text, encoding): except (TypeError, AttributeError): _raise_serialization_error(text) -def _escape_cdata(text, encoding): +def _escape_cdata(text): # escape character data try: # it's worth avoiding do-nothing calls for strings that are @@ -94,12 +94,12 @@ def _escape_cdata(text, encoding): text = text.replace("<", "&lt;") if ">" in text: text = text.replace(">", "&gt;") - return text.encode(encoding, "xmlcharrefreplace") + return text except (TypeError, AttributeError): _raise_serialization_error(text) -def _escape_attrib(text, encoding): +def _escape_attrib(text): # escape attribute value try: if "&" in text: @@ -112,11 +112,11 @@ def _escape_attrib(text, encoding): text = text.replace("\"", "&quot;") if "\n" in text: text = text.replace("\n", "&#10;") - return text.encode(encoding, "xmlcharrefreplace") + return text except (TypeError, AttributeError): _raise_serialization_error(text) -def _escape_attrib_html(text, encoding): +def _escape_attrib_html(text): # escape attribute value try: if "&" in text: @@ -127,25 +127,25 @@ def _escape_attrib_html(text, encoding): 
text = text.replace(">", "&gt;") if "\"" in text: text = text.replace("\"", "&quot;") - return text.encode(encoding, "xmlcharrefreplace") + return text except (TypeError, AttributeError): _raise_serialization_error(text) -def _serialize_html(write, elem, encoding, qnames, namespaces, format): +def _serialize_html(write, elem, qnames, namespaces, format): tag = elem.tag text = elem.text if tag is Comment: - write("<!--%s-->" % _escape_cdata(text, encoding)) + write("<!--%s-->" % _escape_cdata(text)) elif tag is ProcessingInstruction: - write("<?%s?>" % _escape_cdata(text, encoding)) + write("<?%s?>" % _escape_cdata(text)) else: tag = qnames[tag] if tag is None: if text: - write(_escape_cdata(text, encoding)) + write(_escape_cdata(text)) for e in elem: - _serialize_html(write, e, encoding, qnames, None, format) + _serialize_html(write, e, qnames, None, format) else: write("<" + tag) items = elem.items() @@ -157,7 +157,7 @@ def _serialize_html(write, elem, encoding, qnames, namespaces, format): if isinstance(v, QName): v = qnames[v.text] else: - v = _escape_attrib_html(v, encoding) + v = _escape_attrib_html(v) if qnames[k] == v and format == 'html': # handle boolean attributes write(" %s" % v) @@ -169,10 +169,7 @@ def _serialize_html(write, elem, encoding, qnames, namespaces, format): for v, k in items: if k: k = ":" + k - write(" xmlns%s=\"%s\"" % ( - k.encode(encoding), - _escape_attrib(v, encoding) - )) + write(" xmlns%s=\"%s\"" % (k, _escape_attrib(v))) if format == "xhtml" and tag in HTML_EMPTY: write(" />") else: @@ -180,38 +177,35 @@ def _serialize_html(write, elem, encoding, qnames, namespaces, format): tag = tag.lower() if text: if tag == "script" or tag == "style": - write(_encode(text, encoding)) + write(text) else: - write(_escape_cdata(text, encoding)) + write(_escape_cdata(text)) for e in elem: - _serialize_html(write, e, encoding, qnames, None, format) + _serialize_html(write, e, qnames, None, format) if tag not in HTML_EMPTY: write("</" + tag + ">") if 
elem.tail: - write(_escape_cdata(elem.tail, encoding)) + write(_escape_cdata(elem.tail)) def _write_html(root, - # keyword arguments - encoding="utf-8", - default_namespace=None, - format="html"): + encoding=None, + default_namespace=None, + format="html"): assert root is not None data = [] write = data.append - if not encoding: - encoding = "utf-8" - qnames, namespaces = _namespaces( - root, encoding, default_namespace - ) - _serialize_html( - write, root, encoding, qnames, namespaces, format - ) - return "".join(data) + qnames, namespaces = _namespaces(root, default_namespace) + _serialize_html(write, root, qnames, namespaces, format) + if encoding is None: + return "".join(data) + else: + return _encode("".join(data)) + # -------------------------------------------------------------------- # serialization support -def _namespaces(elem, encoding, default_namespace=None): +def _namespaces(elem, default_namespace=None): # identify namespaces used in this tree # maps qnames to *encoded* prefix:local names @@ -222,9 +216,6 @@ def _namespaces(elem, encoding, default_namespace=None): if default_namespace: namespaces[default_namespace] = "" - def encode(text): - return text.encode(encoding) - def add_qname(qname): # calculate serialized qname representation try: @@ -238,17 +229,16 @@ def _namespaces(elem, encoding, default_namespace=None): if prefix != "xml": namespaces[uri] = prefix if prefix: - qnames[qname] = encode("%s:%s" % (prefix, tag)) + qnames[qname] = "%s:%s" % (prefix, tag) else: - qnames[qname] = encode(tag) # default element + qnames[qname] = tag # default element else: if default_namespace: - # FIXME: can this be handled in XML 1.0? 
raise ValueError( "cannot use non-qualified names with " "default_namespace option" ) - qnames[qname] = encode(qname) + qnames[qname] = qname except TypeError: _raise_serialization_error(qname) @@ -278,8 +268,8 @@ def _namespaces(elem, encoding, default_namespace=None): add_qname(text.text) return qnames, namespaces -def to_html_string(element, encoding=None): - return _write_html(ElementTree(element).getroot(), encoding, format="html") +def to_html_string(element): + return _write_html(ElementTree(element).getroot(), format="html") -def to_xhtml_string(element, encoding=None): - return _write_html(ElementTree(element).getroot(), encoding, format="xhtml") +def to_xhtml_string(element): + return _write_html(ElementTree(element).getroot(), format="xhtml") |