From dfec5a1a062718caedc24a99c9f31e9fa4a36d15 Mon Sep 17 00:00:00 2001 From: Waylan Limberg Date: Fri, 20 Jan 2012 04:39:25 -0500 Subject: It is spelled 'serializers' not 'searializers'. --- markdown/__init__.py | 2 +- markdown/searializers.py | 275 ----------------------------------------------- markdown/serializers.py | 275 +++++++++++++++++++++++++++++++++++++++++++++++ tests/test_apis.py | 14 +-- 4 files changed, 283 insertions(+), 283 deletions(-) delete mode 100644 markdown/searializers.py create mode 100644 markdown/serializers.py diff --git a/markdown/__init__.py b/markdown/__init__.py index a08af5b..b06f8e1 100644 --- a/markdown/__init__.py +++ b/markdown/__init__.py @@ -44,7 +44,7 @@ from treeprocessors import build_treeprocessors from inlinepatterns import build_inlinepatterns from postprocessors import build_postprocessors from extensions import Extension -from searializers import to_html_string, to_xhtml_string +from serializers import to_html_string, to_xhtml_string __all__ = ['Markdown', 'markdown', 'markdownFromFile'] diff --git a/markdown/searializers.py b/markdown/searializers.py deleted file mode 100644 index 22a83d4..0000000 --- a/markdown/searializers.py +++ /dev/null @@ -1,275 +0,0 @@ -# markdown/searializers.py -# -# Add x/html serialization to Elementree -# Taken from ElementTree 1.3 preview with slight modifications -# -# Copyright (c) 1999-2007 by Fredrik Lundh. All rights reserved. -# -# fredrik@pythonware.com -# http://www.pythonware.com -# -# -------------------------------------------------------------------- -# The ElementTree toolkit is -# -# Copyright (c) 1999-2007 by Fredrik Lundh -# -# By obtaining, using, and/or copying this software and/or its -# associated documentation, you agree that you have read, understood, -# and will comply with the following terms and conditions: -# -# Permission to use, copy, modify, and distribute this software and -# its associated documentation for any purpose and without fee is -# hereby granted, provided that the above copyright notice appears in -# all copies, and that both that copyright notice and this permission -# notice appear in supporting documentation, and that the name of -# Secret Labs AB or the author not be used in advertising or publicity -# pertaining to distribution of the software without specific, written -# prior permission. -# -# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD -# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT- -# ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR -# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY -# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, -# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS -# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE -# OF THIS SOFTWARE. -# -------------------------------------------------------------------- - - -import util -ElementTree = util.etree.ElementTree -QName = util.etree.QName -if hasattr(util.etree, 'test_comment'): - Comment = util.etree.test_comment -else: - Comment = util.etree.Comment -PI = util.etree.PI -ProcessingInstruction = util.etree.ProcessingInstruction - -__all__ = ['to_html_string', 'to_xhtml_string'] - -HTML_EMPTY = ("area", "base", "basefont", "br", "col", "frame", "hr", - "img", "input", "isindex", "link", "meta" "param") - -try: - HTML_EMPTY = set(HTML_EMPTY) -except NameError: - pass - -_namespace_map = { - # "well-known" namespace prefixes - "http://www.w3.org/XML/1998/namespace": "xml", - "http://www.w3.org/1999/xhtml": "html", - "http://www.w3.org/1999/02/22-rdf-syntax-ns#": "rdf", - "http://schemas.xmlsoap.org/wsdl/": "wsdl", - # xml schema - "http://www.w3.org/2001/XMLSchema": "xs", - "http://www.w3.org/2001/XMLSchema-instance": "xsi", - # dublic core - "http://purl.org/dc/elements/1.1/": "dc", -} - - -def _raise_serialization_error(text): - raise TypeError( - "cannot serialize %r (type %s)" % (text, type(text).__name__) - ) - -def _encode(text, encoding): - try: - return text.encode(encoding, "xmlcharrefreplace") - except (TypeError, AttributeError): - _raise_serialization_error(text) - -def _escape_cdata(text): - # escape character data - try: - # it's worth avoiding do-nothing calls for strings that are - # shorter than 500 character, or so. assume that's, by far, - # the most common case in most applications. - if "&" in text: - text = text.replace("&", "&") - if "<" in text: - text = text.replace("<", "<") - if ">" in text: - text = text.replace(">", ">") - return text - except (TypeError, AttributeError): - _raise_serialization_error(text) - - -def _escape_attrib(text): - # escape attribute value - try: - if "&" in text: - text = text.replace("&", "&") - if "<" in text: - text = text.replace("<", "<") - if ">" in text: - text = text.replace(">", ">") - if "\"" in text: - text = text.replace("\"", """) - if "\n" in text: - text = text.replace("\n", " ") - return text - except (TypeError, AttributeError): - _raise_serialization_error(text) - -def _escape_attrib_html(text): - # escape attribute value - try: - if "&" in text: - text = text.replace("&", "&") - if "<" in text: - text = text.replace("<", "<") - if ">" in text: - text = text.replace(">", ">") - if "\"" in text: - text = text.replace("\"", """) - return text - except (TypeError, AttributeError): - _raise_serialization_error(text) - - -def _serialize_html(write, elem, qnames, namespaces, format): - tag = elem.tag - text = elem.text - if tag is Comment: - write("" % _escape_cdata(text)) - elif tag is ProcessingInstruction: - write("" % _escape_cdata(text)) - else: - tag = qnames[tag] - if tag is None: - if text: - write(_escape_cdata(text)) - for e in elem: - _serialize_html(write, e, qnames, None, format) - else: - write("<" + tag) - items = elem.items() - if items or namespaces: - items.sort() # lexical order - for k, v in items: - if isinstance(k, QName): - k = k.text - if isinstance(v, QName): - v = qnames[v.text] - else: - v = _escape_attrib_html(v) - if qnames[k] == v and format == 'html': - # handle boolean attributes - write(" %s" % v) - else: - write(" %s=\"%s\"" % (qnames[k], v)) - if namespaces: - items = namespaces.items() - items.sort(key=lambda x: x[1]) # sort on prefix - for v, k in items: - if k: - k = ":" + k - write(" xmlns%s=\"%s\"" % (k, _escape_attrib(v))) - if format == "xhtml" and tag in HTML_EMPTY: - write(" />") - else: - write(">") - tag = tag.lower() - if text: - if tag == "script" or tag == "style": - write(text) - else: - write(_escape_cdata(text)) - for e in elem: - _serialize_html(write, e, qnames, None, format) - if tag not in HTML_EMPTY: - write("") - if elem.tail: - write(_escape_cdata(elem.tail)) - -def _write_html(root, - encoding=None, - default_namespace=None, - format="html"): - assert root is not None - data = [] - write = data.append - qnames, namespaces = _namespaces(root, default_namespace) - _serialize_html(write, root, qnames, namespaces, format) - if encoding is None: - return "".join(data) - else: - return _encode("".join(data)) - - -# -------------------------------------------------------------------- -# serialization support - -def _namespaces(elem, default_namespace=None): - # identify namespaces used in this tree - - # maps qnames to *encoded* prefix:local names - qnames = {None: None} - - # maps uri:s to prefixes - namespaces = {} - if default_namespace: - namespaces[default_namespace] = "" - - def add_qname(qname): - # calculate serialized qname representation - try: - if qname[:1] == "{": - uri, tag = qname[1:].split("}", 1) - prefix = namespaces.get(uri) - if prefix is None: - prefix = _namespace_map.get(uri) - if prefix is None: - prefix = "ns%d" % len(namespaces) - if prefix != "xml": - namespaces[uri] = prefix - if prefix: - qnames[qname] = "%s:%s" % (prefix, tag) - else: - qnames[qname] = tag # default element - else: - if default_namespace: - raise ValueError( - "cannot use non-qualified names with " - "default_namespace option" - ) - qnames[qname] = qname - except TypeError: - _raise_serialization_error(qname) - - # populate qname and namespaces table - try: - iterate = elem.iter - except AttributeError: - iterate = elem.getiterator # cET compatibility - for elem in iterate(): - tag = elem.tag - if isinstance(tag, QName) and tag.text not in qnames: - add_qname(tag.text) - elif isinstance(tag, basestring): - if tag not in qnames: - add_qname(tag) - elif tag is not None and tag is not Comment and tag is not PI: - _raise_serialization_error(tag) - for key, value in elem.items(): - if isinstance(key, QName): - key = key.text - if key not in qnames: - add_qname(key) - if isinstance(value, QName) and value.text not in qnames: - add_qname(value.text) - text = elem.text - if isinstance(text, QName) and text.text not in qnames: - add_qname(text.text) - return qnames, namespaces - -def to_html_string(element): - return _write_html(ElementTree(element).getroot(), format="html") - -def to_xhtml_string(element): - return _write_html(ElementTree(element).getroot(), format="xhtml") diff --git a/markdown/serializers.py b/markdown/serializers.py new file mode 100644 index 0000000..22a83d4 --- /dev/null +++ b/markdown/serializers.py @@ -0,0 +1,275 @@ +# markdown/searializers.py +# +# Add x/html serialization to Elementree +# Taken from ElementTree 1.3 preview with slight modifications +# +# Copyright (c) 1999-2007 by Fredrik Lundh. All rights reserved. +# +# fredrik@pythonware.com +# http://www.pythonware.com +# +# -------------------------------------------------------------------- +# The ElementTree toolkit is +# +# Copyright (c) 1999-2007 by Fredrik Lundh +# +# By obtaining, using, and/or copying this software and/or its +# associated documentation, you agree that you have read, understood, +# and will comply with the following terms and conditions: +# +# Permission to use, copy, modify, and distribute this software and +# its associated documentation for any purpose and without fee is +# hereby granted, provided that the above copyright notice appears in +# all copies, and that both that copyright notice and this permission +# notice appear in supporting documentation, and that the name of +# Secret Labs AB or the author not be used in advertising or publicity +# pertaining to distribution of the software without specific, written +# prior permission. +# +# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD +# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT- +# ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR +# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY +# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, +# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS +# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE +# OF THIS SOFTWARE. +# -------------------------------------------------------------------- + + +import util +ElementTree = util.etree.ElementTree +QName = util.etree.QName +if hasattr(util.etree, 'test_comment'): + Comment = util.etree.test_comment +else: + Comment = util.etree.Comment +PI = util.etree.PI +ProcessingInstruction = util.etree.ProcessingInstruction + +__all__ = ['to_html_string', 'to_xhtml_string'] + +HTML_EMPTY = ("area", "base", "basefont", "br", "col", "frame", "hr", + "img", "input", "isindex", "link", "meta" "param") + +try: + HTML_EMPTY = set(HTML_EMPTY) +except NameError: + pass + +_namespace_map = { + # "well-known" namespace prefixes + "http://www.w3.org/XML/1998/namespace": "xml", + "http://www.w3.org/1999/xhtml": "html", + "http://www.w3.org/1999/02/22-rdf-syntax-ns#": "rdf", + "http://schemas.xmlsoap.org/wsdl/": "wsdl", + # xml schema + "http://www.w3.org/2001/XMLSchema": "xs", + "http://www.w3.org/2001/XMLSchema-instance": "xsi", + # dublic core + "http://purl.org/dc/elements/1.1/": "dc", +} + + +def _raise_serialization_error(text): + raise TypeError( + "cannot serialize %r (type %s)" % (text, type(text).__name__) + ) + +def _encode(text, encoding): + try: + return text.encode(encoding, "xmlcharrefreplace") + except (TypeError, AttributeError): + _raise_serialization_error(text) + +def _escape_cdata(text): + # escape character data + try: + # it's worth avoiding do-nothing calls for strings that are + # shorter than 500 character, or so. assume that's, by far, + # the most common case in most applications. + if "&" in text: + text = text.replace("&", "&") + if "<" in text: + text = text.replace("<", "<") + if ">" in text: + text = text.replace(">", ">") + return text + except (TypeError, AttributeError): + _raise_serialization_error(text) + + +def _escape_attrib(text): + # escape attribute value + try: + if "&" in text: + text = text.replace("&", "&") + if "<" in text: + text = text.replace("<", "<") + if ">" in text: + text = text.replace(">", ">") + if "\"" in text: + text = text.replace("\"", """) + if "\n" in text: + text = text.replace("\n", " ") + return text + except (TypeError, AttributeError): + _raise_serialization_error(text) + +def _escape_attrib_html(text): + # escape attribute value + try: + if "&" in text: + text = text.replace("&", "&") + if "<" in text: + text = text.replace("<", "<") + if ">" in text: + text = text.replace(">", ">") + if "\"" in text: + text = text.replace("\"", """) + return text + except (TypeError, AttributeError): + _raise_serialization_error(text) + + +def _serialize_html(write, elem, qnames, namespaces, format): + tag = elem.tag + text = elem.text + if tag is Comment: + write("" % _escape_cdata(text)) + elif tag is ProcessingInstruction: + write("" % _escape_cdata(text)) + else: + tag = qnames[tag] + if tag is None: + if text: + write(_escape_cdata(text)) + for e in elem: + _serialize_html(write, e, qnames, None, format) + else: + write("<" + tag) + items = elem.items() + if items or namespaces: + items.sort() # lexical order + for k, v in items: + if isinstance(k, QName): + k = k.text + if isinstance(v, QName): + v = qnames[v.text] + else: + v = _escape_attrib_html(v) + if qnames[k] == v and format == 'html': + # handle boolean attributes + write(" %s" % v) + else: + write(" %s=\"%s\"" % (qnames[k], v)) + if namespaces: + items = namespaces.items() + items.sort(key=lambda x: x[1]) # sort on prefix + for v, k in items: + if k: + k = ":" + k + write(" xmlns%s=\"%s\"" % (k, _escape_attrib(v))) + if format == "xhtml" and tag in HTML_EMPTY: + write(" />") + else: + write(">") + tag = tag.lower() + if text: + if tag == "script" or tag == "style": + write(text) + else: + write(_escape_cdata(text)) + for e in elem: + _serialize_html(write, e, qnames, None, format) + if tag not in HTML_EMPTY: + write("") + if elem.tail: + write(_escape_cdata(elem.tail)) + +def _write_html(root, + encoding=None, + default_namespace=None, + format="html"): + assert root is not None + data = [] + write = data.append + qnames, namespaces = _namespaces(root, default_namespace) + _serialize_html(write, root, qnames, namespaces, format) + if encoding is None: + return "".join(data) + else: + return _encode("".join(data)) + + +# -------------------------------------------------------------------- +# serialization support + +def _namespaces(elem, default_namespace=None): + # identify namespaces used in this tree + + # maps qnames to *encoded* prefix:local names + qnames = {None: None} + + # maps uri:s to prefixes + namespaces = {} + if default_namespace: + namespaces[default_namespace] = "" + + def add_qname(qname): + # calculate serialized qname representation + try: + if qname[:1] == "{": + uri, tag = qname[1:].split("}", 1) + prefix = namespaces.get(uri) + if prefix is None: + prefix = _namespace_map.get(uri) + if prefix is None: + prefix = "ns%d" % len(namespaces) + if prefix != "xml": + namespaces[uri] = prefix + if prefix: + qnames[qname] = "%s:%s" % (prefix, tag) + else: + qnames[qname] = tag # default element + else: + if default_namespace: + raise ValueError( + "cannot use non-qualified names with " + "default_namespace option" + ) + qnames[qname] = qname + except TypeError: + _raise_serialization_error(qname) + + # populate qname and namespaces table + try: + iterate = elem.iter + except AttributeError: + iterate = elem.getiterator # cET compatibility + for elem in iterate(): + tag = elem.tag + if isinstance(tag, QName) and tag.text not in qnames: + add_qname(tag.text) + elif isinstance(tag, basestring): + if tag not in qnames: + add_qname(tag) + elif tag is not None and tag is not Comment and tag is not PI: + _raise_serialization_error(tag) + for key, value in elem.items(): + if isinstance(key, QName): + key = key.text + if key not in qnames: + add_qname(key) + if isinstance(value, QName) and value.text not in qnames: + add_qname(value.text) + text = elem.text + if isinstance(text, QName) and text.text not in qnames: + add_qname(text.text) + return qnames, namespaces + +def to_html_string(element): + return _write_html(ElementTree(element).getroot(), format="html") + +def to_xhtml_string(element): + return _write_html(ElementTree(element).getroot(), format="xhtml") diff --git a/tests/test_apis.py b/tests/test_apis.py index 0943b92..218c009 100644 --- a/tests/test_apis.py +++ b/tests/test_apis.py @@ -46,7 +46,7 @@ class TestBlockParser(unittest.TestCase): root = markdown.util.etree.Element("div") text = 'foo' self.parser.parseChunk(root, text) - self.assertEqual(markdown.searializers.to_xhtml_string(root), + self.assertEqual(markdown.serializers.to_xhtml_string(root), "

foo

") def testParseDocument(self): @@ -55,7 +55,7 @@ class TestBlockParser(unittest.TestCase): tree = self.parser.parseDocument(lines) self.assertTrue(isinstance(tree, markdown.util.etree.ElementTree)) self.assertTrue(markdown.util.etree.iselement(tree.getroot())) - self.assertEqual(markdown.searializers.to_xhtml_string(tree.getroot()), + self.assertEqual(markdown.serializers.to_xhtml_string(tree.getroot()), "

foo

bar

baz\n
") @@ -303,14 +303,14 @@ class testETreeComments(unittest.TestCase): def testCommentSerialization(self): """ Test that an ElementTree Comment serializes properly. """ - self.assertEqual(markdown.searializers.to_html_string(self.comment), + self.assertEqual(markdown.serializers.to_html_string(self.comment), '') def testCommentPrettify(self): """ Test that an ElementTree Comment is prettified properly. """ pretty = markdown.treeprocessors.PrettifyTreeprocessor() pretty.run(self.comment) - self.assertEqual(markdown.searializers.to_html_string(self.comment), + self.assertEqual(markdown.serializers.to_html_string(self.comment), '\n') @@ -327,7 +327,7 @@ class testAtomicString(unittest.TestCase): p = markdown.util.etree.SubElement(tree, 'p') p.text = u'some *text*' new = self.inlineprocessor.run(tree) - self.assertEqual(markdown.searializers.to_html_string(new), + self.assertEqual(markdown.serializers.to_html_string(new), '

some text

') def testSimpleAtomicString(self): @@ -336,7 +336,7 @@ class testAtomicString(unittest.TestCase): p = markdown.util.etree.SubElement(tree, 'p') p.text = markdown.util.AtomicString(u'some *text*') new = self.inlineprocessor.run(tree) - self.assertEqual(markdown.searializers.to_html_string(new), + self.assertEqual(markdown.serializers.to_html_string(new), '

some *text*

') def testNestedAtomicString(self): @@ -354,7 +354,7 @@ class testAtomicString(unittest.TestCase): span2.tail = markdown.util.AtomicString(u' *test*') span1.tail = markdown.util.AtomicString(u' *with*') new = self.inlineprocessor.run(tree) - self.assertEqual(markdown.searializers.to_html_string(new), + self.assertEqual(markdown.serializers.to_html_string(new), '

*some* *more* *text* *here* ' '*to* *test* *with*

') -- cgit v1.2.3