From 9082ed45deef99a0ff3c20aaa9f2c9b63e576838 Mon Sep 17 00:00:00 2001 From: facelessuser Date: Fri, 19 Sep 2014 17:44:05 -0600 Subject: Fix the lost tail issue in inlineprocessors. See #253. Prior to this patch, if any inline processors returned an element with a tail, the tail would end up empty. This resolves that issue and will allow for #253 to be fixed. Thanks to @facelessuser for the work on this. --- markdown/inlinepatterns.py | 16 ++++++++-------- markdown/treeprocessors.py | 33 ++++++++++++++++++--------------- tests/test_apis.py | 14 ++++++++++++++ 3 files changed, 40 insertions(+), 23 deletions(-) diff --git a/markdown/inlinepatterns.py b/markdown/inlinepatterns.py index eaf4040..e990418 100644 --- a/markdown/inlinepatterns.py +++ b/markdown/inlinepatterns.py @@ -156,7 +156,7 @@ class Pattern(object): """ self.pattern = pattern - self.compiled_re = re.compile("^(.*?)%s(.*?)$" % pattern, + self.compiled_re = re.compile("^(.*?)%s(.*?)$" % pattern, re.DOTALL | re.UNICODE) # Api for Markdown to pass safe_mode into instance @@ -210,7 +210,7 @@ class Pattern(object): return value else: # An etree Element - return text content only - return ''.join(itertext(value)) + return ''.join(itertext(value)) return util.INLINE_PLACEHOLDER_RE.sub(get_stash, text) @@ -228,7 +228,7 @@ class EscapePattern(Pattern): if char in self.markdown.ESCAPED_CHARS: return '%s%s%s' % (util.STX, ord(char), util.ETX) else: - return None + return None class SimpleTagPattern(Pattern): @@ -300,7 +300,7 @@ class HtmlPattern(Pattern): return self.markdown.serializer(value) except: return '\%s' % value - + return util.INLINE_PLACEHOLDER_RE.sub(get_stash, text) @@ -320,7 +320,7 @@ class LinkPattern(Pattern): el.set("href", "") if title: - title = dequote(self.unescape(title)) + title = dequote(self.unescape(title)) el.set("title", title) return el @@ -344,19 +344,19 @@ class LinkPattern(Pattern): if not self.markdown.safeMode: # Return immediately bipassing parsing. 
return url - + try: scheme, netloc, path, params, query, fragment = url = urlparse(url) except ValueError: #pragma: no cover # Bad url - so bad it couldn't be parsed. return '' - + locless_schemes = ['', 'mailto', 'news'] allowed_schemes = locless_schemes + ['http', 'https', 'ftp', 'ftps'] if scheme not in allowed_schemes: # Not a known (allowed) scheme. Not safe. return '' - + if netloc == '' and scheme not in locless_schemes: #pragma: no cover # This should not happen. Treat as suspect. return '' diff --git a/markdown/treeprocessors.py b/markdown/treeprocessors.py index a82141a..d3ee207 100644 --- a/markdown/treeprocessors.py +++ b/markdown/treeprocessors.py @@ -34,8 +34,8 @@ class Treeprocessor(util.Processor): def run(self, root): """ Subclasses of Treeprocessor should implement a `run` method, which - takes a root ElementTree. This method can return another ElementTree - object, and the existing root ElementTree will be replaced, or it can + takes a root ElementTree. This method can return another ElementTree + object, and the existing root ElementTree will be replaced, or it can modify the current tree and return None. """ pass #pragma: no cover @@ -71,7 +71,7 @@ class InlineProcessor(Treeprocessor): * index: index, from which we start search Returns: placeholder id and string index, after the found placeholder. 
- + """ m = self.__placeholder_re.search(data, index) if m: @@ -129,11 +129,10 @@ class InlineProcessor(Treeprocessor): text = subnode.tail subnode.tail = None - childResult = self.__processPlaceholders(text, subnode) + childResult = self.__processPlaceholders(text, subnode, isText) if not isText and node is not subnode: pos = list(node).index(subnode) - node.remove(subnode) else: pos = 0 @@ -141,7 +140,7 @@ class InlineProcessor(Treeprocessor): for newChild in childResult: node.insert(pos, newChild) - def __processPlaceholders(self, data, parent): + def __processPlaceholders(self, data, parent, isText=True): """ Process string with placeholders and generate ElementTree tree. @@ -151,7 +150,7 @@ class InlineProcessor(Treeprocessor): * parent: Element, which contains processing inline data Returns: list with ElementTree elements with applied inline patterns. - + """ def linkText(text): if text: @@ -160,6 +159,11 @@ class InlineProcessor(Treeprocessor): result[-1].tail += text else: result[-1].tail = text + elif not isText: + if parent.tail: + parent.tail += text + else: + parent.tail = text else: if parent.text: parent.text += text @@ -183,7 +187,7 @@ class InlineProcessor(Treeprocessor): for child in [node] + list(node): if child.tail: if child.tail.strip(): - self.__processElementText(node, child,False) + self.__processElementText(node, child, False) if child.text: if child.text.strip(): self.__processElementText(child, child) @@ -240,7 +244,7 @@ class InlineProcessor(Treeprocessor): # We need to process current node too for child in [node] + list(node): if not isString(node): - if child.text: + if child.text: child.text = self.__handleInline(child.text, patternIndex + 1) if child.tail: @@ -288,11 +292,10 @@ class InlineProcessor(Treeprocessor): if child.tail: tail = self.__handleInline(child.tail) dumby = util.etree.Element('d') - tailResult = self.__processPlaceholders(tail, dumby) - if dumby.text: - child.tail = dumby.text - else: - child.tail = None + 
child.tail = None + tailResult = self.__processPlaceholders(tail, dumby, False) + if dumby.tail: + child.tail = dumby.tail pos = list(currElement).index(child) + 1 tailResult.reverse() for newChild in tailResult: @@ -304,7 +307,7 @@ class InlineProcessor(Treeprocessor): if self.markdown.enable_attributes: if element.text and isString(element.text): element.text = \ - inlinepatterns.handleAttributes(element.text, + inlinepatterns.handleAttributes(element.text, element) i = 0 for newChild in lst: diff --git a/tests/test_apis.py b/tests/test_apis.py index 8cb2c66..f89dba9 100644 --- a/tests/test_apis.py +++ b/tests/test_apis.py @@ -380,6 +380,20 @@ class testETreeComments(unittest.TestCase): '\n') +class testElementTailTests(unittest.TestCase): + """ Element Tail Tests """ + def setUp(self): + self.pretty = markdown.treeprocessors.PrettifyTreeprocessor() + + def testBrTailNoNewline(self): + """ Test that last
<br> in tree has a new line tail """ + root = markdown.util.etree.Element('root') + br = markdown.util.etree.SubElement(root, 'br') + self.assertEqual(br.tail, None) + self.pretty.run(root) + self.assertEqual(br.tail, "\n") + + class testSerializers(unittest.TestCase): """ Test the html and xhtml serializers. """ -- cgit v1.2.3