aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Isaac Muse <faceless.shop@gmail.com> 2017-11-23 07:56:38 -0700
committer Waylan Limberg <waylan.limberg@icloud.com> 2017-11-23 09:56:38 -0500
commit de5c696f94e8dde242c29d4be50b7bbf3c17fedb (patch)
tree 31c6c11b698be4b284c9b93b05a4d0ca3bd58e6a
parent 007bd2aa4c184b28f710d041a0abe78bffc0ec2e (diff)
download markdown-de5c696f94e8dde242c29d4be50b7bbf3c17fedb.tar.gz
markdown-de5c696f94e8dde242c29d4be50b7bbf3c17fedb.tar.bz2
markdown-de5c696f94e8dde242c29d4be50b7bbf3c17fedb.zip
Feature ancestry (#598)
Ancestry exclusion for inline patterns. Adds the ability for an inline pattern to define a list of ancestor tag names that should be avoided. If a pattern would create a descendant of one of the listed tag names, the pattern will not match. Fixes #596.
-rw-r--r-- .spell-dict 3
-rw-r--r-- docs/extensions/api.txt 6
-rw-r--r-- markdown/inlinepatterns.py 2
-rw-r--r-- markdown/treeprocessors.py 57
-rw-r--r-- tests/test_apis.py 52
5 files changed, 107 insertions, 13 deletions
diff --git a/.spell-dict b/.spell-dict
index 44d0a16..fb9ea28 100644
--- a/.spell-dict
+++ b/.spell-dict
@@ -103,6 +103,7 @@ traceback
Tredinnick
Treeprocessor
Treeprocessors
+tuple
tuples
unordered
untrusted
@@ -122,4 +123,4 @@ wiki
JavaScript
plugin
plugins
-configs \ No newline at end of file
+configs
diff --git a/docs/extensions/api.txt b/docs/extensions/api.txt
index 9653883..246bb27 100644
--- a/docs/extensions/api.txt
+++ b/docs/extensions/api.txt
@@ -53,7 +53,7 @@ A pseudo example:
Inline Patterns {: #inlinepatterns }
------------------------------------
-Inline Patterns implement the inline HTML element syntax for Markdown such as
+Inline Patterns implement the inline HTML element syntax for Markdown such as
`*emphasis*` or `[links](http://example.com)`. Pattern objects should be
instances of classes that inherit from `markdown.inlinepatterns.Pattern` or
one of its children. Each pattern object uses a single regular expression and
@@ -68,6 +68,10 @@ must have the following methods:
Accepts a match object and returns an ElementTree element of a plain
Unicode string.
+Also, Inline Patterns can define the attribute `ANCESTOR_EXCLUDES` as either
+a list or tuple of undesirable ancestor tag names. The pattern will not match
+if it would cause the content to be a descendant of one of those tag names.
+
Note that any regular expression returned by `getCompiledRegExp` must capture
the whole block. Therefore, they should all start with `r'^(.*?)'` and end
with `r'(.*?)!'`. When using the default `getCompiledRegExp()` method
diff --git a/markdown/inlinepatterns.py b/markdown/inlinepatterns.py
index 3658ebd..2f00b3d 100644
--- a/markdown/inlinepatterns.py
+++ b/markdown/inlinepatterns.py
@@ -189,6 +189,8 @@ The pattern classes
class Pattern(object):
"""Base class that inline patterns subclass. """
+ ANCESTOR_EXCLUDES = tuple()
+
def __init__(self, pattern, markdown_instance=None):
"""
Create an instant of an inline pattern.
diff --git a/markdown/treeprocessors.py b/markdown/treeprocessors.py
index bb76572..f159a8a 100644
--- a/markdown/treeprocessors.py
+++ b/markdown/treeprocessors.py
@@ -54,6 +54,7 @@ class InlineProcessor(Treeprocessor):
self.__placeholder_re = util.INLINE_PLACEHOLDER_RE
self.markdown = md
self.inlinePatterns = md.inlinePatterns
+ self.ancestors = []
def __makePlaceholder(self, type):
""" Generate a placeholder """
@@ -138,7 +139,7 @@ class InlineProcessor(Treeprocessor):
childResult.reverse()
for newChild in childResult:
- node.insert(pos, newChild)
+ node.insert(pos, newChild[0])
def __processPlaceholders(self, data, parent, isText=True):
"""
@@ -155,10 +156,10 @@ class InlineProcessor(Treeprocessor):
def linkText(text):
if text:
if result:
- if result[-1].tail:
- result[-1].tail += text
+ if result[-1][0].tail:
+ result[-1][0].tail += text
else:
- result[-1].tail = text
+ result[-1][0].tail = text
elif not isText:
if parent.tail:
parent.tail += text
@@ -199,7 +200,7 @@ class InlineProcessor(Treeprocessor):
continue
strartIndex = phEndIndex
- result.append(node)
+ result.append((node, self.ancestors[:]))
else: # wrong placeholder
end = index + len(self.__placeholder_prefix)
@@ -230,6 +231,11 @@ class InlineProcessor(Treeprocessor):
Returns: String with placeholders instead of ElementTree elements.
"""
+
+ for exclude in pattern.ANCESTOR_EXCLUDES:
+ if exclude.lower() in self.ancestors:
+ return data, False, 0
+
match = pattern.getCompiledRegExp().match(data[startIndex:])
leftData = data[:startIndex]
@@ -247,9 +253,11 @@ class InlineProcessor(Treeprocessor):
for child in [node] + list(node):
if not isString(node):
if child.text:
+ self.ancestors.append(child.tag.lower())
child.text = self.__handleInline(
child.text, patternIndex + 1
)
+ self.ancestors.pop()
if child.tail:
child.tail = self.__handleInline(
child.tail, patternIndex
@@ -261,7 +269,17 @@ class InlineProcessor(Treeprocessor):
match.group(1),
placeholder, match.groups()[-1]), True, 0
- def run(self, tree):
+    def __build_ancestors(self, parent, parents):
+        """Extend `parents` with the tags of `parent` and its ancestors, root first."""
+        ancestors = []
+        # Compare against None: an Element with no children is falsy.
+        while parent is not None:
+            ancestors.append(parent.tag.lower())
+            parent = self.parent_map.get(parent)
+        ancestors.reverse()
+        parents.extend(ancestors)
+
+ def run(self, tree, ancestors=None):
"""Apply inline patterns to a parsed Markdown tree.
Iterate over ElementTree, find elements with inline tag, apply inline
@@ -274,28 +292,42 @@ class InlineProcessor(Treeprocessor):
Arguments:
* tree: ElementTree object, representing Markdown tree.
+ * ancestors: List of parent tag names that precede the tree node (if needed).
Returns: ElementTree object with applied inline patterns.
"""
self.stashed_nodes = {}
- stack = [tree]
+ # Ensure a valid parent list, but copy passed in lists
+ # to ensure we don't have the user accidentally change it on us.
+ tree_parents = [] if ancestors is None else ancestors[:]
+
+ self.parent_map = dict((c, p) for p in tree.getiterator() for c in p)
+ stack = [(tree, tree_parents)]
while stack:
- currElement = stack.pop()
+ currElement, parents = stack.pop()
+
+ self.ancestors = parents
+ self.__build_ancestors(currElement, self.ancestors)
+
insertQueue = []
for child in currElement:
if child.text and not isinstance(
child.text, util.AtomicString
):
+ self.ancestors.append(child.tag.lower())
text = child.text
child.text = None
lst = self.__processPlaceholders(
self.__handleInline(text), child
)
+ for l in lst:
+ self.parent_map[l[0]] = child
stack += lst
insertQueue.append((child, lst))
+ self.ancestors.pop()
if child.tail:
tail = self.__handleInline(child.tail)
dumby = util.etree.Element('d')
@@ -306,9 +338,11 @@ class InlineProcessor(Treeprocessor):
pos = list(currElement).index(child) + 1
tailResult.reverse()
for newChild in tailResult:
- currElement.insert(pos, newChild)
+ self.parent_map[newChild[0]] = currElement
+ currElement.insert(pos, newChild[0])
if len(child):
- stack.append(child)
+ self.parent_map[child] = currElement
+ stack.append((child, self.ancestors[:]))
for element, lst in insertQueue:
if self.markdown.enable_attributes:
@@ -317,7 +351,8 @@ class InlineProcessor(Treeprocessor):
element.text, element
)
i = 0
- for newChild in lst:
+ for obj in lst:
+ newChild = obj[0]
if self.markdown.enable_attributes:
# Processing attributes
if newChild.tail and isString(newChild.tail):
diff --git a/tests/test_apis.py b/tests/test_apis.py
index 7b1214f..48e79e8 100644
--- a/tests/test_apis.py
+++ b/tests/test_apis.py
@@ -770,3 +770,55 @@ class TestEscapeAppend(unittest.TestCase):
self.assertEqual('|' in md.ESCAPED_CHARS, True)
md2 = markdown.Markdown()
self.assertEqual('|' not in md2.ESCAPED_CHARS, True)
+
+
+class TestAncestorExclusion(unittest.TestCase):
+ """ Tests that a pattern's ANCESTOR_EXCLUDES suppresses matches inside the listed tags. """
+
+ class AncestorExample(markdown.inlinepatterns.SimpleTagPattern):
+ """ Simple tag pattern that declares `a` as an excluded ancestor. """
+
+ ANCESTOR_EXCLUDES = ('a',)
+
+ def handleMatch(self, m):
+ """ Return a `self.tag` element whose text is match group 3. """
+ el = markdown.util.etree.Element(self.tag)
+ el.text = m.group(3)
+ return el
+
+ class AncestorExtension(markdown.Extension):
+
+ def __init__(self, *args, **kwargs):
+ """Initialize with an empty config; the arguments are not used."""
+
+ self.config = {}
+
+ def extendMarkdown(self, md, md_globals):
+ """Register the `+text+` test pattern under the name `ancestor-test`."""
+
+ pattern = r'(\+)([^\+]+)\2'
+ md.inlinePatterns["ancestor-test"] = TestAncestorExclusion.AncestorExample(pattern, 'strong')
+
+ def setUp(self):
+ """Create a Markdown instance with the test extension loaded."""
+ self.md = markdown.Markdown(extensions=[TestAncestorExclusion.AncestorExtension()])
+
+ def test_ancestors(self):
+ """ Test that an extension can exclude parent tags. """
+ test = """
+Some +test+ and a [+link+](http://test.com)
+"""
+ result = """<p>Some <strong>test</strong> and a <a href="http://test.com">+link+</a></p>"""
+
+ self.md.reset()
+ self.assertEqual(self.md.convert(test), result)
+
+ def test_ancestors_tail(self):
+ """ Test that an extension can exclude parent tags when dealing with a tail. """
+ test = """
+[***+em+*+strong+**](http://test.com)
+"""
+ result = """<p><a href="http://test.com"><strong><em>+em+</em>+strong+</strong></a></p>"""
+
+ self.md.reset()
+ self.assertEqual(self.md.convert(test), result)