aboutsummaryrefslogtreecommitdiffstats
path: root/markdown/inlinepatterns.py
diff options
context:
space:
mode:
authorIsaac Muse <faceless.shop@gmail.com>2018-01-17 18:36:34 -0700
committerWaylan Limberg <waylan.limberg@icloud.com>2018-01-17 20:36:34 -0500
commitd18c3d0acab0e7469c3284c897afcb61f9dd1fea (patch)
treece123f6ea56e105e1635126c61821d01acc82fb4 /markdown/inlinepatterns.py
parentde9cc42f2d8bc29d8f16058a6ae65a434caab7d6 (diff)
downloadmarkdown-d18c3d0acab0e7469c3284c897afcb61f9dd1fea.tar.gz
markdown-d18c3d0acab0e7469c3284c897afcb61f9dd1fea.tar.bz2
markdown-d18c3d0acab0e7469c3284c897afcb61f9dd1fea.zip
Flexible inline (#629)
Add new InlineProcessor class that handles inline processing much better and allows for more flexibility. This adds new InlineProcessors that no longer utilize unnecessary pretext and posttext captures. New class can accept the buffer that is being worked on and manually process the text without regex and return new replacement bounds. This helps us to handle links in a better way and handle nested brackets and logic that is too much for regular expression. The refactor also allows image links to have links/paths with spaces like links. Ref #551, #613, #590, #161.
Diffstat (limited to 'markdown/inlinepatterns.py')
-rw-r--r--markdown/inlinepatterns.py483
1 files changed, 367 insertions, 116 deletions
diff --git a/markdown/inlinepatterns.py b/markdown/inlinepatterns.py
index dbb4d06..18da73b 100644
--- a/markdown/inlinepatterns.py
+++ b/markdown/inlinepatterns.py
@@ -55,31 +55,31 @@ except ImportError: # pragma: no cover
def build_inlinepatterns(md_instance, **kwargs):
""" Build the default set of inline patterns for Markdown. """
inlinePatterns = odict.OrderedDict()
- inlinePatterns["backtick"] = BacktickPattern(BACKTICK_RE)
- inlinePatterns["escape"] = EscapePattern(ESCAPE_RE, md_instance)
- inlinePatterns["reference"] = ReferencePattern(REFERENCE_RE, md_instance)
- inlinePatterns["link"] = LinkPattern(LINK_RE, md_instance)
- inlinePatterns["image_link"] = ImagePattern(IMAGE_LINK_RE, md_instance)
- inlinePatterns["image_reference"] = ImageReferencePattern(
+ inlinePatterns["backtick"] = BacktickInlineProcessor(BACKTICK_RE)
+ inlinePatterns["escape"] = EscapeInlineProcessor(ESCAPE_RE, md_instance)
+ inlinePatterns["reference"] = ReferenceInlineProcessor(REFERENCE_RE, md_instance)
+ inlinePatterns["link"] = LinkInlineProcessor(LINK_RE, md_instance)
+ inlinePatterns["image_link"] = ImageInlineProcessor(IMAGE_LINK_RE, md_instance)
+ inlinePatterns["image_reference"] = ImageReferenceInlineProcessor(
IMAGE_REFERENCE_RE, md_instance
)
- inlinePatterns["short_reference"] = ReferencePattern(
- SHORT_REF_RE, md_instance
+ inlinePatterns["short_reference"] = ShortReferenceInlineProcessor(
+ REFERENCE_RE, md_instance
)
- inlinePatterns["autolink"] = AutolinkPattern(AUTOLINK_RE, md_instance)
- inlinePatterns["automail"] = AutomailPattern(AUTOMAIL_RE, md_instance)
- inlinePatterns["linebreak"] = SubstituteTagPattern(LINE_BREAK_RE, 'br')
- inlinePatterns["html"] = HtmlPattern(HTML_RE, md_instance)
- inlinePatterns["entity"] = HtmlPattern(ENTITY_RE, md_instance)
- inlinePatterns["not_strong"] = SimpleTextPattern(NOT_STRONG_RE)
- inlinePatterns["em_strong"] = DoubleTagPattern(EM_STRONG_RE, 'strong,em')
- inlinePatterns["strong_em"] = DoubleTagPattern(STRONG_EM_RE, 'em,strong')
- inlinePatterns["strong"] = SimpleTagPattern(STRONG_RE, 'strong')
- inlinePatterns["emphasis"] = SimpleTagPattern(EMPHASIS_RE, 'em')
+ inlinePatterns["autolink"] = AutolinkInlineProcessor(AUTOLINK_RE, md_instance)
+ inlinePatterns["automail"] = AutomailInlineProcessor(AUTOMAIL_RE, md_instance)
+ inlinePatterns["linebreak"] = SubstituteTagInlineProcessor(LINE_BREAK_RE, 'br')
+ inlinePatterns["html"] = HtmlInlineProcessor(HTML_RE, md_instance)
+ inlinePatterns["entity"] = HtmlInlineProcessor(ENTITY_RE, md_instance)
+ inlinePatterns["not_strong"] = SimpleTextInlineProcessor(NOT_STRONG_RE)
+ inlinePatterns["em_strong"] = DoubleTagInlineProcessor(EM_STRONG_RE, 'strong,em')
+ inlinePatterns["strong_em"] = DoubleTagInlineProcessor(STRONG_EM_RE, 'em,strong')
+ inlinePatterns["strong"] = SimpleTagInlineProcessor(STRONG_RE, 'strong')
+ inlinePatterns["emphasis"] = SimpleTagInlineProcessor(EMPHASIS_RE, 'em')
if md_instance.smart_emphasis:
- inlinePatterns["emphasis2"] = SimpleTagPattern(SMART_EMPHASIS_RE, 'em')
+ inlinePatterns["emphasis2"] = SimpleTagInlineProcessor(SMART_EMPHASIS_RE, 'em')
else:
- inlinePatterns["emphasis2"] = SimpleTagPattern(EMPHASIS_2_RE, 'em')
+ inlinePatterns["emphasis2"] = SimpleTagInlineProcessor(EMPHASIS_2_RE, 'em')
return inlinePatterns
@@ -88,54 +88,43 @@ The actual regular expressions for patterns
-----------------------------------------------------------------------------
"""
-NOBRACKET = r'[^\]\[]*'
-BRK = (
- r'\[(' +
- (NOBRACKET + r'(\[')*6 +
- (NOBRACKET + r'\])*')*6 +
- NOBRACKET + r')\]'
-)
NOIMG = r'(?<!\!)'
# `e=f()` or ``e=f("`")``
-BACKTICK_RE = r'(?:(?<!\\)((?:\\{2})+)(?=`+)|(?<!\\)(`+)(.+?)(?<!`)\3(?!`))'
+BACKTICK_RE = r'(?:(?<!\\)((?:\\{2})+)(?=`+)|(?<!\\)(`+)(.+?)(?<!`)\2(?!`))'
# \<
ESCAPE_RE = r'\\(.)'
# *emphasis*
-EMPHASIS_RE = r'(\*)([^\*]+)\2'
+EMPHASIS_RE = r'(\*)([^\*]+)\1'
# **strong**
-STRONG_RE = r'(\*{2}|_{2})(.+?)\2'
+STRONG_RE = r'(\*{2}|_{2})(.+?)\1'
# ***strongem*** or ***em*strong**
-EM_STRONG_RE = r'(\*|_)\2{2}(.+?)\2(.*?)\2{2}'
+EM_STRONG_RE = r'(\*|_)\1{2}(.+?)\1(.*?)\1{2}'
# ***strong**em*
-STRONG_EM_RE = r'(\*|_)\2{2}(.+?)\2{2}(.*?)\2'
+STRONG_EM_RE = r'(\*|_)\1{2}(.+?)\1{2}(.*?)\1'
# _smart_emphasis_
-SMART_EMPHASIS_RE = r'(?<!\w)(_)(?!_)(.+?)(?<!_)\2(?!\w)'
+SMART_EMPHASIS_RE = r'(?<!\w)(_)(?!_)(.+?)(?<!_)\1(?!\w)'
# _emphasis_
-EMPHASIS_2_RE = r'(_)(.+?)\2'
+EMPHASIS_2_RE = r'(_)(.+?)\1'
# [text](url) or [text](<url>) or [text](url "title")
-LINK_RE = NOIMG + BRK + \
- r'''\(\s*(<.*?>|((?:(?:\(.*?\))|[^\(\)]))*?)\s*((['"])(.*?)\12\s*)?\)'''
+LINK_RE = NOIMG + r'\['
# ![alttxt](http://x.com/) or ![alttxt](<http://x.com/>)
-IMAGE_LINK_RE = r'\!' + BRK + r'\s*\(\s*(<.*?>|([^"\)\s]+\s*"[^"]*"|[^\)\s]*))\s*\)'
+IMAGE_LINK_RE = r'\!\['
# [Google][3]
-REFERENCE_RE = NOIMG + BRK + r'\s?\[([^\]]*)\]'
-
-# [Google]
-SHORT_REF_RE = NOIMG + r'\[([^\]]+)\]'
+REFERENCE_RE = LINK_RE
# ![alt text][2]
-IMAGE_REFERENCE_RE = r'\!' + BRK + r'\s?\[([^\]]*)\]'
+IMAGE_REFERENCE_RE = IMAGE_LINK_RE
# stand-alone * or _
NOT_STRONG_RE = r'((^| )(\*|_)( |$))'
@@ -172,6 +161,7 @@ def handleAttributes(text, parent):
"""Set values of an element based on attribute definitions ({@id=123})."""
def attributeCallback(match):
parent.set(match.group(1), match.group(2).replace('\n', ' '))
+ return ''
return ATTR_RE.sub(attributeCallback, text)
@@ -181,7 +171,7 @@ The pattern classes
"""
-class Pattern(object):
+class Pattern(object): # pragma: no cover
"""Base class that inline patterns subclass. """
ANCESTOR_EXCLUDES = tuple()
@@ -241,24 +231,79 @@ class Pattern(object):
return util.INLINE_PLACEHOLDER_RE.sub(get_stash, text)
-class SimpleTextPattern(Pattern):
+class InlineProcessor(Pattern):
+ """
+ Base class that inline patterns subclass.
+
+ This is the newer style inline processor that uses a more
+ efficient and flexible search approach.
+ """
+
+ def __init__(self, pattern, markdown_instance=None):
+ """
+ Create an instant of an inline pattern.
+
+ Keyword arguments:
+
+ * pattern: A regular expression that matches a pattern
+
+ """
+ self.pattern = pattern
+ self.compiled_re = re.compile(pattern, re.DOTALL | re.UNICODE)
+
+ # Api for Markdown to pass safe_mode into instance
+ self.safe_mode = False
+ if markdown_instance:
+ self.markdown = markdown_instance
+
+ def handleMatch(self, m, data):
+ """Return a ElementTree element from the given match and the
+ start and end index of the matched text.
+
+ If `start` and/or `end` are returned as `None`, it will be
+ assumed that the processor did not find a valid region of text.
+
+ Subclasses should override this method.
+
+ Keyword arguments:
+
+ * m: A re match object containing a match of the pattern.
+ * data: The buffer current under analysis
+
+ Returns:
+
+ * el: The ElementTree element, text or None.
+ * start: The start of the region that has been matched or None.
+ * end: The end of the region that has been matched or None.
+
+ """
+ pass # pragma: no cover
+
+
+class SimpleTextPattern(Pattern): # pragma: no cover
""" Return a simple text of group(2) of a Pattern. """
def handleMatch(self, m):
return m.group(2)
-class EscapePattern(Pattern):
+class SimpleTextInlineProcessor(InlineProcessor):
+ """ Return a simple text of group(1) of a Pattern. """
+ def handleMatch(self, m, data):
+ return m.group(1), m.start(0), m.end(0)
+
+
+class EscapeInlineProcessor(InlineProcessor):
""" Return an escaped character. """
- def handleMatch(self, m):
- char = m.group(2)
+ def handleMatch(self, m, data):
+ char = m.group(1)
if char in self.markdown.ESCAPED_CHARS:
- return '%s%s%s' % (util.STX, ord(char), util.ETX)
+ return '%s%s%s' % (util.STX, ord(char), util.ETX), m.start(0), m.end(0)
else:
- return None
+ return None, m.start(0), m.end(0)
-class SimpleTagPattern(Pattern):
+class SimpleTagPattern(Pattern): # pragma: no cover
"""
Return element of type `tag` with a text attribute of group(3)
of a Pattern.
@@ -274,29 +319,51 @@ class SimpleTagPattern(Pattern):
return el
-class SubstituteTagPattern(SimpleTagPattern):
+class SimpleTagInlineProcessor(InlineProcessor):
+ """
+ Return element of type `tag` with a text attribute of group(2)
+ of a Pattern.
+
+ """
+ def __init__(self, pattern, tag):
+ InlineProcessor.__init__(self, pattern)
+ self.tag = tag
+
+ def handleMatch(self, m, data):
+ el = util.etree.Element(self.tag)
+ el.text = m.group(2)
+ return el, m.start(0), m.end(0)
+
+
+class SubstituteTagPattern(SimpleTagPattern): # pragma: no cover
""" Return an element of type `tag` with no children. """
def handleMatch(self, m):
return util.etree.Element(self.tag)
-class BacktickPattern(Pattern):
+class SubstituteTagInlineProcessor(SimpleTagInlineProcessor):
+ """ Return an element of type `tag` with no children. """
+ def handleMatch(self, m, data):
+ return util.etree.Element(self.tag), m.start(0), m.end(0)
+
+
+class BacktickInlineProcessor(InlineProcessor):
""" Return a `<code>` element containing the matching text. """
def __init__(self, pattern):
- Pattern.__init__(self, pattern)
+ InlineProcessor.__init__(self, pattern)
self.ESCAPED_BSLASH = '%s%s%s' % (util.STX, ord('\\'), util.ETX)
self.tag = 'code'
- def handleMatch(self, m):
- if m.group(4):
+ def handleMatch(self, m, data):
+ if m.group(3):
el = util.etree.Element(self.tag)
- el.text = util.AtomicString(m.group(4).strip())
- return el
+ el.text = util.AtomicString(m.group(3).strip())
+ return el, m.start(0), m.end(0)
else:
- return m.group(2).replace('\\\\', self.ESCAPED_BSLASH)
+ return m.group(1).replace('\\\\', self.ESCAPED_BSLASH), m.start(0), m.end(0)
-class DoubleTagPattern(SimpleTagPattern):
+class DoubleTagPattern(SimpleTagPattern): # pragma: no cover
"""Return a ElementTree element nested in tag2 nested in tag1.
Useful for strong emphasis etc.
@@ -312,12 +379,28 @@ class DoubleTagPattern(SimpleTagPattern):
return el1
-class HtmlPattern(Pattern):
+class DoubleTagInlineProcessor(SimpleTagInlineProcessor):
+ """Return a ElementTree element nested in tag2 nested in tag1.
+
+ Useful for strong emphasis etc.
+
+ """
+ def handleMatch(self, m, data):
+ tag1, tag2 = self.tag.split(",")
+ el1 = util.etree.Element(tag1)
+ el2 = util.etree.SubElement(el1, tag2)
+ el2.text = m.group(2)
+ if len(m.groups()) == 3:
+ el2.tail = m.group(3)
+ return el1, m.start(0), m.end(0)
+
+
+class HtmlInlineProcessor(InlineProcessor):
""" Store raw inline html and return a placeholder. """
- def handleMatch(self, m):
- rawhtml = self.unescape(m.group(2))
+ def handleMatch(self, m, data):
+ rawhtml = self.unescape(m.group(1))
place_holder = self.markdown.htmlStash.store(rawhtml)
- return place_holder
+ return place_holder, m.start(0), m.end(0)
def unescape(self, text):
""" Return unescaped text given text with an inline placeholder. """
@@ -338,74 +421,234 @@ class HtmlPattern(Pattern):
return util.INLINE_PLACEHOLDER_RE.sub(get_stash, text)
-class LinkPattern(Pattern):
+class LinkInlineProcessor(InlineProcessor):
""" Return a link element from the given match. """
- def handleMatch(self, m):
+ RE_LINK = re.compile(r'''\(\s*(?:(<.*?>)\s*(?:(['"])(.*?)\2\s*)?\))?''', re.DOTALL | re.UNICODE)
+ RE_TITLE_CLEAN = re.compile(r'\s')
+
+ def handleMatch(self, m, data):
+ text, index, handled = self.getText(data, m.end(0))
+
+ if not handled:
+ return None, None, None
+
+ href, title, index, handled = self.getLink(data, index)
+ if not handled:
+ return None, None, None
+
el = util.etree.Element("a")
- el.text = m.group(2)
- title = m.group(13)
- href = m.group(9)
+ el.text = text
- if href:
- if href[0] == "<":
- href = href[1:-1]
- el.set("href", self.unescape(href.strip()))
- else:
- el.set("href", "")
+ el.set("href", href)
- if title:
- title = dequote(self.unescape(title))
+ if title is not None:
el.set("title", title)
- return el
+ return el, m.start(0), index
+
+ def getLink(self, data, index):
+ """Parse data between `()` of `[Text]()` allowing recursive `()`. """
+
+ href = ''
+ title = None
+ handled = False
+
+ m = self.RE_LINK.match(data, pos=index)
+ if m and m.group(1):
+ # Matches [Text](<link> "title")
+ href = m.group(1)[1:-1].strip()
+ if m.group(3):
+ title = m.group(3)
+ index = m.end(0)
+ handled = True
+ elif m:
+ # Track bracket nesting and index in string
+ bracket_count = 1
+ backtrack_count = 1
+ start_index = m.end()
+ index = start_index
+ last_bracket = -1
+
+ # Primary (first found) quote tracking.
+ quote = None
+ start_quote = -1
+ exit_quote = -1
+ ignore_matches = False
+
+ # Secondary (second found) quote tracking.
+ alt_quote = None
+ start_alt_quote = -1
+ exit_alt_quote = -1
+
+ # Track last character
+ last = ''
+
+ for pos in util.iterrange(index, len(data)):
+ c = data[pos]
+ if c == '(':
+ # Count nested (
+ # Don't increment the bracket count if we are sure we're in a title.
+ if not ignore_matches:
+ bracket_count += 1
+ elif backtrack_count > 0:
+ backtrack_count -= 1
+ elif c == ')':
+ # Match nested ) to (
+ # Don't decrement if we are sure we are in a title that is unclosed.
+ if ((exit_quote != -1 and quote == last) or (exit_alt_quote != -1 and alt_quote == last)):
+ bracket_count = 0
+ elif not ignore_matches:
+ bracket_count -= 1
+ elif backtrack_count > 0:
+ backtrack_count -= 1
+ # We've found our backup end location if the title doesn't reslove.
+ if backtrack_count == 0:
+ last_bracket = index + 1
+
+ elif c in ("'", '"'):
+ # Quote has started
+ if not quote:
+ # We'll assume we are now in a title.
+ # Brackets are quoted, so no need to match them (except for the final one).
+ ignore_matches = True
+ backtrack_count = bracket_count
+ bracket_count = 1
+ start_quote = index + 1
+ quote = c
+ # Secondary quote (in case the first doesn't resolve): [text](link'"title")
+ elif c != quote and not alt_quote:
+ start_alt_quote = index + 1
+ alt_quote = c
+ # Update primary quote match
+ elif c == quote:
+ exit_quote = index + 1
+ # Update secondary quote match
+ elif alt_quote and c == alt_quote:
+ exit_alt_quote = index + 1
+
+ index += 1
+
+ # Link is closed, so let's break out of the loop
+ if bracket_count == 0:
+ # Get the title if we closed a title string right before link closed
+ if exit_quote >= 0 and quote == last:
+ href = data[start_index:start_quote - 1]
+ title = ''.join(data[start_quote:exit_quote - 1])
+ elif exit_alt_quote >= 0 and alt_quote == last:
+ href = data[start_index:start_alt_quote - 1]
+ title = ''.join(data[start_alt_quote:exit_alt_quote - 1])
+ else:
+ href = data[start_index:index - 1]
+ break
+
+ if c != ' ':
+ last = c
+
+ # We have a scenario: [test](link"notitle)
+ # When we enter a string, we stop tracking bracket resolution in the main counter,
+ # but we do keep a backup counter up until we discover where we might resolve all brackets
+ # if the title string fails to resolve.
+ if bracket_count != 0 and backtrack_count == 0:
+ href = data[start_index:last_bracket - 1]
+ index = last_bracket
+ bracket_count = 0
+
+ handled = bracket_count == 0
+
+ if title is not None:
+ title = self.RE_TITLE_CLEAN.sub(' ', dequote(self.unescape(title.strip())))
+
+ href = self.unescape(href).strip()
+
+ return href, title, index, handled
+
+ def getText(self, data, index):
+ """Parse the content between `[]` of the start of an image or link
+ resolving nested square brackets.
-class ImagePattern(LinkPattern):
+ """
+ bracket_count = 1
+ text = []
+ for pos in util.iterrange(index, len(data)):
+ c = data[pos]
+ if c == ']':
+ bracket_count -= 1
+ elif c == '[':
+ bracket_count += 1
+ index += 1
+ if bracket_count == 0:
+ break
+ text.append(c)
+ return ''.join(text), index, bracket_count == 0
+
+
+class ImageInlineProcessor(LinkInlineProcessor):
""" Return a img element from the given match. """
- def handleMatch(self, m):
+
+ def handleMatch(self, m, data):
+ text, index, handled = self.getText(data, m.end(0))
+ if not handled:
+ return None, None, None
+
+ src, title, index, handled = self.getLink(data, index)
+ if not handled:
+ return None, None, None
+
el = util.etree.Element("img")
- src_parts = m.group(9).split()
- if src_parts:
- src = src_parts[0]
- if src[0] == "<" and src[-1] == ">":
- src = src[1:-1]
- el.set('src', self.unescape(src))
- else:
- el.set('src', "")
- if len(src_parts) > 1:
- el.set('title', dequote(self.unescape(" ".join(src_parts[1:]))))
+
+ el.set("src", src)
+
+ if title is not None:
+ el.set("title", title)
if self.markdown.enable_attributes:
- truealt = handleAttributes(m.group(2), el)
+ truealt = handleAttributes(text, el)
else:
- truealt = m.group(2)
+ truealt = text
el.set('alt', self.unescape(truealt))
- return el
+ return el, m.start(0), index
-class ReferencePattern(LinkPattern):
+class ReferenceInlineProcessor(LinkInlineProcessor):
""" Match to a stored reference and return link element. """
-
NEWLINE_CLEANUP_RE = re.compile(r'[ ]?\n', re.MULTILINE)
- def handleMatch(self, m):
- try:
- id = m.group(9).lower()
- except IndexError:
- id = None
- if not id:
- # if we got something like "[Google][]" or "[Google]"
- # we'll use "google" as the id
- id = m.group(2).lower()
+ RE_LINK = re.compile(r'\s?\[([^\]]*)\]', re.DOTALL | re.UNICODE)
+
+ def handleMatch(self, m, data):
+ text, index, handled = self.getText(data, m.end(0))
+ if not handled:
+ return None, None, None
+
+ id, end, handled = self.evalId(data, index, text)
+ if not handled:
+ return None, None, None
# Clean up linebreaks in id
id = self.NEWLINE_CLEANUP_RE.sub(' ', id)
if id not in self.markdown.references: # ignore undefined refs
- return None
+ return None, m.start(0), end
+
href, title = self.markdown.references[id]
- text = m.group(2)
- return self.makeTag(href, title, text)
+ return self.makeTag(href, title, text), m.start(0), end
+
+ def evalId(self, data, index, text):
+ """
+ Evaluate the id portion of [ref][id].
+
+ If [ref][] use [ref].
+ """
+ m = self.RE_LINK.match(data, pos=index)
+ if not m:
+ return None, index, False
+ else:
+ id = m.group(1).lower()
+ end = m.end(0)
+ if not id:
+ id = text.lower()
+ return id, end, True
def makeTag(self, href, title, text):
el = util.etree.Element('a')
@@ -418,7 +661,15 @@ class ReferencePattern(LinkPattern):
return el
-class ImageReferencePattern(ReferencePattern):
+class ShortReferenceInlineProcessor(ReferenceInlineProcessor):
+ """Shorte form of reference: [google]. """
+ def evalId(self, data, index, text):
+ """Evaluate the id from of [ref] """
+
+ return text.lower(), index, True
+
+
+class ImageReferenceInlineProcessor(ReferenceInlineProcessor):
""" Match to a stored reference and return img element. """
def makeTag(self, href, title, text):
el = util.etree.Element("img")
@@ -433,22 +684,22 @@ class ImageReferencePattern(ReferencePattern):
return el
-class AutolinkPattern(Pattern):
+class AutolinkInlineProcessor(InlineProcessor):
""" Return a link Element given an autolink (`<http://example/com>`). """
- def handleMatch(self, m):
+ def handleMatch(self, m, data):
el = util.etree.Element("a")
- el.set('href', self.unescape(m.group(2)))
- el.text = util.AtomicString(m.group(2))
- return el
+ el.set('href', self.unescape(m.group(1)))
+ el.text = util.AtomicString(m.group(1))
+ return el, m.start(0), m.end(0)
-class AutomailPattern(Pattern):
+class AutomailInlineProcessor(InlineProcessor):
"""
Return a mailto link Element given an automail link (`<foo@example.com>`).
"""
- def handleMatch(self, m):
+ def handleMatch(self, m, data):
el = util.etree.Element('a')
- email = self.unescape(m.group(2))
+ email = self.unescape(m.group(1))
if email.startswith("mailto:"):
email = email[len("mailto:"):]
@@ -467,4 +718,4 @@ class AutomailPattern(Pattern):
mailto = "".join([util.AMP_SUBSTITUTE + '#%d;' %
ord(letter) for letter in mailto])
el.set('href', mailto)
- return el
+ return el, m.start(0), m.end(0)