aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--docs/extensions/api.md109
-rw-r--r--markdown/extensions/abbr.py12
-rw-r--r--markdown/extensions/footnotes.py16
-rw-r--r--markdown/extensions/nl2br.py4
-rw-r--r--markdown/extensions/smart_strong.py10
-rw-r--r--markdown/extensions/smarty.py18
-rw-r--r--markdown/extensions/wikilinks.py16
-rw-r--r--markdown/inlinepatterns.py483
-rw-r--r--markdown/treeprocessors.py35
-rw-r--r--markdown/util.py2
-rw-r--r--tests/misc/image.html5
-rw-r--r--tests/misc/image.txt12
-rw-r--r--tests/test_apis.py10
-rw-r--r--tests/test_syntax/inline/__init__.py0
-rw-r--r--tests/test_syntax/inline/images.py139
-rw-r--r--tests/test_syntax/inline/links.py98
16 files changed, 785 insertions, 184 deletions
diff --git a/docs/extensions/api.md b/docs/extensions/api.md
index cba4ea7..ad0d254 100644
--- a/docs/extensions/api.md
+++ b/docs/extensions/api.md
@@ -48,6 +48,8 @@ class MyPreprocessor(Preprocessor):
## Inline Patterns {: #inlinepatterns }
+### Legacy
+
Inline Patterns implement the inline HTML element syntax for Markdown such as
`*emphasis*` or `[links](http://example.com)`. Pattern objects should be
instances of classes that inherit from `markdown.inlinepatterns.Pattern` or
@@ -85,7 +87,7 @@ from markdown.util import etree
class EmphasisPattern(Pattern):
def handleMatch(self, m):
el = etree.Element('em')
- el.text = m.group(3)
+ el.text = m.group(2)
return el
```
@@ -110,8 +112,113 @@ implemented with separate instances of the `SimpleTagPattern` listed below.
Feel free to use or extend any of the Pattern classes found at
`markdown.inlinepatterns`.
+### Future
+
+While users can still create plugins with the existing
+`markdown.inlinepatterns.Pattern`, a new, more flexible inline processor has
+been added which users are encouraged to migrate to. The new inline processor
+is found at `markdown.inlinepatterns.InlineProcessor`.
+
+The new processor is very similar to the legacy one, with two major distinctions.
+
+1. Patterns no longer need to match the entire block, so patterns no longer
+ start with `r'^(.*?)'` and end with `r'(.*?)!'`. This was a huge
+ performance sink and this requirement has been removed. The returned match
+ object will only contain what is explicitly matched in the pattern, and
+ extension pattern groups now start with `m.group(1)`.
+
+2. The `handleMatch` method now takes an additional input called `data`,
+ which is the entire block under analysis, not just what is matched with
+ the specified pattern. The method also returns the element *and* the index
+ boundaries relative to `data` that the returned element is replacing
+ (usually `m.start(0)` and `m.end(0)`). If the boundaries are returned as
+ `None`, it is assumed that the match did not take place, and nothing will
+ be altered in `data`.
+
+If all you need is the same functionality as the legacy processor, you can do
+as shown below. Most of the time, simple regular expression processing is all
+you'll need.
+
+```python
+from markdown.inlinepatterns import InlineProcessor
+from markdown.util import etree
+
+# an oversimplified regex
+MYPATTERN = r'\*([^*]+)\*'
+
+class EmphasisPattern(InlineProcessor):
+ def handleMatch(self, m, data):
+ el = etree.Element('em')
+ el.text = m.group(1)
+ return el, m.start(0), m.end(0)
+
+# pass in pattern and create instance
+emphasis = EmphasisPattern(MYPATTERN)
+```
+
+However, the new processor allows you to handle much more complex patterns that
+are too much for Python's `re` module to handle. For instance, to handle nested
+brackets in link patterns, the built-in link inline processor uses the following
+pattern to find where a link *might* start:
+
+```python
+LINK_RE = NOIMG + r'\['
+link = LinkInlineProcessor(LINK_RE, md_instance)
+```
+
+It then uses programmed logic to actually walk the string (`data`), starting at
+where the match ended (`m.end(0)`). If, for whatever reason, the text
+does not appear to be a link, it returns `None` for the start and end boundary
+in order to communicate to the parser that no match was found.
+
+```python
+ # Just a snippet of the link's handleMatch
+ # method to illustrate new logic
+ def handleMatch(self, m, data):
+ text, index, handled = self.getText(data, m.end(0))
+
+ if not handled:
+ return None, None, None
+
+ href, title, index, handled = self.getLink(data, index)
+ if not handled:
+ return None, None, None
+
+ el = util.etree.Element("a")
+ el.text = text
+
+ el.set("href", href)
+
+ if title is not None:
+ el.set("title", title)
+
+ return el, m.start(0), index
+```
+
### Generic Pattern Classes
+Some example processors that are available.
+
+* **`SimpleTextInlineProcessor(pattern)`**:
+
+ Returns simple text of `group(1)` of a `pattern` and the start and end
+ position of the match.
+
+* **`SimpleTagInlineProcessor(pattern, tag)`**:
+
+ Returns an element of type "`tag`" with a text attribute of `group(2)`
+ of a `pattern`. `tag` should be a string of a HTML element (i.e.: 'em').
+ It also returns the start and end position of the match.
+
+* **`SubstituteTagInlineProcessor(pattern, tag)`**:
+
+ Returns an element of type "`tag`" with no children or text (i.e.: `br`)
+ and the start and end position of the match.
+
+A very small number of the basic legacy processors are still available to
+prevent breakage of 3rd party extensions during the transition period to the
+new processors. Three of the available processors are listed below.
+
* **`SimpleTextPattern(pattern)`**:
Returns simple text of `group(2)` of a `pattern`.
diff --git a/markdown/extensions/abbr.py b/markdown/extensions/abbr.py
index 2553aac..5e8845b 100644
--- a/markdown/extensions/abbr.py
+++ b/markdown/extensions/abbr.py
@@ -20,7 +20,7 @@ from __future__ import absolute_import
from __future__ import unicode_literals
from . import Extension
from ..preprocessors import Preprocessor
-from ..inlinepatterns import Pattern
+from ..inlinepatterns import InlineProcessor
from ..util import etree, AtomicString
import re
@@ -52,7 +52,7 @@ class AbbrPreprocessor(Preprocessor):
abbr = m.group('abbr').strip()
title = m.group('title').strip()
self.markdown.inlinePatterns['abbr-%s' % abbr] = \
- AbbrPattern(self._generate_pattern(abbr), title)
+ AbbrInlineProcessor(self._generate_pattern(abbr), title)
# Preserve the line to prevent raw HTML indexing issue.
# https://github.com/Python-Markdown/markdown/issues/584
new_text.append('')
@@ -76,18 +76,18 @@ class AbbrPreprocessor(Preprocessor):
return r'(?P<abbr>\b%s\b)' % (r''.join(chars))
-class AbbrPattern(Pattern):
+class AbbrInlineProcessor(InlineProcessor):
""" Abbreviation inline pattern. """
def __init__(self, pattern, title):
- super(AbbrPattern, self).__init__(pattern)
+ super(AbbrInlineProcessor, self).__init__(pattern)
self.title = title
- def handleMatch(self, m):
+ def handleMatch(self, m, data):
abbr = etree.Element('abbr')
abbr.text = AtomicString(m.group('abbr'))
abbr.set('title', self.title)
- return abbr
+ return abbr, m.start(0), m.end(0)
def makeExtension(**kwargs): # pragma: no cover
diff --git a/markdown/extensions/footnotes.py b/markdown/extensions/footnotes.py
index d16cf84..a957278 100644
--- a/markdown/extensions/footnotes.py
+++ b/markdown/extensions/footnotes.py
@@ -17,7 +17,7 @@ from __future__ import absolute_import
from __future__ import unicode_literals
from . import Extension
from ..preprocessors import Preprocessor
-from ..inlinepatterns import Pattern
+from ..inlinepatterns import InlineProcessor
from ..treeprocessors import Treeprocessor
from ..postprocessors import Postprocessor
from .. import util
@@ -77,7 +77,7 @@ class FootnoteExtension(Extension):
# Insert an inline pattern before ImageReferencePattern
FOOTNOTE_RE = r'\[\^([^\]]*)\]' # blah blah [^1] blah
md.inlinePatterns.add(
- "footnote", FootnotePattern(FOOTNOTE_RE, self), "<reference"
+ "footnote", FootnoteInlineProcessor(FOOTNOTE_RE, self), "<reference"
)
# Insert a tree-processor that would actually add the footnote div
# This must be before all other treeprocessors (i.e., inline and
@@ -315,15 +315,15 @@ class FootnotePreprocessor(Preprocessor):
return items, i
-class FootnotePattern(Pattern):
+class FootnoteInlineProcessor(InlineProcessor):
""" InlinePattern for footnote markers in a document's body text. """
def __init__(self, pattern, footnotes):
- super(FootnotePattern, self).__init__(pattern)
+ super(FootnoteInlineProcessor, self).__init__(pattern)
self.footnotes = footnotes
- def handleMatch(self, m):
- id = m.group(2)
+ def handleMatch(self, m, data):
+ id = m.group(1)
if id in self.footnotes.footnotes.keys():
sup = util.etree.Element("sup")
a = util.etree.SubElement(sup, "a")
@@ -333,9 +333,9 @@ class FootnotePattern(Pattern):
a.set('rel', 'footnote') # invalid in HTML5
a.set('class', 'footnote-ref')
a.text = util.text_type(self.footnotes.footnotes.index(id) + 1)
- return sup
+ return sup, m.start(0), m.end(0)
else:
- return None
+ return None, None, None
class FootnotePostTreeprocessor(Treeprocessor):
diff --git a/markdown/extensions/nl2br.py b/markdown/extensions/nl2br.py
index 687d1eb..5b9373f 100644
--- a/markdown/extensions/nl2br.py
+++ b/markdown/extensions/nl2br.py
@@ -19,7 +19,7 @@ License: [BSD](http://www.opensource.org/licenses/bsd-license.php)
from __future__ import absolute_import
from __future__ import unicode_literals
from . import Extension
-from ..inlinepatterns import SubstituteTagPattern
+from ..inlinepatterns import SubstituteTagInlineProcessor
BR_RE = r'\n'
@@ -27,7 +27,7 @@ BR_RE = r'\n'
class Nl2BrExtension(Extension):
def extendMarkdown(self, md, md_globals):
- br_tag = SubstituteTagPattern(BR_RE, 'br')
+ br_tag = SubstituteTagInlineProcessor(BR_RE, 'br')
md.inlinePatterns.add('nl', br_tag, '_end')
diff --git a/markdown/extensions/smart_strong.py b/markdown/extensions/smart_strong.py
index 1b00f84..f34531d 100644
--- a/markdown/extensions/smart_strong.py
+++ b/markdown/extensions/smart_strong.py
@@ -18,10 +18,10 @@ License: [BSD](http://www.opensource.org/licenses/bsd-license.php)
from __future__ import absolute_import
from __future__ import unicode_literals
from . import Extension
-from ..inlinepatterns import SimpleTagPattern
+from ..inlinepatterns import SimpleTagInlineProcessor
-SMART_STRONG_RE = r'(?<!\w)(_{2})(?!_)(.+?)(?<!_)\2(?!\w)'
-STRONG_RE = r'(\*{2})(.+?)\2'
+SMART_STRONG_RE = r'(?<!\w)(_{2})(?!_)(.+?)(?<!_)\1(?!\w)'
+STRONG_RE = r'(\*{2})(.+?)\1'
class SmartEmphasisExtension(Extension):
@@ -29,10 +29,10 @@ class SmartEmphasisExtension(Extension):
def extendMarkdown(self, md, md_globals):
""" Modify inline patterns. """
- md.inlinePatterns['strong'] = SimpleTagPattern(STRONG_RE, 'strong')
+ md.inlinePatterns['strong'] = SimpleTagInlineProcessor(STRONG_RE, 'strong')
md.inlinePatterns.add(
'strong2',
- SimpleTagPattern(SMART_STRONG_RE, 'strong'),
+ SimpleTagInlineProcessor(SMART_STRONG_RE, 'strong'),
'>emphasis2'
)
diff --git a/markdown/extensions/smarty.py b/markdown/extensions/smarty.py
index 35c78a8..189651f 100644
--- a/markdown/extensions/smarty.py
+++ b/markdown/extensions/smarty.py
@@ -83,7 +83,7 @@ smartypants.py license:
from __future__ import unicode_literals
from . import Extension
-from ..inlinepatterns import HtmlPattern, HTML_RE
+from ..inlinepatterns import HtmlInlineProcessor, HTML_RE
from ..odict import OrderedDict
from ..treeprocessors import InlineProcessor
@@ -150,21 +150,21 @@ remainingDoubleQuotesRegex = r'"'
HTML_STRICT_RE = HTML_RE + r'(?!\>)'
-class SubstituteTextPattern(HtmlPattern):
+class SubstituteTextPattern(HtmlInlineProcessor):
def __init__(self, pattern, replace, markdown_instance):
""" Replaces matches with some text. """
- HtmlPattern.__init__(self, pattern)
+ HtmlInlineProcessor.__init__(self, pattern)
self.replace = replace
self.markdown = markdown_instance
- def handleMatch(self, m):
+ def handleMatch(self, m, data):
result = ''
for part in self.replace:
if isinstance(part, int):
result += m.group(part)
else:
result += self.markdown.htmlStash.store(part)
- return result
+ return result, m.start(0), m.end(0)
class SmartyExtension(Extension):
@@ -233,11 +233,11 @@ class SmartyExtension(Extension):
(doubleQuoteSetsRe, (ldquo + lsquo,)),
(singleQuoteSetsRe, (lsquo + ldquo,)),
(decadeAbbrRe, (rsquo,)),
- (openingSingleQuotesRegex, (2, lsquo)),
+ (openingSingleQuotesRegex, (1, lsquo)),
(closingSingleQuotesRegex, (rsquo,)),
- (closingSingleQuotesRegex2, (rsquo, 2)),
+ (closingSingleQuotesRegex2, (rsquo, 1)),
(remainingSingleQuotesRegex, (lsquo,)),
- (openingDoubleQuotesRegex, (2, ldquo)),
+ (openingDoubleQuotesRegex, (1, ldquo)),
(closingDoubleQuotesRegex, (rdquo,)),
(closingDoubleQuotesRegex2, (rdquo,)),
(remainingDoubleQuotesRegex, (ldquo,))
@@ -255,7 +255,7 @@ class SmartyExtension(Extension):
self.educateAngledQuotes(md)
# Override HTML_RE from inlinepatterns.py so that it does not
# process tags with duplicate closing quotes.
- md.inlinePatterns["html"] = HtmlPattern(HTML_STRICT_RE, md)
+ md.inlinePatterns["html"] = HtmlInlineProcessor(HTML_STRICT_RE, md)
if configs['smart_dashes']:
self.educateDashes(md)
inlineProcessor = InlineProcessor(md)
diff --git a/markdown/extensions/wikilinks.py b/markdown/extensions/wikilinks.py
index a4a3515..b535d9c 100644
--- a/markdown/extensions/wikilinks.py
+++ b/markdown/extensions/wikilinks.py
@@ -18,7 +18,7 @@ License: [BSD](http://www.opensource.org/licenses/bsd-license.php)
from __future__ import absolute_import
from __future__ import unicode_literals
from . import Extension
-from ..inlinepatterns import Pattern
+from ..inlinepatterns import InlineProcessor
from ..util import etree
import re
@@ -46,20 +46,20 @@ class WikiLinkExtension(Extension):
# append to end of inline patterns
WIKILINK_RE = r'\[\[([\w0-9_ -]+)\]\]'
- wikilinkPattern = WikiLinks(WIKILINK_RE, self.getConfigs())
+ wikilinkPattern = WikiLinksInlineProcessor(WIKILINK_RE, self.getConfigs())
wikilinkPattern.md = md
md.inlinePatterns.add('wikilink', wikilinkPattern, "<not_strong")
-class WikiLinks(Pattern):
+class WikiLinksInlineProcessor(InlineProcessor):
def __init__(self, pattern, config):
- super(WikiLinks, self).__init__(pattern)
+ super(WikiLinksInlineProcessor, self).__init__(pattern)
self.config = config
- def handleMatch(self, m):
- if m.group(2).strip():
+ def handleMatch(self, m, data):
+ if m.group(1).strip():
base_url, end_url, html_class = self._getMeta()
- label = m.group(2).strip()
+ label = m.group(1).strip()
url = self.config['build_url'](label, base_url, end_url)
a = etree.Element('a')
a.text = label
@@ -68,7 +68,7 @@ class WikiLinks(Pattern):
a.set('class', html_class)
else:
a = ''
- return a
+ return a, m.start(0), m.end(0)
def _getMeta(self):
""" Return meta data or config data. """
diff --git a/markdown/inlinepatterns.py b/markdown/inlinepatterns.py
index dbb4d06..18da73b 100644
--- a/markdown/inlinepatterns.py
+++ b/markdown/inlinepatterns.py
@@ -55,31 +55,31 @@ except ImportError: # pragma: no cover
def build_inlinepatterns(md_instance, **kwargs):
""" Build the default set of inline patterns for Markdown. """
inlinePatterns = odict.OrderedDict()
- inlinePatterns["backtick"] = BacktickPattern(BACKTICK_RE)
- inlinePatterns["escape"] = EscapePattern(ESCAPE_RE, md_instance)
- inlinePatterns["reference"] = ReferencePattern(REFERENCE_RE, md_instance)
- inlinePatterns["link"] = LinkPattern(LINK_RE, md_instance)
- inlinePatterns["image_link"] = ImagePattern(IMAGE_LINK_RE, md_instance)
- inlinePatterns["image_reference"] = ImageReferencePattern(
+ inlinePatterns["backtick"] = BacktickInlineProcessor(BACKTICK_RE)
+ inlinePatterns["escape"] = EscapeInlineProcessor(ESCAPE_RE, md_instance)
+ inlinePatterns["reference"] = ReferenceInlineProcessor(REFERENCE_RE, md_instance)
+ inlinePatterns["link"] = LinkInlineProcessor(LINK_RE, md_instance)
+ inlinePatterns["image_link"] = ImageInlineProcessor(IMAGE_LINK_RE, md_instance)
+ inlinePatterns["image_reference"] = ImageReferenceInlineProcessor(
IMAGE_REFERENCE_RE, md_instance
)
- inlinePatterns["short_reference"] = ReferencePattern(
- SHORT_REF_RE, md_instance
+ inlinePatterns["short_reference"] = ShortReferenceInlineProcessor(
+ REFERENCE_RE, md_instance
)
- inlinePatterns["autolink"] = AutolinkPattern(AUTOLINK_RE, md_instance)
- inlinePatterns["automail"] = AutomailPattern(AUTOMAIL_RE, md_instance)
- inlinePatterns["linebreak"] = SubstituteTagPattern(LINE_BREAK_RE, 'br')
- inlinePatterns["html"] = HtmlPattern(HTML_RE, md_instance)
- inlinePatterns["entity"] = HtmlPattern(ENTITY_RE, md_instance)
- inlinePatterns["not_strong"] = SimpleTextPattern(NOT_STRONG_RE)
- inlinePatterns["em_strong"] = DoubleTagPattern(EM_STRONG_RE, 'strong,em')
- inlinePatterns["strong_em"] = DoubleTagPattern(STRONG_EM_RE, 'em,strong')
- inlinePatterns["strong"] = SimpleTagPattern(STRONG_RE, 'strong')
- inlinePatterns["emphasis"] = SimpleTagPattern(EMPHASIS_RE, 'em')
+ inlinePatterns["autolink"] = AutolinkInlineProcessor(AUTOLINK_RE, md_instance)
+ inlinePatterns["automail"] = AutomailInlineProcessor(AUTOMAIL_RE, md_instance)
+ inlinePatterns["linebreak"] = SubstituteTagInlineProcessor(LINE_BREAK_RE, 'br')
+ inlinePatterns["html"] = HtmlInlineProcessor(HTML_RE, md_instance)
+ inlinePatterns["entity"] = HtmlInlineProcessor(ENTITY_RE, md_instance)
+ inlinePatterns["not_strong"] = SimpleTextInlineProcessor(NOT_STRONG_RE)
+ inlinePatterns["em_strong"] = DoubleTagInlineProcessor(EM_STRONG_RE, 'strong,em')
+ inlinePatterns["strong_em"] = DoubleTagInlineProcessor(STRONG_EM_RE, 'em,strong')
+ inlinePatterns["strong"] = SimpleTagInlineProcessor(STRONG_RE, 'strong')
+ inlinePatterns["emphasis"] = SimpleTagInlineProcessor(EMPHASIS_RE, 'em')
if md_instance.smart_emphasis:
- inlinePatterns["emphasis2"] = SimpleTagPattern(SMART_EMPHASIS_RE, 'em')
+ inlinePatterns["emphasis2"] = SimpleTagInlineProcessor(SMART_EMPHASIS_RE, 'em')
else:
- inlinePatterns["emphasis2"] = SimpleTagPattern(EMPHASIS_2_RE, 'em')
+ inlinePatterns["emphasis2"] = SimpleTagInlineProcessor(EMPHASIS_2_RE, 'em')
return inlinePatterns
@@ -88,54 +88,43 @@ The actual regular expressions for patterns
-----------------------------------------------------------------------------
"""
-NOBRACKET = r'[^\]\[]*'
-BRK = (
- r'\[(' +
- (NOBRACKET + r'(\[')*6 +
- (NOBRACKET + r'\])*')*6 +
- NOBRACKET + r')\]'
-)
NOIMG = r'(?<!\!)'
# `e=f()` or ``e=f("`")``
-BACKTICK_RE = r'(?:(?<!\\)((?:\\{2})+)(?=`+)|(?<!\\)(`+)(.+?)(?<!`)\3(?!`))'
+BACKTICK_RE = r'(?:(?<!\\)((?:\\{2})+)(?=`+)|(?<!\\)(`+)(.+?)(?<!`)\2(?!`))'
# \<
ESCAPE_RE = r'\\(.)'
# *emphasis*
-EMPHASIS_RE = r'(\*)([^\*]+)\2'
+EMPHASIS_RE = r'(\*)([^\*]+)\1'
# **strong**
-STRONG_RE = r'(\*{2}|_{2})(.+?)\2'
+STRONG_RE = r'(\*{2}|_{2})(.+?)\1'
# ***strongem*** or ***em*strong**
-EM_STRONG_RE = r'(\*|_)\2{2}(.+?)\2(.*?)\2{2}'
+EM_STRONG_RE = r'(\*|_)\1{2}(.+?)\1(.*?)\1{2}'
# ***strong**em*
-STRONG_EM_RE = r'(\*|_)\2{2}(.+?)\2{2}(.*?)\2'
+STRONG_EM_RE = r'(\*|_)\1{2}(.+?)\1{2}(.*?)\1'
# _smart_emphasis_
-SMART_EMPHASIS_RE = r'(?<!\w)(_)(?!_)(.+?)(?<!_)\2(?!\w)'
+SMART_EMPHASIS_RE = r'(?<!\w)(_)(?!_)(.+?)(?<!_)\1(?!\w)'
# _emphasis_
-EMPHASIS_2_RE = r'(_)(.+?)\2'
+EMPHASIS_2_RE = r'(_)(.+?)\1'
# [text](url) or [text](<url>) or [text](url "title")
-LINK_RE = NOIMG + BRK + \
- r'''\(\s*(<.*?>|((?:(?:\(.*?\))|[^\(\)]))*?)\s*((['"])(.*?)\12\s*)?\)'''
+LINK_RE = NOIMG + r'\['
# ![alttxt](http://x.com/) or ![alttxt](<http://x.com/>)
-IMAGE_LINK_RE = r'\!' + BRK + r'\s*\(\s*(<.*?>|([^"\)\s]+\s*"[^"]*"|[^\)\s]*))\s*\)'
+IMAGE_LINK_RE = r'\!\['
# [Google][3]
-REFERENCE_RE = NOIMG + BRK + r'\s?\[([^\]]*)\]'
-
-# [Google]
-SHORT_REF_RE = NOIMG + r'\[([^\]]+)\]'
+REFERENCE_RE = LINK_RE
# ![alt text][2]
-IMAGE_REFERENCE_RE = r'\!' + BRK + r'\s?\[([^\]]*)\]'
+IMAGE_REFERENCE_RE = IMAGE_LINK_RE
# stand-alone * or _
NOT_STRONG_RE = r'((^| )(\*|_)( |$))'
@@ -172,6 +161,7 @@ def handleAttributes(text, parent):
"""Set values of an element based on attribute definitions ({@id=123})."""
def attributeCallback(match):
parent.set(match.group(1), match.group(2).replace('\n', ' '))
+ return ''
return ATTR_RE.sub(attributeCallback, text)
@@ -181,7 +171,7 @@ The pattern classes
"""
-class Pattern(object):
+class Pattern(object): # pragma: no cover
"""Base class that inline patterns subclass. """
ANCESTOR_EXCLUDES = tuple()
@@ -241,24 +231,79 @@ class Pattern(object):
return util.INLINE_PLACEHOLDER_RE.sub(get_stash, text)
-class SimpleTextPattern(Pattern):
+class InlineProcessor(Pattern):
+ """
+ Base class that inline patterns subclass.
+
+ This is the newer style inline processor that uses a more
+ efficient and flexible search approach.
+ """
+
+ def __init__(self, pattern, markdown_instance=None):
+ """
+ Create an instant of an inline pattern.
+
+ Keyword arguments:
+
+ * pattern: A regular expression that matches a pattern
+
+ """
+ self.pattern = pattern
+ self.compiled_re = re.compile(pattern, re.DOTALL | re.UNICODE)
+
+ # Api for Markdown to pass safe_mode into instance
+ self.safe_mode = False
+ if markdown_instance:
+ self.markdown = markdown_instance
+
+ def handleMatch(self, m, data):
+ """Return a ElementTree element from the given match and the
+ start and end index of the matched text.
+
+ If `start` and/or `end` are returned as `None`, it will be
+ assumed that the processor did not find a valid region of text.
+
+ Subclasses should override this method.
+
+ Keyword arguments:
+
+ * m: A re match object containing a match of the pattern.
+ * data: The buffer current under analysis
+
+ Returns:
+
+ * el: The ElementTree element, text or None.
+ * start: The start of the region that has been matched or None.
+ * end: The end of the region that has been matched or None.
+
+ """
+ pass # pragma: no cover
+
+
+class SimpleTextPattern(Pattern): # pragma: no cover
""" Return a simple text of group(2) of a Pattern. """
def handleMatch(self, m):
return m.group(2)
-class EscapePattern(Pattern):
+class SimpleTextInlineProcessor(InlineProcessor):
+ """ Return a simple text of group(1) of a Pattern. """
+ def handleMatch(self, m, data):
+ return m.group(1), m.start(0), m.end(0)
+
+
+class EscapeInlineProcessor(InlineProcessor):
""" Return an escaped character. """
- def handleMatch(self, m):
- char = m.group(2)
+ def handleMatch(self, m, data):
+ char = m.group(1)
if char in self.markdown.ESCAPED_CHARS:
- return '%s%s%s' % (util.STX, ord(char), util.ETX)
+ return '%s%s%s' % (util.STX, ord(char), util.ETX), m.start(0), m.end(0)
else:
- return None
+ return None, m.start(0), m.end(0)
-class SimpleTagPattern(Pattern):
+class SimpleTagPattern(Pattern): # pragma: no cover
"""
Return element of type `tag` with a text attribute of group(3)
of a Pattern.
@@ -274,29 +319,51 @@ class SimpleTagPattern(Pattern):
return el
-class SubstituteTagPattern(SimpleTagPattern):
+class SimpleTagInlineProcessor(InlineProcessor):
+ """
+ Return element of type `tag` with a text attribute of group(2)
+ of a Pattern.
+
+ """
+ def __init__(self, pattern, tag):
+ InlineProcessor.__init__(self, pattern)
+ self.tag = tag
+
+ def handleMatch(self, m, data):
+ el = util.etree.Element(self.tag)
+ el.text = m.group(2)
+ return el, m.start(0), m.end(0)
+
+
+class SubstituteTagPattern(SimpleTagPattern): # pragma: no cover
""" Return an element of type `tag` with no children. """
def handleMatch(self, m):
return util.etree.Element(self.tag)
-class BacktickPattern(Pattern):
+class SubstituteTagInlineProcessor(SimpleTagInlineProcessor):
+ """ Return an element of type `tag` with no children. """
+ def handleMatch(self, m, data):
+ return util.etree.Element(self.tag), m.start(0), m.end(0)
+
+
+class BacktickInlineProcessor(InlineProcessor):
""" Return a `<code>` element containing the matching text. """
def __init__(self, pattern):
- Pattern.__init__(self, pattern)
+ InlineProcessor.__init__(self, pattern)
self.ESCAPED_BSLASH = '%s%s%s' % (util.STX, ord('\\'), util.ETX)
self.tag = 'code'
- def handleMatch(self, m):
- if m.group(4):
+ def handleMatch(self, m, data):
+ if m.group(3):
el = util.etree.Element(self.tag)
- el.text = util.AtomicString(m.group(4).strip())
- return el
+ el.text = util.AtomicString(m.group(3).strip())
+ return el, m.start(0), m.end(0)
else:
- return m.group(2).replace('\\\\', self.ESCAPED_BSLASH)
+ return m.group(1).replace('\\\\', self.ESCAPED_BSLASH), m.start(0), m.end(0)
-class DoubleTagPattern(SimpleTagPattern):
+class DoubleTagPattern(SimpleTagPattern): # pragma: no cover
"""Return a ElementTree element nested in tag2 nested in tag1.
Useful for strong emphasis etc.
@@ -312,12 +379,28 @@ class DoubleTagPattern(SimpleTagPattern):
return el1
-class HtmlPattern(Pattern):
+class DoubleTagInlineProcessor(SimpleTagInlineProcessor):
+ """Return a ElementTree element nested in tag2 nested in tag1.
+
+ Useful for strong emphasis etc.
+
+ """
+ def handleMatch(self, m, data):
+ tag1, tag2 = self.tag.split(",")
+ el1 = util.etree.Element(tag1)
+ el2 = util.etree.SubElement(el1, tag2)
+ el2.text = m.group(2)
+ if len(m.groups()) == 3:
+ el2.tail = m.group(3)
+ return el1, m.start(0), m.end(0)
+
+
+class HtmlInlineProcessor(InlineProcessor):
""" Store raw inline html and return a placeholder. """
- def handleMatch(self, m):
- rawhtml = self.unescape(m.group(2))
+ def handleMatch(self, m, data):
+ rawhtml = self.unescape(m.group(1))
place_holder = self.markdown.htmlStash.store(rawhtml)
- return place_holder
+ return place_holder, m.start(0), m.end(0)
def unescape(self, text):
""" Return unescaped text given text with an inline placeholder. """
@@ -338,74 +421,234 @@ class HtmlPattern(Pattern):
return util.INLINE_PLACEHOLDER_RE.sub(get_stash, text)
-class LinkPattern(Pattern):
+class LinkInlineProcessor(InlineProcessor):
""" Return a link element from the given match. """
- def handleMatch(self, m):
+ RE_LINK = re.compile(r'''\(\s*(?:(<.*?>)\s*(?:(['"])(.*?)\2\s*)?\))?''', re.DOTALL | re.UNICODE)
+ RE_TITLE_CLEAN = re.compile(r'\s')
+
+ def handleMatch(self, m, data):
+ text, index, handled = self.getText(data, m.end(0))
+
+ if not handled:
+ return None, None, None
+
+ href, title, index, handled = self.getLink(data, index)
+ if not handled:
+ return None, None, None
+
el = util.etree.Element("a")
- el.text = m.group(2)
- title = m.group(13)
- href = m.group(9)
+ el.text = text
- if href:
- if href[0] == "<":
- href = href[1:-1]
- el.set("href", self.unescape(href.strip()))
- else:
- el.set("href", "")
+ el.set("href", href)
- if title:
- title = dequote(self.unescape(title))
+ if title is not None:
el.set("title", title)
- return el
+ return el, m.start(0), index
+
+ def getLink(self, data, index):
+ """Parse data between `()` of `[Text]()` allowing recursive `()`. """
+
+ href = ''
+ title = None
+ handled = False
+
+ m = self.RE_LINK.match(data, pos=index)
+ if m and m.group(1):
+ # Matches [Text](<link> "title")
+ href = m.group(1)[1:-1].strip()
+ if m.group(3):
+ title = m.group(3)
+ index = m.end(0)
+ handled = True
+ elif m:
+ # Track bracket nesting and index in string
+ bracket_count = 1
+ backtrack_count = 1
+ start_index = m.end()
+ index = start_index
+ last_bracket = -1
+
+ # Primary (first found) quote tracking.
+ quote = None
+ start_quote = -1
+ exit_quote = -1
+ ignore_matches = False
+
+ # Secondary (second found) quote tracking.
+ alt_quote = None
+ start_alt_quote = -1
+ exit_alt_quote = -1
+
+ # Track last character
+ last = ''
+
+ for pos in util.iterrange(index, len(data)):
+ c = data[pos]
+ if c == '(':
+ # Count nested (
+ # Don't increment the bracket count if we are sure we're in a title.
+ if not ignore_matches:
+ bracket_count += 1
+ elif backtrack_count > 0:
+ backtrack_count -= 1
+ elif c == ')':
+ # Match nested ) to (
+ # Don't decrement if we are sure we are in a title that is unclosed.
+ if ((exit_quote != -1 and quote == last) or (exit_alt_quote != -1 and alt_quote == last)):
+ bracket_count = 0
+ elif not ignore_matches:
+ bracket_count -= 1
+ elif backtrack_count > 0:
+ backtrack_count -= 1
+ # We've found our backup end location if the title doesn't resolve.
+ if backtrack_count == 0:
+ last_bracket = index + 1
+
+ elif c in ("'", '"'):
+ # Quote has started
+ if not quote:
+ # We'll assume we are now in a title.
+ # Brackets are quoted, so no need to match them (except for the final one).
+ ignore_matches = True
+ backtrack_count = bracket_count
+ bracket_count = 1
+ start_quote = index + 1
+ quote = c
+ # Secondary quote (in case the first doesn't resolve): [text](link'"title")
+ elif c != quote and not alt_quote:
+ start_alt_quote = index + 1
+ alt_quote = c
+ # Update primary quote match
+ elif c == quote:
+ exit_quote = index + 1
+ # Update secondary quote match
+ elif alt_quote and c == alt_quote:
+ exit_alt_quote = index + 1
+
+ index += 1
+
+ # Link is closed, so let's break out of the loop
+ if bracket_count == 0:
+ # Get the title if we closed a title string right before link closed
+ if exit_quote >= 0 and quote == last:
+ href = data[start_index:start_quote - 1]
+ title = ''.join(data[start_quote:exit_quote - 1])
+ elif exit_alt_quote >= 0 and alt_quote == last:
+ href = data[start_index:start_alt_quote - 1]
+ title = ''.join(data[start_alt_quote:exit_alt_quote - 1])
+ else:
+ href = data[start_index:index - 1]
+ break
+
+ if c != ' ':
+ last = c
+
+ # We have a scenario: [test](link"notitle)
+ # When we enter a string, we stop tracking bracket resolution in the main counter,
+ # but we do keep a backup counter up until we discover where we might resolve all brackets
+ # if the title string fails to resolve.
+ if bracket_count != 0 and backtrack_count == 0:
+ href = data[start_index:last_bracket - 1]
+ index = last_bracket
+ bracket_count = 0
+
+ handled = bracket_count == 0
+
+ if title is not None:
+ title = self.RE_TITLE_CLEAN.sub(' ', dequote(self.unescape(title.strip())))
+
+ href = self.unescape(href).strip()
+
+ return href, title, index, handled
+
+ def getText(self, data, index):
+ """Parse the content between `[]` of the start of an image or link
+ resolving nested square brackets.
-class ImagePattern(LinkPattern):
+ """
+ bracket_count = 1
+ text = []
+ for pos in util.iterrange(index, len(data)):
+ c = data[pos]
+ if c == ']':
+ bracket_count -= 1
+ elif c == '[':
+ bracket_count += 1
+ index += 1
+ if bracket_count == 0:
+ break
+ text.append(c)
+ return ''.join(text), index, bracket_count == 0
+
+
+class ImageInlineProcessor(LinkInlineProcessor):
""" Return a img element from the given match. """
- def handleMatch(self, m):
+
+ def handleMatch(self, m, data):
+ text, index, handled = self.getText(data, m.end(0))
+ if not handled:
+ return None, None, None
+
+ src, title, index, handled = self.getLink(data, index)
+ if not handled:
+ return None, None, None
+
el = util.etree.Element("img")
- src_parts = m.group(9).split()
- if src_parts:
- src = src_parts[0]
- if src[0] == "<" and src[-1] == ">":
- src = src[1:-1]
- el.set('src', self.unescape(src))
- else:
- el.set('src', "")
- if len(src_parts) > 1:
- el.set('title', dequote(self.unescape(" ".join(src_parts[1:]))))
+
+ el.set("src", src)
+
+ if title is not None:
+ el.set("title", title)
if self.markdown.enable_attributes:
- truealt = handleAttributes(m.group(2), el)
+ truealt = handleAttributes(text, el)
else:
- truealt = m.group(2)
+ truealt = text
el.set('alt', self.unescape(truealt))
- return el
+ return el, m.start(0), index
-class ReferencePattern(LinkPattern):
+class ReferenceInlineProcessor(LinkInlineProcessor):
""" Match to a stored reference and return link element. """
-
NEWLINE_CLEANUP_RE = re.compile(r'[ ]?\n', re.MULTILINE)
- def handleMatch(self, m):
- try:
- id = m.group(9).lower()
- except IndexError:
- id = None
- if not id:
- # if we got something like "[Google][]" or "[Google]"
- # we'll use "google" as the id
- id = m.group(2).lower()
+ RE_LINK = re.compile(r'\s?\[([^\]]*)\]', re.DOTALL | re.UNICODE)
+
+ def handleMatch(self, m, data):
+ text, index, handled = self.getText(data, m.end(0))
+ if not handled:
+ return None, None, None
+
+ id, end, handled = self.evalId(data, index, text)
+ if not handled:
+ return None, None, None
# Clean up linebreaks in id
id = self.NEWLINE_CLEANUP_RE.sub(' ', id)
if id not in self.markdown.references: # ignore undefined refs
- return None
+ return None, m.start(0), end
+
href, title = self.markdown.references[id]
- text = m.group(2)
- return self.makeTag(href, title, text)
+ return self.makeTag(href, title, text), m.start(0), end
+
+ def evalId(self, data, index, text):
+ """
+ Evaluate the id portion of [ref][id].
+
+ If [ref][] use [ref].
+ """
+ m = self.RE_LINK.match(data, pos=index)
+ if not m:
+ return None, index, False
+ else:
+ id = m.group(1).lower()
+ end = m.end(0)
+ if not id:
+ id = text.lower()
+ return id, end, True
def makeTag(self, href, title, text):
el = util.etree.Element('a')
@@ -418,7 +661,15 @@ class ReferencePattern(LinkPattern):
return el
-class ImageReferencePattern(ReferencePattern):
+class ShortReferenceInlineProcessor(ReferenceInlineProcessor):
+ """Short form of reference: [google]. """
+ def evalId(self, data, index, text):
+ """Evaluate the id from [ref]. """
+
+ return text.lower(), index, True
+
+
+class ImageReferenceInlineProcessor(ReferenceInlineProcessor):
""" Match to a stored reference and return img element. """
def makeTag(self, href, title, text):
el = util.etree.Element("img")
@@ -433,22 +684,22 @@ class ImageReferencePattern(ReferencePattern):
return el
-class AutolinkPattern(Pattern):
+class AutolinkInlineProcessor(InlineProcessor):
""" Return a link Element given an autolink (`<http://example/com>`). """
- def handleMatch(self, m):
+ def handleMatch(self, m, data):
el = util.etree.Element("a")
- el.set('href', self.unescape(m.group(2)))
- el.text = util.AtomicString(m.group(2))
- return el
+ el.set('href', self.unescape(m.group(1)))
+ el.text = util.AtomicString(m.group(1))
+ return el, m.start(0), m.end(0)
-class AutomailPattern(Pattern):
+class AutomailInlineProcessor(InlineProcessor):
"""
Return a mailto link Element given an automail link (`<foo@example.com>`).
"""
- def handleMatch(self, m):
+ def handleMatch(self, m, data):
el = util.etree.Element('a')
- email = self.unescape(m.group(2))
+ email = self.unescape(m.group(1))
if email.startswith("mailto:"):
email = email[len("mailto:"):]
@@ -467,4 +718,4 @@ class AutomailPattern(Pattern):
mailto = "".join([util.AMP_SUBSTITUTE + '#%d;' %
ord(letter) for letter in mailto])
el.set('href', mailto)
- return el
+ return el, m.start(0), m.end(0)
diff --git a/markdown/treeprocessors.py b/markdown/treeprocessors.py
index e2566a4..7c37ae7 100644
--- a/markdown/treeprocessors.py
+++ b/markdown/treeprocessors.py
@@ -231,21 +231,38 @@ class InlineProcessor(Treeprocessor):
Returns: String with placeholders instead of ElementTree elements.
"""
+ new_style = isinstance(pattern, inlinepatterns.InlineProcessor)
for exclude in pattern.ANCESTOR_EXCLUDES:
if exclude.lower() in self.ancestors:
return data, False, 0
- match = pattern.getCompiledRegExp().match(data[startIndex:])
- leftData = data[:startIndex]
+ if new_style:
+ match = None
+ # Since handleMatch may reject our first match,
+ # we iterate over the buffer looking for matches
+ # until we can't find any more.
+ for match in pattern.getCompiledRegExp().finditer(data, startIndex):
+ node, start, end = pattern.handleMatch(match, data)
+ if start is None or end is None:
+ startIndex += match.end(0)
+ match = None
+ continue
+ break
+ else: # pragma: no cover
+ match = pattern.getCompiledRegExp().match(data[startIndex:])
+ leftData = data[:startIndex]
if not match:
return data, False, 0
- node = pattern.handleMatch(match)
+ if not new_style: # pragma: no cover
+ node = pattern.handleMatch(match)
+ start = match.start(0)
+ end = match.end(0)
if node is None:
- return data, True, len(leftData)+match.span(len(match.groups()))[0]
+ return data, True, end
if not isString(node):
if not isinstance(node.text, util.AtomicString):
@@ -265,9 +282,13 @@ class InlineProcessor(Treeprocessor):
placeholder = self.__stashNode(node, pattern.type())
- return "%s%s%s%s" % (leftData,
- match.group(1),
- placeholder, match.groups()[-1]), True, 0
+ if new_style:
+ return "%s%s%s" % (data[:start],
+ placeholder, data[end:]), True, 0
+ else: # pragma: no cover
+ return "%s%s%s%s" % (leftData,
+ match.group(1),
+ placeholder, match.groups()[-1]), True, 0
def __build_ancestors(self, parent, parents):
"""Build the ancestor list."""
diff --git a/markdown/util.py b/markdown/util.py
index 8897195..3a36c00 100644
--- a/markdown/util.py
+++ b/markdown/util.py
@@ -14,10 +14,12 @@ if PY3: # pragma: no cover
string_type = str
text_type = str
int2str = chr
+ iterrange = range
else: # pragma: no cover
string_type = basestring # noqa
text_type = unicode # noqa
int2str = unichr # noqa
+ iterrange = xrange # noqa
"""
diff --git a/tests/misc/image.html b/tests/misc/image.html
deleted file mode 100644
index 1171e4e..0000000
--- a/tests/misc/image.html
+++ /dev/null
@@ -1,5 +0,0 @@
-<p><img alt="Poster" src="http://humane_man.jpg" title="The most humane man." /></p>
-<p><img alt="Poster" src="http://humane_man.jpg" title="The most humane man." /></p>
-<p><img alt="Blank" src="" /></p>
-<p>![Fail](http://humane man.jpg "The most humane man.")</p>
-<p>![Fail](http://humane man.jpg)</p> \ No newline at end of file
diff --git a/tests/misc/image.txt b/tests/misc/image.txt
deleted file mode 100644
index 3fae16a..0000000
--- a/tests/misc/image.txt
+++ /dev/null
@@ -1,12 +0,0 @@
-
-![Poster](http://humane_man.jpg "The most humane man.")
-
-![Poster][]
-
-[Poster]:http://humane_man.jpg "The most humane man."
-
-![Blank]()
-
-![Fail](http://humane man.jpg "The most humane man.")
-
-![Fail](http://humane man.jpg)
diff --git a/tests/test_apis.py b/tests/test_apis.py
index aa43e52..15ecc5b 100644
--- a/tests/test_apis.py
+++ b/tests/test_apis.py
@@ -753,16 +753,16 @@ class TestEscapeAppend(unittest.TestCase):
class TestAncestorExclusion(unittest.TestCase):
""" Tests exclusion of tags in ancestor list. """
- class AncestorExample(markdown.inlinepatterns.SimpleTagPattern):
+ class AncestorExample(markdown.inlinepatterns.SimpleTagInlineProcessor):
""" Ancestor Test. """
ANCESTOR_EXCLUDES = ('a',)
- def handleMatch(self, m):
+ def handleMatch(self, m, data):
""" Handle match. """
el = markdown.util.etree.Element(self.tag)
- el.text = m.group(3)
- return el
+ el.text = m.group(2)
+ return el, m.start(0), m.end(0)
class AncestorExtension(markdown.Extension):
@@ -774,7 +774,7 @@ class TestAncestorExclusion(unittest.TestCase):
def extendMarkdown(self, md, md_globals):
"""Modify inline patterns."""
- pattern = r'(\+)([^\+]+)\2'
+ pattern = r'(\+)([^\+]+)\1'
md.inlinePatterns["ancestor-test"] = TestAncestorExclusion.AncestorExample(pattern, 'strong')
def setUp(self):
diff --git a/tests/test_syntax/inline/__init__.py b/tests/test_syntax/inline/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/tests/test_syntax/inline/__init__.py
diff --git a/tests/test_syntax/inline/images.py b/tests/test_syntax/inline/images.py
new file mode 100644
index 0000000..9c1dc34
--- /dev/null
+++ b/tests/test_syntax/inline/images.py
@@ -0,0 +1,139 @@
+from markdown.test_tools import TestCase
+
+
+class TestAdvancedImages(TestCase):
+
+ def test_nested_square_brackets(self):
+ self.assertMarkdownRenders(
+ """![Text[[[[[[[]]]]]]][]](http://link.com/image.png) more text""",
+ """<p><img alt="Text[[[[[[[]]]]]]][]" src="http://link.com/image.png" /> more text</p>"""
+ )
+
+ def test_nested_round_brackets(self):
+ self.assertMarkdownRenders(
+ """![Text](http://link.com/(((((((()))))))()).png) more text""",
+ """<p><img alt="Text" src="http://link.com/(((((((()))))))()).png" /> more text</p>"""
+ )
+
+ def test_uneven_brackets_with_titles1(self):
+ self.assertMarkdownRenders(
+ """![Text](http://link.com/(.png"title") more text""",
+ """<p><img alt="Text" src="http://link.com/(.png" title="title" /> more text</p>"""
+ )
+
+ def test_uneven_brackets_with_titles2(self):
+ self.assertMarkdownRenders(
+ """![Text](http://link.com/('.png"title") more text""",
+ """<p><img alt="Text" src="http://link.com/('.png" title="title" /> more text</p>"""
+ )
+
+ def test_uneven_brackets_with_titles3(self):
+ self.assertMarkdownRenders(
+ """![Text](http://link.com/(.png"title)") more text""",
+ """<p><img alt="Text" src="http://link.com/(.png" title="title)" /> more text</p>"""
+ )
+
+ def test_uneven_brackets_with_titles4(self):
+ self.assertMarkdownRenders(
+ """![Text](http://link.com/(.png "title") more text""",
+ """<p><img alt="Text" src="http://link.com/(.png" title="title" /> more text</p>"""
+ )
+
+ def test_uneven_brackets_with_titles5(self):
+ self.assertMarkdownRenders(
+ """![Text](http://link.com/(.png "title)") more text""",
+ """<p><img alt="Text" src="http://link.com/(.png" title="title)" /> more text</p>"""
+ )
+
+ def test_mixed_title_quotes1(self):
+ self.assertMarkdownRenders(
+ """![Text](http://link.com/'.png"title") more text""",
+ """<p><img alt="Text" src="http://link.com/'.png" title="title" /> more text</p>"""
+ )
+
+ def test_mixed_title_quotes2(self):
+ self.assertMarkdownRenders(
+ """![Text](http://link.com/".png'title') more text""",
+ """<p><img alt="Text" src="http://link.com/&quot;.png" title="title" /> more text</p>"""
+ )
+
+ def test_mixed_title_quotes3(self):
+ self.assertMarkdownRenders(
+ """![Text](http://link.com/with spaces.png'"and quotes" 'and title') more text""",
+ """<p><img alt="Text" src="http://link.com/with spaces.png" title="&quot;and quotes&quot; 'and title" />"""
+ """ more text</p>"""
+ )
+
+ def test_mixed_title_quotes4(self):
+ self.assertMarkdownRenders(
+ """![Text](http://link.com/with spaces'.png"and quotes" 'and title") more text""",
+ """<p><img alt="Text" src="http://link.com/with spaces'.png" title="and quotes&quot; 'and title" />"""
+ """ more text</p>"""
+ )
+
+ def test_mixed_title_quotes5(self):
+ self.assertMarkdownRenders(
+ """![Text](http://link.com/with spaces .png'"and quotes" 'and title') more text""",
+ """<p><img alt="Text" src="http://link.com/with spaces .png" title="&quot;and quotes&quot;"""
+ """ 'and title" /> more text</p>"""
+ )
+
+ def test_mixed_title_quotes6(self):
+ self.assertMarkdownRenders(
+ """![Text](http://link.com/with spaces "and quotes".png 'and title') more text""",
+ """<p><img alt="Text" src="http://link.com/with spaces &quot;and quotes&quot;.png" title="and title" />"""
+ """ more text</p>"""
+ )
+
+ def test_single_quote(self):
+ self.assertMarkdownRenders(
+ """![test](link"notitle.png)""",
+ """<p><img alt="test" src="link&quot;notitle.png" /></p>"""
+ )
+
+ def test_angle_with_mixed_title_quotes(self):
+ self.assertMarkdownRenders(
+ """![Text](<http://link.com/with spaces '"and quotes".png> 'and title') more text""",
+ """<p><img alt="Text" src="http://link.com/with spaces '&quot;and quotes&quot;.png" title="and title" />"""
+ """ more text</p>"""
+ )
+
+ def test_misc(self):
+ self.assertMarkdownRenders(
+ """![Poster](http://humane_man.jpg "The most humane man.")""",
+ """<p><img alt="Poster" src="http://humane_man.jpg" title="The most humane man." /></p>"""
+ )
+
+ def test_misc_ref(self):
+ self.assertMarkdownRenders(
+ self.dedent(
+ """
+ ![Poster][]
+
+ [Poster]:http://humane_man.jpg "The most humane man."
+ """
+ ),
+ self.dedent(
+ """
+ <p><img alt="Poster" src="http://humane_man.jpg" title="The most humane man." /></p>
+ """
+ )
+ )
+
+ def test_misc_blank(self):
+ self.assertMarkdownRenders(
+ """![Blank]()""",
+ """<p><img alt="Blank" src="" /></p>"""
+ )
+
+ def test_misc_img_title(self):
+ self.assertMarkdownRenders(
+ """![Image](http://humane man.jpg "The most humane man.")""",
+ """<p><img alt="Image" src="http://humane man.jpg" title="The most humane man." /></p>"""
+ )
+
+ def test_misc_img(self):
+ self.assertMarkdownRenders(
+ """![Image](http://humane man.jpg)""",
+ """<p><img alt="Image" src="http://humane man.jpg" /></p>"""
+ )
diff --git a/tests/test_syntax/inline/links.py b/tests/test_syntax/inline/links.py
new file mode 100644
index 0000000..fe58ada
--- /dev/null
+++ b/tests/test_syntax/inline/links.py
@@ -0,0 +1,98 @@
+from markdown.test_tools import TestCase
+
+
+class TestAdvancedLinks(TestCase):
+
+ def test_nested_square_brackets(self):
+ self.assertMarkdownRenders(
+ """[Text[[[[[[[]]]]]]][]](http://link.com) more text""",
+ """<p><a href="http://link.com">Text[[[[[[[]]]]]]][]</a> more text</p>"""
+ )
+
+ def test_nested_round_brackets(self):
+ self.assertMarkdownRenders(
+ """[Text](http://link.com/(((((((()))))))())) more text""",
+ """<p><a href="http://link.com/(((((((()))))))())">Text</a> more text</p>"""
+ )
+
+ def test_uneven_brackets_with_titles1(self):
+ self.assertMarkdownRenders(
+ """[Text](http://link.com/("title") more text""",
+ """<p><a href="http://link.com/(" title="title">Text</a> more text</p>"""
+ )
+
+ def test_uneven_brackets_with_titles2(self):
+ self.assertMarkdownRenders(
+ """[Text](http://link.com/('"title") more text""",
+ """<p><a href="http://link.com/('" title="title">Text</a> more text</p>"""
+ )
+
+ def test_uneven_brackets_with_titles3(self):
+ self.assertMarkdownRenders(
+ """[Text](http://link.com/("title)") more text""",
+ """<p><a href="http://link.com/(" title="title)">Text</a> more text</p>"""
+ )
+
+ def test_uneven_brackets_with_titles4(self):
+ self.assertMarkdownRenders(
+ """[Text](http://link.com/( "title") more text""",
+ """<p><a href="http://link.com/(" title="title">Text</a> more text</p>"""
+ )
+
+ def test_uneven_brackets_with_titles5(self):
+ self.assertMarkdownRenders(
+ """[Text](http://link.com/( "title)") more text""",
+ """<p><a href="http://link.com/(" title="title)">Text</a> more text</p>"""
+ )
+
+ def test_mixed_title_quotes1(self):
+ self.assertMarkdownRenders(
+ """[Text](http://link.com/'"title") more text""",
+ """<p><a href="http://link.com/'" title="title">Text</a> more text</p>"""
+ )
+
+ def test_mixed_title_quotes2(self):
+ self.assertMarkdownRenders(
+ """[Text](http://link.com/"'title') more text""",
+ """<p><a href="http://link.com/&quot;" title="title">Text</a> more text</p>"""
+ )
+
+ def test_mixed_title_quotes3(self):
+ self.assertMarkdownRenders(
+ """[Text](http://link.com/with spaces'"and quotes" 'and title') more text""",
+ """<p><a href="http://link.com/with spaces" title="&quot;and quotes&quot; 'and title">"""
+ """Text</a> more text</p>"""
+ )
+
+ def test_mixed_title_quotes4(self):
+ self.assertMarkdownRenders(
+ """[Text](http://link.com/with spaces'"and quotes" 'and title") more text""",
+ """<p><a href="http://link.com/with spaces'" title="and quotes&quot; 'and title">Text</a> more text</p>"""
+ )
+
+ def test_mixed_title_quotes5(self):
+ self.assertMarkdownRenders(
+ """[Text](http://link.com/with spaces '"and quotes" 'and title') more text""",
+ """<p><a href="http://link.com/with spaces" title="&quot;and quotes&quot; 'and title">"""
+ """Text</a> more text</p>"""
+ )
+
+ def test_mixed_title_quotes6(self):
+ self.assertMarkdownRenders(
+ """[Text](http://link.com/with spaces "and quotes" 'and title') more text""",
+ """<p><a href="http://link.com/with spaces &quot;and quotes&quot;" title="and title">"""
+ """Text</a> more text</p>"""
+ )
+
+ def test_single_quote(self):
+ self.assertMarkdownRenders(
+ """[test](link"notitle)""",
+ """<p><a href="link&quot;notitle">test</a></p>"""
+ )
+
+ def test_angle_with_mixed_title_quotes(self):
+ self.assertMarkdownRenders(
+ """[Text](<http://link.com/with spaces '"and quotes"> 'and title') more text""",
+ """<p><a href="http://link.com/with spaces '&quot;and quotes&quot;" title="and title">"""
+ """Text</a> more text</p>"""
+ )