diff options
author | Waylan Limberg <waylan.limberg@icloud.com> | 2014-11-20 16:07:03 -0500 |
---|---|---|
committer | Waylan Limberg <waylan.limberg@icloud.com> | 2014-11-20 16:07:03 -0500 |
commit | 8f66a94eab1389d97041944ed24afd2bf7c4389c (patch) | |
tree | 10b53664076650be951468cbbb163f3d637e5891 /markdown/inlinepatterns.py | |
parent | 0c2143819ef7de53be52f7a4d47e027ff194a9b4 (diff) | |
download | markdown-8f66a94eab1389d97041944ed24afd2bf7c4389c.tar.gz markdown-8f66a94eab1389d97041944ed24afd2bf7c4389c.tar.bz2 markdown-8f66a94eab1389d97041944ed24afd2bf7c4389c.zip |
Flake8 cleanup (mostly whitespace).
Got all but a couple of files in the tests (ran out of time today).
Apparently I have been using some bad form for years (although a few
things seemed to look better before the update). Anyway, conformant now.
Diffstat (limited to 'markdown/inlinepatterns.py')
-rw-r--r-- | markdown/inlinepatterns.py | 144 |
1 file changed, 95 insertions, 49 deletions
diff --git a/markdown/inlinepatterns.py b/markdown/inlinepatterns.py index c9d82fd..27690bf 100644 --- a/markdown/inlinepatterns.py +++ b/markdown/inlinepatterns.py @@ -46,13 +46,13 @@ from __future__ import unicode_literals from . import util from . import odict import re -try: #pragma: no cover +try: # pragma: no cover from urllib.parse import urlparse, urlunparse -except ImportError: #pragma: no cover +except ImportError: # pragma: no cover from urlparse import urlparse, urlunparse -try: #pragma: no cover +try: # pragma: no cover from html import entities -except ImportError: #pragma: no cover +except ImportError: # pragma: no cover import htmlentitydefs as entities @@ -64,10 +64,12 @@ def build_inlinepatterns(md_instance, **kwargs): inlinePatterns["reference"] = ReferencePattern(REFERENCE_RE, md_instance) inlinePatterns["link"] = LinkPattern(LINK_RE, md_instance) inlinePatterns["image_link"] = ImagePattern(IMAGE_LINK_RE, md_instance) - inlinePatterns["image_reference"] = \ - ImageReferencePattern(IMAGE_REFERENCE_RE, md_instance) - inlinePatterns["short_reference"] = \ - ReferencePattern(SHORT_REF_RE, md_instance) + inlinePatterns["image_reference"] = ImageReferencePattern( + IMAGE_REFERENCE_RE, md_instance + ) + inlinePatterns["short_reference"] = ReferencePattern( + SHORT_REF_RE, md_instance + ) inlinePatterns["autolink"] = AutolinkPattern(AUTOLINK_RE, md_instance) inlinePatterns["automail"] = AutomailPattern(AUTOMAIL_RE, md_instance) inlinePatterns["linebreak"] = SubstituteTagPattern(LINE_BREAK_RE, 'br') @@ -91,47 +93,84 @@ The actual regular expressions for patterns """ NOBRACKET = r'[^\]\[]*' -BRK = ( r'\[(' - + (NOBRACKET + r'(\[')*6 - + (NOBRACKET+ r'\])*')*6 - + NOBRACKET + r')\]' ) +BRK = ( + r'\[(' + + (NOBRACKET + r'(\[')*6 + + (NOBRACKET + r'\])*')*6 + + NOBRACKET + r')\]' +) NOIMG = r'(?<!\!)' -BACKTICK_RE = r'(?<!\\)(`+)(.+?)(?<!`)\2(?!`)' # `e=f()` or ``e=f("`")`` -ESCAPE_RE = r'\\(.)' # \< -EMPHASIS_RE = r'(\*)([^\*]+)\2' # *emphasis* -STRONG_RE 
= r'(\*{2}|_{2})(.+?)\2' # **strong** -EM_STRONG_RE = r'(\*|_)\2{2}(.+?)\2(.*?)\2{2}' # ***strongem*** or ***em*strong** -STRONG_EM_RE = r'(\*|_)\2{2}(.+?)\2{2}(.*?)\2' # ***strong**em* -SMART_EMPHASIS_RE = r'(?<!\w)(_)(?!_)(.+?)(?<!_)\2(?!\w)' # _smart_emphasis_ -EMPHASIS_2_RE = r'(_)(.+?)\2' # _emphasis_ -LINK_RE = NOIMG + BRK + \ -r'''\(\s*(<.*?>|((?:(?:\(.*?\))|[^\(\)]))*?)\s*((['"])(.*?)\12\s*)?\)''' +# `e=f()` or ``e=f("`")`` +BACKTICK_RE = r'(?<!\\)(`+)(.+?)(?<!`)\2(?!`)' + +# \< +ESCAPE_RE = r'\\(.)' + +# *emphasis* +EMPHASIS_RE = r'(\*)([^\*]+)\2' + +# **strong** +STRONG_RE = r'(\*{2}|_{2})(.+?)\2' + +# ***strongem*** or ***em*strong** +EM_STRONG_RE = r'(\*|_)\2{2}(.+?)\2(.*?)\2{2}' + +# ***strong**em* +STRONG_EM_RE = r'(\*|_)\2{2}(.+?)\2{2}(.*?)\2' + +# _smart_emphasis_ +SMART_EMPHASIS_RE = r'(?<!\w)(_)(?!_)(.+?)(?<!_)\2(?!\w)' + +# _emphasis_ +EMPHASIS_2_RE = r'(_)(.+?)\2' + # [text](url) or [text](<url>) or [text](url "title") +LINK_RE = NOIMG + BRK + \ + r'''\(\s*(<.*?>|((?:(?:\(.*?\))|[^\(\)]))*?)\s*((['"])(.*?)\12\s*)?\)''' -IMAGE_LINK_RE = r'\!' + BRK + r'\s*\((<.*?>|([^")]+"[^"]*"|[^\)]*))\)' # ![alttxt](http://x.com/) or ![alttxt](<http://x.com/>) -REFERENCE_RE = NOIMG + BRK+ r'\s?\[([^\]]*)\]' # [Google][3] -SHORT_REF_RE = NOIMG + r'\[([^\]]+)\]' # [Google] -IMAGE_REFERENCE_RE = r'\!' + BRK + '\s?\[([^\]]*)\]' # ![alt text][2] -NOT_STRONG_RE = r'((^| )(\*|_)( |$))' # stand-alone * or _ -AUTOLINK_RE = r'<((?:[Ff]|[Hh][Tt])[Tt][Pp][Ss]?://[^>]*)>' # <http://www.123.com> -AUTOMAIL_RE = r'<([^> \!]*@[^> ]*)>' # <me@example.com> +IMAGE_LINK_RE = r'\!' + BRK + r'\s*\((<.*?>|([^")]+"[^"]*"|[^\)]*))\)' + +# [Google][3] +REFERENCE_RE = NOIMG + BRK + r'\s?\[([^\]]*)\]' + +# [Google] +SHORT_REF_RE = NOIMG + r'\[([^\]]+)\]' + +# ![alt text][2] +IMAGE_REFERENCE_RE = r'\!' 
+ BRK + '\s?\[([^\]]*)\]' + +# stand-alone * or _ +NOT_STRONG_RE = r'((^| )(\*|_)( |$))' -HTML_RE = r'(\<([a-zA-Z/][^\>]*?|\!--.*?--)\>)' # <...> -ENTITY_RE = r'(&[\#a-zA-Z0-9]*;)' # & -LINE_BREAK_RE = r' \n' # two spaces at end of line +# <http://www.123.com> +AUTOLINK_RE = r'<((?:[Ff]|[Hh][Tt])[Tt][Pp][Ss]?://[^>]*)>' + +# <me@example.com> +AUTOMAIL_RE = r'<([^> \!]*@[^> ]*)>' + +# <...> +HTML_RE = r'(\<([a-zA-Z/][^\>]*?|\!--.*?--)\>)' + +# & +ENTITY_RE = r'(&[\#a-zA-Z0-9]*;)' + +# two spaces at end of line +LINE_BREAK_RE = r' \n' def dequote(string): """Remove quotes from around a string.""" - if ( ( string.startswith('"') and string.endswith('"')) - or (string.startswith("'") and string.endswith("'")) ): + if ((string.startswith('"') and string.endswith('"')) + or (string.startswith("'") and string.endswith("'"))): return string[1:-1] else: return string -ATTR_RE = re.compile("\{@([^\}]*)=([^\}]*)}") # {@id=123} + +ATTR_RE = re.compile("\{@([^\}]*)=([^\}]*)}") # {@id=123} + def handleAttributes(text, parent): """Set values of an element based on attribute definitions ({@id=123}).""" @@ -145,6 +184,7 @@ The pattern classes ----------------------------------------------------------------------------- """ + class Pattern(object): """Base class that inline patterns subclass. """ @@ -180,7 +220,7 @@ class Pattern(object): * m: A re match object containing a match of the pattern. """ - pass #pragma: no cover + pass # pragma: no cover def type(self): """ Return class name, to define pattern type """ @@ -190,9 +230,10 @@ class Pattern(object): """ Return unescaped text given text with an inline placeholder. 
""" try: stash = self.markdown.treeprocessors['inline'].stashed_nodes - except KeyError: #pragma: no cover + except KeyError: # pragma: no cover return text - def itertext(el): #pragma: no cover + + def itertext(el): # pragma: no cover ' Reimplement Element.itertext for older python versions ' tag = el.tag if not isinstance(tag, util.string_type) and tag is not None: @@ -204,6 +245,7 @@ class Pattern(object): yield s if e.tail: yield e.tail + def get_stash(m): id = m.group(1) if id in stash: @@ -239,7 +281,7 @@ class SimpleTagPattern(Pattern): of a Pattern. """ - def __init__ (self, pattern, tag): + def __init__(self, pattern, tag): Pattern.__init__(self, pattern) self.tag = tag @@ -251,13 +293,13 @@ class SimpleTagPattern(Pattern): class SubstituteTagPattern(SimpleTagPattern): """ Return an element of type `tag` with no children. """ - def handleMatch (self, m): + def handleMatch(self, m): return util.etree.Element(self.tag) class BacktickPattern(Pattern): """ Return a `<code>` element containing the matching text. """ - def __init__ (self, pattern): + def __init__(self, pattern): Pattern.__init__(self, pattern) self.tag = "code" @@ -278,14 +320,14 @@ class DoubleTagPattern(SimpleTagPattern): el1 = util.etree.Element(tag1) el2 = util.etree.SubElement(el1, tag2) el2.text = m.group(3) - if len(m.groups())==5: + if len(m.groups()) == 5: el2.tail = m.group(4) return el1 class HtmlPattern(Pattern): """ Store raw inline html and return a placeholder. """ - def handleMatch (self, m): + def handleMatch(self, m): rawhtml = self.unescape(m.group(2)) place_holder = self.markdown.htmlStash.store(rawhtml) return place_holder @@ -294,8 +336,9 @@ class HtmlPattern(Pattern): """ Return unescaped text given text with an inline placeholder. 
""" try: stash = self.markdown.treeprocessors['inline'].stashed_nodes - except KeyError: #pragma: no cover + except KeyError: # pragma: no cover return text + def get_stash(m): id = m.group(1) value = stash.get(id) @@ -351,7 +394,7 @@ class LinkPattern(Pattern): try: scheme, netloc, path, params, query, fragment = url = urlparse(url) - except ValueError: #pragma: no cover + except ValueError: # pragma: no cover # Bad url - so bad it couldn't be parsed. return '' @@ -361,18 +404,20 @@ class LinkPattern(Pattern): # Not a known (allowed) scheme. Not safe. return '' - if netloc == '' and scheme not in locless_schemes: #pragma: no cover + if netloc == '' and scheme not in locless_schemes: # pragma: no cover # This should not happen. Treat as suspect. return '' for part in url[2:]: if ":" in part: - # A colon in "path", "parameters", "query" or "fragment" is suspect. + # A colon in "path", "parameters", "query" + # or "fragment" is suspect. return '' # Url passes all tests. Return url as-is. return urlunparse(url) + class ImagePattern(LinkPattern): """ Return a img element from the given match. """ def handleMatch(self, m): @@ -396,6 +441,7 @@ class ImagePattern(LinkPattern): el.set('alt', self.unescape(truealt)) return el + class ReferencePattern(LinkPattern): """ Match to a stored reference and return link element. """ @@ -413,7 +459,7 @@ class ReferencePattern(LinkPattern): # Clean up linebreaks in id id = self.NEWLINE_CLEANUP_RE.sub(' ', id) - if not id in self.markdown.references: # ignore undefined refs + if id not in self.markdown.references: # ignore undefined refs return None href, title = self.markdown.references[id] @@ -454,6 +500,7 @@ class AutolinkPattern(Pattern): el.text = util.AtomicString(m.group(2)) return el + class AutomailPattern(Pattern): """ Return a mailto link Element given an automail link (`<foo@example.com>`). @@ -480,4 +527,3 @@ class AutomailPattern(Pattern): ord(letter) for letter in mailto]) el.set('href', mailto) return el - |