From e5b2813bbf02710c7deb148896085a3dac4828dc Mon Sep 17 00:00:00 2001 From: Waylan Limberg Date: Wed, 17 Aug 2011 11:45:54 -0400 Subject: Fixed #39. Refactored escaping so that it only escapes a predefined set of chars (the set defined by JG in the syntax rules). All other backslashes are passed through unaltered by the parser. If extensions want to add to the escapable chars, they can append to the list at markdown.ESCAPED_CHARS. --- markdown/__init__.py | 3 +++ markdown/inlinepatterns.py | 16 +++++++++++++--- markdown/postprocessors.py | 15 +++++++++++++-- tests/basic/backlash-escapes.html | 1 - tests/basic/backlash-escapes.txt | 2 -- 5 files changed, 29 insertions(+), 8 deletions(-) diff --git a/markdown/__init__.py b/markdown/__init__.py index f7957a5..2aafcf6 100644 --- a/markdown/__init__.py +++ b/markdown/__init__.py @@ -73,6 +73,9 @@ class Markdown: 'xhtml5': to_xhtml_string, } + ESCAPED_CHARS = ['\\', '`', '*', '_', '{', '}', '[', ']', + '(', ')', '#', '+', '-', '.', '!'] + def __init__(self, *args, **kwargs): """ Creates a new Markdown instance. diff --git a/markdown/inlinepatterns.py b/markdown/inlinepatterns.py index 35f0335..1e05e00 100644 --- a/markdown/inlinepatterns.py +++ b/markdown/inlinepatterns.py @@ -59,7 +59,7 @@ def build_inlinepatterns(md_instance, **kwargs): """ Build the default set of inline patterns for Markdown. 
""" inlinePatterns = odict.OrderedDict() inlinePatterns["backtick"] = BacktickPattern(BACKTICK_RE) - inlinePatterns["escape"] = SimpleTextPattern(ESCAPE_RE) + inlinePatterns["escape"] = EscapePattern(ESCAPE_RE, md_instance) inlinePatterns["reference"] = ReferencePattern(REFERENCE_RE, md_instance) inlinePatterns["link"] = LinkPattern(LINK_RE, md_instance) inlinePatterns["image_link"] = ImagePattern(IMAGE_LINK_RE, md_instance) @@ -197,8 +197,6 @@ class Pattern: return util.INLINE_PLACEHOLDER_RE.sub(get_stash, text) -BasePattern = Pattern # for backward compatibility - class SimpleTextPattern(Pattern): """ Return a simple text of group(2) of a Pattern. """ def handleMatch(self, m): @@ -207,6 +205,18 @@ class SimpleTextPattern(Pattern): return None return text + +class EscapePattern(Pattern): + """ Return an escaped character. """ + + def handleMatch(self, m): + char = m.group(2) + if char in self.markdown.ESCAPED_CHARS: + return '%s%s%s' % (util.STX, ord(char), util.ETX) + else: + return '\\%s' % char + + class SimpleTagPattern(Pattern): """ Return element of type `tag` with a text attribute of group(3) diff --git a/markdown/postprocessors.py b/markdown/postprocessors.py index b646027..b21a569 100644 --- a/markdown/postprocessors.py +++ b/markdown/postprocessors.py @@ -17,6 +17,7 @@ def build_postprocessors(md_instance, **kwargs): postprocessors = odict.OrderedDict() postprocessors["raw_html"] = RawHtmlPostprocessor(md_instance) postprocessors["amp_substitute"] = AndSubstitutePostprocessor() + postprocessors["unescape"] = UnescapePostprocessor() return postprocessors @@ -91,9 +92,19 @@ class RawHtmlPostprocessor(Postprocessor): class AndSubstitutePostprocessor(Postprocessor): """ Restore valid entities """ - def __init__(self): - pass def run(self, text): text = text.replace(util.AMP_SUBSTITUTE, "&") return text + + +class UnescapePostprocessor(Postprocessor): + """ Restore escaped chars """ + + RE = re.compile('%s(\d+)%s' % (util.STX, util.ETX)) + + def 
unescape(self, m): + return unichr(int(m.group(1))) + + def run(self, text): + return self.RE.sub(self.unescape, text) diff --git a/tests/basic/backlash-escapes.html b/tests/basic/backlash-escapes.html index 876775f..e2a9445 100644 --- a/tests/basic/backlash-escapes.html +++ b/tests/basic/backlash-escapes.html @@ -9,7 +9,6 @@

Right bracket: ]

Left paren: (

Right paren: )

-

Greater-than: >

Hash: #

Period: .

Bang: !

diff --git a/tests/basic/backlash-escapes.txt b/tests/basic/backlash-escapes.txt index 16447a0..ffb3cd0 100644 --- a/tests/basic/backlash-escapes.txt +++ b/tests/basic/backlash-escapes.txt @@ -20,8 +20,6 @@ Left paren: \( Right paren: \) -Greater-than: \> - Hash: \# Period: \. -- cgit v1.2.3