diff options
-rw-r--r-- | markdown/blockprocessors.py | 4 | ||||
-rw-r--r-- | markdown/inlinepatterns.py | 4 | ||||
-rw-r--r-- | markdown/serializers.py | 11 | ||||
-rw-r--r-- | markdown/util.py | 11 | ||||
-rw-r--r-- | tests/misc/amp-in-url.html | 1 | ||||
-rw-r--r-- | tests/misc/amp-in-url.txt | 1 | ||||
-rw-r--r-- | tests/test_apis.py | 9 | ||||
-rw-r--r-- | tests/test_syntax/inline/test_images.py (renamed from tests/test_syntax/inline/images.py) | 0 | ||||
-rw-r--r-- | tests/test_syntax/inline/test_links.py (renamed from tests/test_syntax/inline/links.py) | 12 |
9 files changed, 44 insertions, 9 deletions
diff --git a/markdown/blockprocessors.py b/markdown/blockprocessors.py index d2c9cd3..378c7c7 100644 --- a/markdown/blockprocessors.py +++ b/markdown/blockprocessors.py @@ -259,14 +259,14 @@ class CodeBlockProcessor(BlockProcessor): code = sibling[0] block, theRest = self.detab(block) code.text = util.AtomicString( - '%s\n%s\n' % (code.text, block.rstrip()) + '%s\n%s\n' % (code.text, util.code_escape(block.rstrip())) ) else: # This is a new codeblock. Create the elements and insert text. pre = util.etree.SubElement(parent, 'pre') code = util.etree.SubElement(pre, 'code') block, theRest = self.detab(block) - code.text = util.AtomicString('%s\n' % block.rstrip()) + code.text = util.AtomicString('%s\n' % util.code_escape(block.rstrip())) if theRest: # This block contained unindented line(s) after the first indented # line. Insert these lines as the first block of the master blocks diff --git a/markdown/inlinepatterns.py b/markdown/inlinepatterns.py index 83edf4b..8d49d07 100644 --- a/markdown/inlinepatterns.py +++ b/markdown/inlinepatterns.py @@ -158,7 +158,7 @@ AUTOMAIL_RE = r'<([^> \!]*@[^> ]*)>' HTML_RE = r'(\<([a-zA-Z/][^\>]*?|\!--.*?--)\>)' # & -ENTITY_RE = r'(&[\#a-zA-Z0-9]*;)' +ENTITY_RE = r'(&(?:\#[0-9]+|[a-zA-Z0-9]+);)' # two spaces at end of line LINE_BREAK_RE = r' \n' @@ -369,7 +369,7 @@ class BacktickInlineProcessor(InlineProcessor): def handleMatch(self, m, data): if m.group(3): el = util.etree.Element(self.tag) - el.text = util.AtomicString(m.group(3).strip()) + el.text = util.AtomicString(util.code_escape(m.group(3).strip())) return el, m.start(0), m.end(0) else: return m.group(1).replace('\\\\', self.ESCAPED_BSLASH), m.start(0), m.end(0) diff --git a/markdown/serializers.py b/markdown/serializers.py index 308cf7a..3cfa6bb 100644 --- a/markdown/serializers.py +++ b/markdown/serializers.py @@ -41,6 +41,7 @@ from __future__ import absolute_import from __future__ import unicode_literals from xml.etree.ElementTree import ProcessingInstruction from . 
import util +import re ElementTree = util.etree.ElementTree QName = util.etree.QName if hasattr(util.etree, 'test_comment'): # pragma: no cover @@ -52,6 +53,7 @@ __all__ = ['to_html_string', 'to_xhtml_string'] HTML_EMPTY = ("area", "base", "basefont", "br", "col", "frame", "hr", "img", "input", "isindex", "link", "meta", "param") +RE_AMP = re.compile(r'&(?!(?:\#[0-9]+|[0-9a-z]+);)', re.I) try: HTML_EMPTY = set(HTML_EMPTY) @@ -72,7 +74,8 @@ def _escape_cdata(text): # shorter than 500 character, or so. assume that's, by far, # the most common case in most applications. if "&" in text: - text = text.replace("&", "&amp;") + # Only replace & when not part of an entity + text = RE_AMP.sub('&amp;', text) if "<" in text: text = text.replace("<", "&lt;") if ">" in text: @@ -86,7 +89,8 @@ def _escape_attrib(text): # escape attribute value try: if "&" in text: - text = text.replace("&", "&amp;") + # Only replace & when not part of an entity + text = RE_AMP.sub('&amp;', text) if "<" in text: text = text.replace("<", "&lt;") if ">" in text: @@ -104,7 +108,8 @@ def _escape_attrib_html(text): # escape attribute value try: if "&" in text: - text = text.replace("&", "&amp;") + # Only replace & when not part of an entity + text = RE_AMP.sub('&amp;', text) if "<" in text: text = text.replace("<", "&lt;") if ">" in text: diff --git a/markdown/util.py b/markdown/util.py index aeb7818..b40c010 100644 --- a/markdown/util.py +++ b/markdown/util.py @@ -140,6 +140,17 @@ def parseBoolValue(value, fail_on_errors=True, preserve_none=False): raise ValueError('Cannot parse bool value: %r' % value) +def code_escape(text): + """Escape code.""" + if "&" in text: + text = text.replace("&", "&amp;") + if "<" in text: + text = text.replace("<", "&lt;") + if ">" in text: + text = text.replace(">", "&gt;") + return text + + def deprecated(message): """ Raise a DeprecationWarning when wrapped function/method is called. 
diff --git a/tests/misc/amp-in-url.html b/tests/misc/amp-in-url.html deleted file mode 100644 index 2170a54..0000000 --- a/tests/misc/amp-in-url.html +++ /dev/null @@ -1 +0,0 @@ -<p><a href="http://www.freewisdom.org/this&amp;that">link</a></p>
\ No newline at end of file diff --git a/tests/misc/amp-in-url.txt b/tests/misc/amp-in-url.txt deleted file mode 100644 index 471106e..0000000 --- a/tests/misc/amp-in-url.txt +++ /dev/null @@ -1 +0,0 @@ -[link](http://www.freewisdom.org/this&that) diff --git a/tests/test_apis.py b/tests/test_apis.py index d9d520d..c813e56 100644 --- a/tests/test_apis.py +++ b/tests/test_apis.py @@ -666,6 +666,15 @@ class testSerializers(unittest.TestCase): '<div xmlns="&lt;&amp;&quot;test&#10;escaping&quot;&gt;"></div>' ) + def testQNamePreEscaping(self): + """ Test QName that is already partially escaped. """ + qname = markdown.util.etree.QName('&lt;&amp;"test&#10;escaping"&gt;', 'div') + el = markdown.util.etree.Element(qname) + self.assertEqual( + markdown.serializers.to_xhtml_string(el), + '<div xmlns="&lt;&amp;&quot;test&#10;escaping&quot;&gt;"></div>' + ) + def buildExtension(self): """ Build an extension which registers fakeSerializer. """ def fakeSerializer(elem): diff --git a/tests/test_syntax/inline/images.py b/tests/test_syntax/inline/test_images.py index 52ce330..52ce330 100644 --- a/tests/test_syntax/inline/images.py +++ b/tests/test_syntax/inline/test_images.py diff --git a/tests/test_syntax/inline/links.py b/tests/test_syntax/inline/test_links.py index d74bb75..3e8593f 100644 --- a/tests/test_syntax/inline/links.py +++ b/tests/test_syntax/inline/test_links.py @@ -118,3 +118,15 @@ class TestAdvancedLinks(TestCase): """<p><a href="http://link.com/with spaces '&quot;and quotes&quot;" title="and title">""" """Text</a> more text</p>""" ) + + def test_amp_in_url(self): + """Test amp in URLs.""" + + self.assertMarkdownRenders( + '[link](http://www.freewisdom.org/this&that)', + '<p><a href="http://www.freewisdom.org/this&amp;that">link</a></p>' + ) + self.assertMarkdownRenders( + '[title](http://example.com/?a=1&amp;b=2)', + '<p><a href="http://example.com/?a=1&amp;b=2">title</a></p>' + ) |