From 59406c41e7c3548d1c95a2091e2d676323494f62 Mon Sep 17 00:00:00 2001 From: Isaac Muse Date: Sun, 29 Jul 2018 12:44:18 -0600 Subject: Fix double escaping of amp in attributes (#670) Serializer should only escape & in attributes if not part of &amp;. Better regex avoid Unicode and `_` in amp detection. In general, we don't want to escape already escaped content, but with code content, we want literal representations of escaped content, so have code content explicitly escape its content before placing in AtomicStrings. Closes #669. --- markdown/blockprocessors.py | 4 +- markdown/inlinepatterns.py | 4 +- markdown/serializers.py | 11 ++- markdown/util.py | 11 +++ tests/misc/amp-in-url.html | 1 - tests/misc/amp-in-url.txt | 1 - tests/test_apis.py | 9 ++ tests/test_syntax/inline/images.py | 161 -------------------------------- tests/test_syntax/inline/links.py | 120 ------------------------ tests/test_syntax/inline/test_images.py | 161 ++++++++++++++++++++++++++++++++ tests/test_syntax/inline/test_links.py | 132 ++++++++++++++++++++++++++ 11 files changed, 325 insertions(+), 290 deletions(-) delete mode 100644 tests/misc/amp-in-url.html delete mode 100644 tests/misc/amp-in-url.txt delete mode 100644 tests/test_syntax/inline/images.py delete mode 100644 tests/test_syntax/inline/links.py create mode 100644 tests/test_syntax/inline/test_images.py create mode 100644 tests/test_syntax/inline/test_links.py diff --git a/markdown/blockprocessors.py b/markdown/blockprocessors.py index d2c9cd3..378c7c7 100644 --- a/markdown/blockprocessors.py +++ b/markdown/blockprocessors.py @@ -259,14 +259,14 @@ class CodeBlockProcessor(BlockProcessor): code = sibling[0] block, theRest = self.detab(block) code.text = util.AtomicString( - '%s\n%s\n' % (code.text, block.rstrip()) + '%s\n%s\n' % (code.text, util.code_escape(block.rstrip())) ) else: # This is a new codeblock. Create the elements and insert text. 
pre = util.etree.SubElement(parent, 'pre') code = util.etree.SubElement(pre, 'code') block, theRest = self.detab(block) - code.text = util.AtomicString('%s\n' % block.rstrip()) + code.text = util.AtomicString('%s\n' % util.code_escape(block.rstrip())) if theRest: # This block contained unindented line(s) after the first indented # line. Insert these lines as the first block of the master blocks diff --git a/markdown/inlinepatterns.py b/markdown/inlinepatterns.py index 83edf4b..8d49d07 100644 --- a/markdown/inlinepatterns.py +++ b/markdown/inlinepatterns.py @@ -158,7 +158,7 @@ AUTOMAIL_RE = r'<([^> \!]*@[^> ]*)>' HTML_RE = r'(\<([a-zA-Z/][^\>]*?|\!--.*?--)\>)' # & -ENTITY_RE = r'(&[\#a-zA-Z0-9]*;)' +ENTITY_RE = r'(&(?:\#[0-9]+|[a-zA-Z0-9]+);)' # two spaces at end of line LINE_BREAK_RE = r' \n' @@ -369,7 +369,7 @@ class BacktickInlineProcessor(InlineProcessor): def handleMatch(self, m, data): if m.group(3): el = util.etree.Element(self.tag) - el.text = util.AtomicString(m.group(3).strip()) + el.text = util.AtomicString(util.code_escape(m.group(3).strip())) return el, m.start(0), m.end(0) else: return m.group(1).replace('\\\\', self.ESCAPED_BSLASH), m.start(0), m.end(0) diff --git a/markdown/serializers.py b/markdown/serializers.py index 308cf7a..3cfa6bb 100644 --- a/markdown/serializers.py +++ b/markdown/serializers.py @@ -41,6 +41,7 @@ from __future__ import absolute_import from __future__ import unicode_literals from xml.etree.ElementTree import ProcessingInstruction from . 
import util +import re ElementTree = util.etree.ElementTree QName = util.etree.QName if hasattr(util.etree, 'test_comment'): # pragma: no cover @@ -52,6 +53,7 @@ __all__ = ['to_html_string', 'to_xhtml_string'] HTML_EMPTY = ("area", "base", "basefont", "br", "col", "frame", "hr", "img", "input", "isindex", "link", "meta", "param") +RE_AMP = re.compile(r'&(?!(?:\#[0-9]+|[0-9a-z]+);)', re.I) try: HTML_EMPTY = set(HTML_EMPTY) @@ -72,7 +74,8 @@ def _escape_cdata(text): # shorter than 500 character, or so. assume that's, by far, # the most common case in most applications. if "&" in text: - text = text.replace("&", "&amp;") + # Only replace & when not part of an entity + text = RE_AMP.sub('&amp;', text) if "<" in text: text = text.replace("<", "&lt;") if ">" in text: @@ -86,7 +89,8 @@ def _escape_attrib(text): # escape attribute value try: if "&" in text: - text = text.replace("&", "&amp;") + # Only replace & when not part of an entity + text = RE_AMP.sub('&amp;', text) if "<" in text: text = text.replace("<", "&lt;") if ">" in text: @@ -104,7 +108,8 @@ def _escape_attrib_html(text): # escape attribute value try: if "&" in text: - text = text.replace("&", "&amp;") + # Only replace & when not part of an entity + text = RE_AMP.sub('&amp;', text) if "<" in text: text = text.replace("<", "&lt;") if ">" in text: diff --git a/markdown/util.py b/markdown/util.py index aeb7818..b40c010 100644 --- a/markdown/util.py +++ b/markdown/util.py @@ -140,6 +140,17 @@ def parseBoolValue(value, fail_on_errors=True, preserve_none=False): raise ValueError('Cannot parse bool value: %r' % value) +def code_escape(text): + """Escape code.""" + if "&" in text: + text = text.replace("&", "&amp;") + if "<" in text: + text = text.replace("<", "&lt;") + if ">" in text: + text = text.replace(">", "&gt;") + return text + + def deprecated(message): """ Raise a DeprecationWarning when wrapped function/method is called. 
diff --git a/tests/misc/amp-in-url.html b/tests/misc/amp-in-url.html deleted file mode 100644 index 2170a54..0000000 --- a/tests/misc/amp-in-url.html +++ /dev/null @@ -1 +0,0 @@ -

link

\ No newline at end of file diff --git a/tests/misc/amp-in-url.txt b/tests/misc/amp-in-url.txt deleted file mode 100644 index 471106e..0000000 --- a/tests/misc/amp-in-url.txt +++ /dev/null @@ -1 +0,0 @@ -[link](http://www.freewisdom.org/this&that) diff --git a/tests/test_apis.py b/tests/test_apis.py index d9d520d..c813e56 100644 --- a/tests/test_apis.py +++ b/tests/test_apis.py @@ -666,6 +666,15 @@ class testSerializers(unittest.TestCase): '
' ) + def testQNamePreEscaping(self): + """ Test QName that is already partially escaped. """ + qname = markdown.util.etree.QName('<&"test escaping">', 'div') + el = markdown.util.etree.Element(qname) + self.assertEqual( + markdown.serializers.to_xhtml_string(el), + '
' + ) + def buildExtension(self): """ Build an extension which registers fakeSerializer. """ def fakeSerializer(elem): diff --git a/tests/test_syntax/inline/images.py b/tests/test_syntax/inline/images.py deleted file mode 100644 index 52ce330..0000000 --- a/tests/test_syntax/inline/images.py +++ /dev/null @@ -1,161 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Python Markdown - -A Python implementation of John Gruber's Markdown. - -Documentation: https://python-markdown.github.io/ -GitHub: https://github.com/Python-Markdown/markdown/ -PyPI: https://pypi.org/project/Markdown/ - -Started by Manfred Stienstra (http://www.dwerg.net/). -Maintained for a few years by Yuri Takhteyev (http://www.freewisdom.org). -Currently maintained by Waylan Limberg (https://github.com/waylan), -Dmitry Shachnev (https://github.com/mitya57) and Isaac Muse (https://github.com/facelessuser). - -Copyright 2007-2018 The Python Markdown Project (v. 1.7 and later) -Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b) -Copyright 2004 Manfred Stienstra (the original version) - -License: BSD (see LICENSE.md for details). -""" - -from markdown.test_tools import TestCase - - -class TestAdvancedImages(TestCase): - - def test_nested_square_brackets(self): - self.assertMarkdownRenders( - """![Text[[[[[[[]]]]]]][]](http://link.com/image.png) more text""", - """

Text[[[[[[[]]]]]]][] more text

""" - ) - - def test_nested_round_brackets(self): - self.assertMarkdownRenders( - """![Text](http://link.com/(((((((()))))))()).png) more text""", - """

Text more text

""" - ) - - def test_uneven_brackets_with_titles1(self): - self.assertMarkdownRenders( - """![Text](http://link.com/(.png"title") more text""", - """

Text more text

""" - ) - - def test_uneven_brackets_with_titles2(self): - self.assertMarkdownRenders( - """![Text](http://link.com/('.png"title") more text""", - """

Text more text

""" - ) - - def test_uneven_brackets_with_titles3(self): - self.assertMarkdownRenders( - """![Text](http://link.com/(.png"title)") more text""", - """

Text more text

""" - ) - - def test_uneven_brackets_with_titles4(self): - self.assertMarkdownRenders( - """![Text](http://link.com/(.png "title") more text""", - """

Text more text

""" - ) - - def test_uneven_brackets_with_titles5(self): - self.assertMarkdownRenders( - """![Text](http://link.com/(.png "title)") more text""", - """

Text more text

""" - ) - - def test_mixed_title_quotes1(self): - self.assertMarkdownRenders( - """![Text](http://link.com/'.png"title") more text""", - """

Text more text

""" - ) - - def test_mixed_title_quotes2(self): - self.assertMarkdownRenders( - """![Text](http://link.com/".png'title') more text""", - """

Text more text

""" - ) - - def test_mixed_title_quotes3(self): - self.assertMarkdownRenders( - """![Text](http://link.com/with spaces.png'"and quotes" 'and title') more text""", - """

Text""" - """ more text

""" - ) - - def test_mixed_title_quotes4(self): - self.assertMarkdownRenders( - """![Text](http://link.com/with spaces'.png"and quotes" 'and title") more text""", - """

Text""" - """ more text

""" - ) - - def test_mixed_title_quotes5(self): - self.assertMarkdownRenders( - """![Text](http://link.com/with spaces .png'"and quotes" 'and title') more text""", - """

Text more text

""" - ) - - def test_mixed_title_quotes6(self): - self.assertMarkdownRenders( - """![Text](http://link.com/with spaces "and quotes".png 'and title') more text""", - """

Text""" - """ more text

""" - ) - - def test_single_quote(self): - self.assertMarkdownRenders( - """![test](link"notitle.png)""", - """

test

""" - ) - - def test_angle_with_mixed_title_quotes(self): - self.assertMarkdownRenders( - """![Text]( 'and title') more text""", - """

Text""" - """ more text

""" - ) - - def test_misc(self): - self.assertMarkdownRenders( - """![Poster](http://humane_man.jpg "The most humane man.")""", - """

Poster

""" - ) - - def test_misc_ref(self): - self.assertMarkdownRenders( - self.dedent( - """ - ![Poster][] - - [Poster]:http://humane_man.jpg "The most humane man." - """ - ), - self.dedent( - """ -

Poster

- """ - ) - ) - - def test_misc_blank(self): - self.assertMarkdownRenders( - """![Blank]()""", - """

Blank

""" - ) - - def test_misc_img_title(self): - self.assertMarkdownRenders( - """![Image](http://humane man.jpg "The most humane man.")""", - """

Image

""" - ) - - def test_misc_img(self): - self.assertMarkdownRenders( - """![Image](http://humane man.jpg)""", - """

Image

""" - ) diff --git a/tests/test_syntax/inline/links.py b/tests/test_syntax/inline/links.py deleted file mode 100644 index d74bb75..0000000 --- a/tests/test_syntax/inline/links.py +++ /dev/null @@ -1,120 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Python Markdown - -A Python implementation of John Gruber's Markdown. - -Documentation: https://python-markdown.github.io/ -GitHub: https://github.com/Python-Markdown/markdown/ -PyPI: https://pypi.org/project/Markdown/ - -Started by Manfred Stienstra (http://www.dwerg.net/). -Maintained for a few years by Yuri Takhteyev (http://www.freewisdom.org). -Currently maintained by Waylan Limberg (https://github.com/waylan), -Dmitry Shachnev (https://github.com/mitya57) and Isaac Muse (https://github.com/facelessuser). - -Copyright 2007-2018 The Python Markdown Project (v. 1.7 and later) -Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b) -Copyright 2004 Manfred Stienstra (the original version) - -License: BSD (see LICENSE.md for details). -""" - -from markdown.test_tools import TestCase - - -class TestAdvancedLinks(TestCase): - - def test_nested_square_brackets(self): - self.assertMarkdownRenders( - """[Text[[[[[[[]]]]]]][]](http://link.com) more text""", - """

Text[[[[[[[]]]]]]][] more text

""" - ) - - def test_nested_round_brackets(self): - self.assertMarkdownRenders( - """[Text](http://link.com/(((((((()))))))())) more text""", - """

Text more text

""" - ) - - def test_uneven_brackets_with_titles1(self): - self.assertMarkdownRenders( - """[Text](http://link.com/("title") more text""", - """

Text more text

""" - ) - - def test_uneven_brackets_with_titles2(self): - self.assertMarkdownRenders( - """[Text](http://link.com/('"title") more text""", - """

Text more text

""" - ) - - def test_uneven_brackets_with_titles3(self): - self.assertMarkdownRenders( - """[Text](http://link.com/("title)") more text""", - """

Text more text

""" - ) - - def test_uneven_brackets_with_titles4(self): - self.assertMarkdownRenders( - """[Text](http://link.com/( "title") more text""", - """

Text more text

""" - ) - - def test_uneven_brackets_with_titles5(self): - self.assertMarkdownRenders( - """[Text](http://link.com/( "title)") more text""", - """

Text more text

""" - ) - - def test_mixed_title_quotes1(self): - self.assertMarkdownRenders( - """[Text](http://link.com/'"title") more text""", - """

Text more text

""" - ) - - def test_mixed_title_quotes2(self): - self.assertMarkdownRenders( - """[Text](http://link.com/"'title') more text""", - """

Text more text

""" - ) - - def test_mixed_title_quotes3(self): - self.assertMarkdownRenders( - """[Text](http://link.com/with spaces'"and quotes" 'and title') more text""", - """

""" - """Text more text

""" - ) - - def test_mixed_title_quotes4(self): - self.assertMarkdownRenders( - """[Text](http://link.com/with spaces'"and quotes" 'and title") more text""", - """

Text more text

""" - ) - - def test_mixed_title_quotes5(self): - self.assertMarkdownRenders( - """[Text](http://link.com/with spaces '"and quotes" 'and title') more text""", - """

""" - """Text more text

""" - ) - - def test_mixed_title_quotes6(self): - self.assertMarkdownRenders( - """[Text](http://link.com/with spaces "and quotes" 'and title') more text""", - """

""" - """Text more text

""" - ) - - def test_single_quote(self): - self.assertMarkdownRenders( - """[test](link"notitle)""", - """

test

""" - ) - - def test_angle_with_mixed_title_quotes(self): - self.assertMarkdownRenders( - """[Text]( 'and title') more text""", - """

""" - """Text more text

""" - ) diff --git a/tests/test_syntax/inline/test_images.py b/tests/test_syntax/inline/test_images.py new file mode 100644 index 0000000..52ce330 --- /dev/null +++ b/tests/test_syntax/inline/test_images.py @@ -0,0 +1,161 @@ +# -*- coding: utf-8 -*- +""" +Python Markdown + +A Python implementation of John Gruber's Markdown. + +Documentation: https://python-markdown.github.io/ +GitHub: https://github.com/Python-Markdown/markdown/ +PyPI: https://pypi.org/project/Markdown/ + +Started by Manfred Stienstra (http://www.dwerg.net/). +Maintained for a few years by Yuri Takhteyev (http://www.freewisdom.org). +Currently maintained by Waylan Limberg (https://github.com/waylan), +Dmitry Shachnev (https://github.com/mitya57) and Isaac Muse (https://github.com/facelessuser). + +Copyright 2007-2018 The Python Markdown Project (v. 1.7 and later) +Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b) +Copyright 2004 Manfred Stienstra (the original version) + +License: BSD (see LICENSE.md for details). +""" + +from markdown.test_tools import TestCase + + +class TestAdvancedImages(TestCase): + + def test_nested_square_brackets(self): + self.assertMarkdownRenders( + """![Text[[[[[[[]]]]]]][]](http://link.com/image.png) more text""", + """

Text[[[[[[[]]]]]]][] more text

""" + ) + + def test_nested_round_brackets(self): + self.assertMarkdownRenders( + """![Text](http://link.com/(((((((()))))))()).png) more text""", + """

Text more text

""" + ) + + def test_uneven_brackets_with_titles1(self): + self.assertMarkdownRenders( + """![Text](http://link.com/(.png"title") more text""", + """

Text more text

""" + ) + + def test_uneven_brackets_with_titles2(self): + self.assertMarkdownRenders( + """![Text](http://link.com/('.png"title") more text""", + """

Text more text

""" + ) + + def test_uneven_brackets_with_titles3(self): + self.assertMarkdownRenders( + """![Text](http://link.com/(.png"title)") more text""", + """

Text more text

""" + ) + + def test_uneven_brackets_with_titles4(self): + self.assertMarkdownRenders( + """![Text](http://link.com/(.png "title") more text""", + """

Text more text

""" + ) + + def test_uneven_brackets_with_titles5(self): + self.assertMarkdownRenders( + """![Text](http://link.com/(.png "title)") more text""", + """

Text more text

""" + ) + + def test_mixed_title_quotes1(self): + self.assertMarkdownRenders( + """![Text](http://link.com/'.png"title") more text""", + """

Text more text

""" + ) + + def test_mixed_title_quotes2(self): + self.assertMarkdownRenders( + """![Text](http://link.com/".png'title') more text""", + """

Text more text

""" + ) + + def test_mixed_title_quotes3(self): + self.assertMarkdownRenders( + """![Text](http://link.com/with spaces.png'"and quotes" 'and title') more text""", + """

Text""" + """ more text

""" + ) + + def test_mixed_title_quotes4(self): + self.assertMarkdownRenders( + """![Text](http://link.com/with spaces'.png"and quotes" 'and title") more text""", + """

Text""" + """ more text

""" + ) + + def test_mixed_title_quotes5(self): + self.assertMarkdownRenders( + """![Text](http://link.com/with spaces .png'"and quotes" 'and title') more text""", + """

Text more text

""" + ) + + def test_mixed_title_quotes6(self): + self.assertMarkdownRenders( + """![Text](http://link.com/with spaces "and quotes".png 'and title') more text""", + """

Text""" + """ more text

""" + ) + + def test_single_quote(self): + self.assertMarkdownRenders( + """![test](link"notitle.png)""", + """

test

""" + ) + + def test_angle_with_mixed_title_quotes(self): + self.assertMarkdownRenders( + """![Text]( 'and title') more text""", + """

Text""" + """ more text

""" + ) + + def test_misc(self): + self.assertMarkdownRenders( + """![Poster](http://humane_man.jpg "The most humane man.")""", + """

Poster

""" + ) + + def test_misc_ref(self): + self.assertMarkdownRenders( + self.dedent( + """ + ![Poster][] + + [Poster]:http://humane_man.jpg "The most humane man." + """ + ), + self.dedent( + """ +

Poster

+ """ + ) + ) + + def test_misc_blank(self): + self.assertMarkdownRenders( + """![Blank]()""", + """

Blank

""" + ) + + def test_misc_img_title(self): + self.assertMarkdownRenders( + """![Image](http://humane man.jpg "The most humane man.")""", + """

Image

""" + ) + + def test_misc_img(self): + self.assertMarkdownRenders( + """![Image](http://humane man.jpg)""", + """

Image

""" + ) diff --git a/tests/test_syntax/inline/test_links.py b/tests/test_syntax/inline/test_links.py new file mode 100644 index 0000000..3e8593f --- /dev/null +++ b/tests/test_syntax/inline/test_links.py @@ -0,0 +1,132 @@ +# -*- coding: utf-8 -*- +""" +Python Markdown + +A Python implementation of John Gruber's Markdown. + +Documentation: https://python-markdown.github.io/ +GitHub: https://github.com/Python-Markdown/markdown/ +PyPI: https://pypi.org/project/Markdown/ + +Started by Manfred Stienstra (http://www.dwerg.net/). +Maintained for a few years by Yuri Takhteyev (http://www.freewisdom.org). +Currently maintained by Waylan Limberg (https://github.com/waylan), +Dmitry Shachnev (https://github.com/mitya57) and Isaac Muse (https://github.com/facelessuser). + +Copyright 2007-2018 The Python Markdown Project (v. 1.7 and later) +Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b) +Copyright 2004 Manfred Stienstra (the original version) + +License: BSD (see LICENSE.md for details). +""" + +from markdown.test_tools import TestCase + + +class TestAdvancedLinks(TestCase): + + def test_nested_square_brackets(self): + self.assertMarkdownRenders( + """[Text[[[[[[[]]]]]]][]](http://link.com) more text""", + """

Text[[[[[[[]]]]]]][] more text

""" + ) + + def test_nested_round_brackets(self): + self.assertMarkdownRenders( + """[Text](http://link.com/(((((((()))))))())) more text""", + """

Text more text

""" + ) + + def test_uneven_brackets_with_titles1(self): + self.assertMarkdownRenders( + """[Text](http://link.com/("title") more text""", + """

Text more text

""" + ) + + def test_uneven_brackets_with_titles2(self): + self.assertMarkdownRenders( + """[Text](http://link.com/('"title") more text""", + """

Text more text

""" + ) + + def test_uneven_brackets_with_titles3(self): + self.assertMarkdownRenders( + """[Text](http://link.com/("title)") more text""", + """

Text more text

""" + ) + + def test_uneven_brackets_with_titles4(self): + self.assertMarkdownRenders( + """[Text](http://link.com/( "title") more text""", + """

Text more text

""" + ) + + def test_uneven_brackets_with_titles5(self): + self.assertMarkdownRenders( + """[Text](http://link.com/( "title)") more text""", + """

Text more text

""" + ) + + def test_mixed_title_quotes1(self): + self.assertMarkdownRenders( + """[Text](http://link.com/'"title") more text""", + """

Text more text

""" + ) + + def test_mixed_title_quotes2(self): + self.assertMarkdownRenders( + """[Text](http://link.com/"'title') more text""", + """

Text more text

""" + ) + + def test_mixed_title_quotes3(self): + self.assertMarkdownRenders( + """[Text](http://link.com/with spaces'"and quotes" 'and title') more text""", + """

""" + """Text more text

""" + ) + + def test_mixed_title_quotes4(self): + self.assertMarkdownRenders( + """[Text](http://link.com/with spaces'"and quotes" 'and title") more text""", + """

Text more text

""" + ) + + def test_mixed_title_quotes5(self): + self.assertMarkdownRenders( + """[Text](http://link.com/with spaces '"and quotes" 'and title') more text""", + """

""" + """Text more text

""" + ) + + def test_mixed_title_quotes6(self): + self.assertMarkdownRenders( + """[Text](http://link.com/with spaces "and quotes" 'and title') more text""", + """

""" + """Text more text

""" + ) + + def test_single_quote(self): + self.assertMarkdownRenders( + """[test](link"notitle)""", + """

test

""" + ) + + def test_angle_with_mixed_title_quotes(self): + self.assertMarkdownRenders( + """[Text]( 'and title') more text""", + """

""" + """Text more text

""" + ) + + def test_amp_in_url(self): + """Test amp in URLs.""" + + self.assertMarkdownRenders( + '[link](http://www.freewisdom.org/this&that)', + '

link

' + ) + self.assertMarkdownRenders( + '[title](http://example.com/?a=1&b=2)', + '

title

' + ) -- cgit v1.2.3