diff options
author | Isaac Muse <faceless.shop@gmail.com> | 2018-07-29 12:44:18 -0600 |
---|---|---|
committer | Waylan Limberg <waylan.limberg@icloud.com> | 2018-07-29 14:44:18 -0400 |
commit | 59406c41e7c3548d1c95a2091e2d676323494f62 (patch) | |
tree | 48eabd91c037842674cb2af0454a90de97eb6f1b /markdown/serializers.py | |
parent | 0081cb8519ebda441b129462e8eb6c0f6c7d30a4 (diff) | |
download | markdown-59406c41e7c3548d1c95a2091e2d676323494f62.tar.gz markdown-59406c41e7c3548d1c95a2091e2d676323494f62.tar.bz2 markdown-59406c41e7c3548d1c95a2091e2d676323494f62.zip |
Fix double escaping of amp in attributes (#670)
Serializer should only escape `&` in attributes if it is not already part of an entity such as `&amp;`.
A better regex avoids matching Unicode characters and `_` in amp detection.
In general, we don't want to escape already escaped content, but with code content, we want literal representations of escaped content, so have code content explicitly escape its content before placing in AtomicStrings.
Closes #669.
Diffstat (limited to 'markdown/serializers.py')
-rw-r--r-- | markdown/serializers.py | 11 |
1 files changed, 8 insertions, 3 deletions
diff --git a/markdown/serializers.py b/markdown/serializers.py index 308cf7a..3cfa6bb 100644 --- a/markdown/serializers.py +++ b/markdown/serializers.py @@ -41,6 +41,7 @@ from __future__ import absolute_import from __future__ import unicode_literals from xml.etree.ElementTree import ProcessingInstruction from . import util +import re ElementTree = util.etree.ElementTree QName = util.etree.QName if hasattr(util.etree, 'test_comment'): # pragma: no cover @@ -52,6 +53,7 @@ __all__ = ['to_html_string', 'to_xhtml_string'] HTML_EMPTY = ("area", "base", "basefont", "br", "col", "frame", "hr", "img", "input", "isindex", "link", "meta", "param") +RE_AMP = re.compile(r'&(?!(?:\#[0-9]+|[0-9a-z]+);)', re.I) try: HTML_EMPTY = set(HTML_EMPTY) @@ -72,7 +74,8 @@ def _escape_cdata(text): # shorter than 500 character, or so. assume that's, by far, # the most common case in most applications. if "&" in text: - text = text.replace("&", "&amp;") + # Only replace & when not part of an entity + text = RE_AMP.sub('&amp;', text) if "<" in text: text = text.replace("<", "&lt;") if ">" in text: @@ -86,7 +89,8 @@ def _escape_attrib(text): # escape attribute value try: if "&" in text: - text = text.replace("&", "&amp;") + # Only replace & when not part of an entity + text = RE_AMP.sub('&amp;', text) if "<" in text: text = text.replace("<", "&lt;") if ">" in text: @@ -104,7 +108,8 @@ def _escape_attrib_html(text): # escape attribute value try: if "&" in text: - text = text.replace("&", "&amp;") + # Only replace & when not part of an entity + text = RE_AMP.sub('&amp;', text) if "<" in text: text = text.replace("<", "&lt;") if ">" in text: |