aboutsummaryrefslogtreecommitdiffstats
path: root/markdown/serializers.py
diff options
context:
space:
mode:
author: Isaac Muse <faceless.shop@gmail.com> 2018-07-29 12:44:18 -0600
committer: Waylan Limberg <waylan.limberg@icloud.com> 2018-07-29 14:44:18 -0400
commit: 59406c41e7c3548d1c95a2091e2d676323494f62 (patch)
tree: 48eabd91c037842674cb2af0454a90de97eb6f1b /markdown/serializers.py
parent: 0081cb8519ebda441b129462e8eb6c0f6c7d30a4 (diff)
download: markdown-59406c41e7c3548d1c95a2091e2d676323494f62.tar.gz
markdown-59406c41e7c3548d1c95a2091e2d676323494f62.tar.bz2
markdown-59406c41e7c3548d1c95a2091e2d676323494f62.zip
Fix double escaping of amp in attributes (#670)
The serializer should only escape `&` in attributes when it is not already part of an entity such as `&amp;`. A better regex avoids matching Unicode characters and `_` in amp detection. In general, we don't want to escape already-escaped content, but for code content we want literal representations of escaped content, so code content now explicitly escapes its content before placing it in AtomicStrings. Closes #669.
Diffstat (limited to 'markdown/serializers.py')
-rw-r--r-- markdown/serializers.py 11
1 files changed, 8 insertions, 3 deletions
diff --git a/markdown/serializers.py b/markdown/serializers.py
index 308cf7a..3cfa6bb 100644
--- a/markdown/serializers.py
+++ b/markdown/serializers.py
@@ -41,6 +41,7 @@ from __future__ import absolute_import
from __future__ import unicode_literals
from xml.etree.ElementTree import ProcessingInstruction
from . import util
+import re
ElementTree = util.etree.ElementTree
QName = util.etree.QName
if hasattr(util.etree, 'test_comment'): # pragma: no cover
@@ -52,6 +53,7 @@ __all__ = ['to_html_string', 'to_xhtml_string']
HTML_EMPTY = ("area", "base", "basefont", "br", "col", "frame", "hr",
"img", "input", "isindex", "link", "meta", "param")
+RE_AMP = re.compile(r'&(?!(?:\#[0-9]+|[0-9a-z]+);)', re.I)
try:
HTML_EMPTY = set(HTML_EMPTY)
@@ -72,7 +74,8 @@ def _escape_cdata(text):
# shorter than 500 character, or so. assume that's, by far,
# the most common case in most applications.
if "&" in text:
- text = text.replace("&", "&amp;")
+ # Only replace & when not part of an entity
+ text = RE_AMP.sub('&amp;', text)
if "<" in text:
text = text.replace("<", "&lt;")
if ">" in text:
@@ -86,7 +89,8 @@ def _escape_attrib(text):
# escape attribute value
try:
if "&" in text:
- text = text.replace("&", "&amp;")
+ # Only replace & when not part of an entity
+ text = RE_AMP.sub('&amp;', text)
if "<" in text:
text = text.replace("<", "&lt;")
if ">" in text:
@@ -104,7 +108,8 @@ def _escape_attrib_html(text):
# escape attribute value
try:
if "&" in text:
- text = text.replace("&", "&amp;")
+ # Only replace & when not part of an entity
+ text = RE_AMP.sub('&amp;', text)
if "<" in text:
text = text.replace("<", "&lt;")
if ">" in text: