about summary refs log tree commit diff stats
path: root/markdown/serializers.py
diff options
context:
space:
mode:
Diffstat (limited to 'markdown/serializers.py')
-rw-r--r-- markdown/serializers.py 11
1 files changed, 8 insertions, 3 deletions
diff --git a/markdown/serializers.py b/markdown/serializers.py
index 308cf7a..3cfa6bb 100644
--- a/markdown/serializers.py
+++ b/markdown/serializers.py
@@ -41,6 +41,7 @@ from __future__ import absolute_import
from __future__ import unicode_literals
from xml.etree.ElementTree import ProcessingInstruction
from . import util
+import re
ElementTree = util.etree.ElementTree
QName = util.etree.QName
if hasattr(util.etree, 'test_comment'): # pragma: no cover
@@ -52,6 +53,7 @@ __all__ = ['to_html_string', 'to_xhtml_string']
HTML_EMPTY = ("area", "base", "basefont", "br", "col", "frame", "hr",
"img", "input", "isindex", "link", "meta", "param")
+RE_AMP = re.compile(r'&(?!(?:\#[0-9]+|[0-9a-z]+);)', re.I)
try:
HTML_EMPTY = set(HTML_EMPTY)
@@ -72,7 +74,8 @@ def _escape_cdata(text):
# shorter than 500 character, or so. assume that's, by far,
# the most common case in most applications.
if "&" in text:
- text = text.replace("&", "&amp;")
+ # Only replace & when not part of an entity
+ text = RE_AMP.sub('&amp;', text)
if "<" in text:
text = text.replace("<", "&lt;")
if ">" in text:
@@ -86,7 +89,8 @@ def _escape_attrib(text):
# escape attribute value
try:
if "&" in text:
- text = text.replace("&", "&amp;")
+ # Only replace & when not part of an entity
+ text = RE_AMP.sub('&amp;', text)
if "<" in text:
text = text.replace("<", "&lt;")
if ">" in text:
@@ -104,7 +108,8 @@ def _escape_attrib_html(text):
# escape attribute value
try:
if "&" in text:
- text = text.replace("&", "&amp;")
+ # Only replace & when not part of an entity
+ text = RE_AMP.sub('&amp;', text)
if "<" in text:
text = text.replace("<", "&lt;")
if ">" in text: