about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- markdown/blockprocessors.py 4
-rw-r--r-- markdown/inlinepatterns.py 4
-rw-r--r-- markdown/serializers.py 11
-rw-r--r-- markdown/util.py 11
-rw-r--r-- tests/misc/amp-in-url.html 1
-rw-r--r-- tests/misc/amp-in-url.txt 1
-rw-r--r-- tests/test_apis.py 9
-rw-r--r-- tests/test_syntax/inline/test_images.py (renamed from tests/test_syntax/inline/images.py) 0
-rw-r--r-- tests/test_syntax/inline/test_links.py (renamed from tests/test_syntax/inline/links.py) 12
9 files changed, 44 insertions, 9 deletions
diff --git a/markdown/blockprocessors.py b/markdown/blockprocessors.py
index d2c9cd3..378c7c7 100644
--- a/markdown/blockprocessors.py
+++ b/markdown/blockprocessors.py
@@ -259,14 +259,14 @@ class CodeBlockProcessor(BlockProcessor):
code = sibling[0]
block, theRest = self.detab(block)
code.text = util.AtomicString(
- '%s\n%s\n' % (code.text, block.rstrip())
+ '%s\n%s\n' % (code.text, util.code_escape(block.rstrip()))
)
else:
# This is a new codeblock. Create the elements and insert text.
pre = util.etree.SubElement(parent, 'pre')
code = util.etree.SubElement(pre, 'code')
block, theRest = self.detab(block)
- code.text = util.AtomicString('%s\n' % block.rstrip())
+ code.text = util.AtomicString('%s\n' % util.code_escape(block.rstrip()))
if theRest:
# This block contained unindented line(s) after the first indented
# line. Insert these lines as the first block of the master blocks
diff --git a/markdown/inlinepatterns.py b/markdown/inlinepatterns.py
index 83edf4b..8d49d07 100644
--- a/markdown/inlinepatterns.py
+++ b/markdown/inlinepatterns.py
@@ -158,7 +158,7 @@ AUTOMAIL_RE = r'<([^> \!]*@[^> ]*)>'
HTML_RE = r'(\<([a-zA-Z/][^\>]*?|\!--.*?--)\>)'
# &amp;
-ENTITY_RE = r'(&[\#a-zA-Z0-9]*;)'
+ENTITY_RE = r'(&(?:\#[0-9]+|[a-zA-Z0-9]+);)'
# two spaces at end of line
LINE_BREAK_RE = r' \n'
@@ -369,7 +369,7 @@ class BacktickInlineProcessor(InlineProcessor):
def handleMatch(self, m, data):
if m.group(3):
el = util.etree.Element(self.tag)
- el.text = util.AtomicString(m.group(3).strip())
+ el.text = util.AtomicString(util.code_escape(m.group(3).strip()))
return el, m.start(0), m.end(0)
else:
return m.group(1).replace('\\\\', self.ESCAPED_BSLASH), m.start(0), m.end(0)
diff --git a/markdown/serializers.py b/markdown/serializers.py
index 308cf7a..3cfa6bb 100644
--- a/markdown/serializers.py
+++ b/markdown/serializers.py
@@ -41,6 +41,7 @@ from __future__ import absolute_import
from __future__ import unicode_literals
from xml.etree.ElementTree import ProcessingInstruction
from . import util
+import re
ElementTree = util.etree.ElementTree
QName = util.etree.QName
if hasattr(util.etree, 'test_comment'): # pragma: no cover
@@ -52,6 +53,7 @@ __all__ = ['to_html_string', 'to_xhtml_string']
HTML_EMPTY = ("area", "base", "basefont", "br", "col", "frame", "hr",
"img", "input", "isindex", "link", "meta", "param")
+RE_AMP = re.compile(r'&(?!(?:\#[0-9]+|[0-9a-z]+);)', re.I)
try:
HTML_EMPTY = set(HTML_EMPTY)
@@ -72,7 +74,8 @@ def _escape_cdata(text):
# shorter than 500 character, or so. assume that's, by far,
# the most common case in most applications.
if "&" in text:
- text = text.replace("&", "&amp;")
+ # Only replace & when not part of an entity
+ text = RE_AMP.sub('&amp;', text)
if "<" in text:
text = text.replace("<", "&lt;")
if ">" in text:
@@ -86,7 +89,8 @@ def _escape_attrib(text):
# escape attribute value
try:
if "&" in text:
- text = text.replace("&", "&amp;")
+ # Only replace & when not part of an entity
+ text = RE_AMP.sub('&amp;', text)
if "<" in text:
text = text.replace("<", "&lt;")
if ">" in text:
@@ -104,7 +108,8 @@ def _escape_attrib_html(text):
# escape attribute value
try:
if "&" in text:
- text = text.replace("&", "&amp;")
+ # Only replace & when not part of an entity
+ text = RE_AMP.sub('&amp;', text)
if "<" in text:
text = text.replace("<", "&lt;")
if ">" in text:
diff --git a/markdown/util.py b/markdown/util.py
index aeb7818..b40c010 100644
--- a/markdown/util.py
+++ b/markdown/util.py
@@ -140,6 +140,17 @@ def parseBoolValue(value, fail_on_errors=True, preserve_none=False):
raise ValueError('Cannot parse bool value: %r' % value)
+def code_escape(text):
+ """Escape code."""
+ if "&" in text:
+ text = text.replace("&", "&amp;")
+ if "<" in text:
+ text = text.replace("<", "&lt;")
+ if ">" in text:
+ text = text.replace(">", "&gt;")
+ return text
+
+
def deprecated(message):
"""
Raise a DeprecationWarning when wrapped function/method is called.
diff --git a/tests/misc/amp-in-url.html b/tests/misc/amp-in-url.html
deleted file mode 100644
index 2170a54..0000000
--- a/tests/misc/amp-in-url.html
+++ /dev/null
@@ -1 +0,0 @@
-<p><a href="http://www.freewisdom.org/this&amp;that">link</a></p>
\ No newline at end of file
diff --git a/tests/misc/amp-in-url.txt b/tests/misc/amp-in-url.txt
deleted file mode 100644
index 471106e..0000000
--- a/tests/misc/amp-in-url.txt
+++ /dev/null
@@ -1 +0,0 @@
-[link](http://www.freewisdom.org/this&that)
diff --git a/tests/test_apis.py b/tests/test_apis.py
index d9d520d..c813e56 100644
--- a/tests/test_apis.py
+++ b/tests/test_apis.py
@@ -666,6 +666,15 @@ class testSerializers(unittest.TestCase):
'<div xmlns="&lt;&amp;&quot;test&#10;escaping&quot;&gt;"></div>'
)
+ def testQNamePreEscaping(self):
+ """ Test QName that is already partially escaped. """
+ qname = markdown.util.etree.QName('&lt;&amp;"test&#10;escaping"&gt;', 'div')
+ el = markdown.util.etree.Element(qname)
+ self.assertEqual(
+ markdown.serializers.to_xhtml_string(el),
+ '<div xmlns="&lt;&amp;&quot;test&#10;escaping&quot;&gt;"></div>'
+ )
+
def buildExtension(self):
""" Build an extension which registers fakeSerializer. """
def fakeSerializer(elem):
diff --git a/tests/test_syntax/inline/images.py b/tests/test_syntax/inline/test_images.py
index 52ce330..52ce330 100644
--- a/tests/test_syntax/inline/images.py
+++ b/tests/test_syntax/inline/test_images.py
diff --git a/tests/test_syntax/inline/links.py b/tests/test_syntax/inline/test_links.py
index d74bb75..3e8593f 100644
--- a/tests/test_syntax/inline/links.py
+++ b/tests/test_syntax/inline/test_links.py
@@ -118,3 +118,15 @@ class TestAdvancedLinks(TestCase):
"""<p><a href="http://link.com/with spaces '&quot;and quotes&quot;" title="and title">"""
"""Text</a> more text</p>"""
)
+
+ def test_amp_in_url(self):
+ """Test amp in URLs."""
+
+ self.assertMarkdownRenders(
+ '[link](http://www.freewisdom.org/this&that)',
+ '<p><a href="http://www.freewisdom.org/this&amp;that">link</a></p>'
+ )
+ self.assertMarkdownRenders(
+ '[title](http://example.com/?a=1&amp;b=2)',
+ '<p><a href="http://example.com/?a=1&amp;b=2">title</a></p>'
+ )