about | summary | refs | log | tree | commit | diff | stats
path: root/markdown.py
diff options
context:
space:
mode:
Diffstat (limited to 'markdown.py')
-rw-r--r-- markdown.py 26
1 files changed, 20 insertions, 6 deletions
diff --git a/markdown.py b/markdown.py
index c57ef47..4d1eeaf 100644
--- a/markdown.py
+++ b/markdown.py
@@ -84,7 +84,12 @@ Importantly, NanoDom does not do normalization, which is what we
want. It also adds extra white space when converting DOM to string
"""
-ENTITY_NORMALIZATION_EXPRESSIONS = [ (re.compile("&(?!\#)"), "&amp;"),
+ENTITY_NORMALIZATION_EXPRESSIONS = [ (re.compile("&"), "&amp;"),
+ (re.compile("<"), "&lt;"),
+ (re.compile(">"), "&gt;"),
+ (re.compile("\""), "&quot;")]
+
+ENTITY_NORMALIZATION_EXPRESSIONS_SOFT = [ (re.compile("&(?!\#)"), "&amp;"),
(re.compile("<"), "&lt;"),
(re.compile(">"), "&gt;"),
(re.compile("\""), "&quot;")]
@@ -122,9 +127,14 @@ class Document :
def toxml (self) :
return self.documentElement.toxml()
- def normalizeEntities(self, text) :
+ def normalizeEntities(self, text, avoidDoubleNormalizing=False) :
- for regexp, substitution in ENTITY_NORMALIZATION_EXPRESSIONS :
+ if avoidDoubleNormalizing :
+ regexps = ENTITY_NORMALIZATION_EXPRESSIONS_SOFT
+ else :
+ regexps = ENTITY_NORMALIZATION_EXPRESSIONS
+
+ for regexp, substitution in regexps :
text = regexp.sub(substitution, text)
return text
@@ -213,7 +223,7 @@ class Element :
buffer += "<" + self.nodeName
for attr in self.attributes :
value = self.attribute_values[attr]
- value = self.doc.normalizeEntities(value)
+ value = self.doc.normalizeEntities(value, avoidDoubleNormalizing=True)
buffer += ' %s="%s"' % (attr, value)
if self.childNodes or self.nodeName in ['blockquote']:
buffer += ">"
@@ -399,6 +409,7 @@ class HtmlBlockPreprocessor (Preprocessor):
def run (self, lines) :
+
new_blocks = []
text = "\n".join(lines)
text = text.split("\n\n")
@@ -635,12 +646,12 @@ class LinkPattern (Pattern):
parts = m.group(9).split('"')
# We should now have [], [href], or [href, title]
if parts :
- el.setAttribute('href', parts[0])
+ el.setAttribute('href', parts[0].strip())
else :
el.setAttribute('href', "")
if len(parts) > 1 :
# we also got a title
- title = " ".join(parts[1:]).strip()
+ title = '"' + '"'.join(parts[1:]).strip()
title = dequote(title) #.replace('"', "&quot;")
el.setAttribute('title', title)
return el
@@ -1388,6 +1399,9 @@ class Markdown:
# (ideally this should be recursive.
# here we only go one level deep)
+ if x.nodeName in ["code", "pre"] :
+ break
+
j = 0
while j < len(x.childNodes):
child = x.childNodes[j]