Got rid of some side effects of the new bug fixes.

author: Yuri Takhteyev <yuri@freewisdom.org> 2007-03-19 02:03:30 +0000
committer: Yuri Takhteyev <yuri@freewisdom.org> 2007-03-19 02:03:30 +0000
commit: 56cf5f0beaa82d78d6a9b13af545dd099e095ccf (patch)
tree: a8a2734ebc5492e9129550d5f191d9d0f3c7ef39 /markdown.py
parent: 6734c7c896365533898d321b2eae4745c380310e (diff)
download: markdown-56cf5f0beaa82d78d6a9b13af545dd099e095ccf.tar.gz
markdown-56cf5f0beaa82d78d6a9b13af545dd099e095ccf.tar.bz2
markdown-56cf5f0beaa82d78d6a9b13af545dd099e095ccf.zip
1 files changed, 20 insertions, 6 deletions
diff --git a/markdown.py b/markdown.py
index c57ef47..4d1eeaf 100644
--- a/markdown.py
+++ b/markdown.py
@@ -84,7 +84,12 @@ Importantly, NanoDom does not do normalization, which is what we
 want. It also adds extra white space when converting DOM to string
 """
 
-ENTITY_NORMALIZATION_EXPRESSIONS = [ (re.compile("&(?!\#)"), "&amp;"),
+ENTITY_NORMALIZATION_EXPRESSIONS = [ (re.compile("&"), "&amp;"),
+                                     (re.compile("<"), "&lt;"),
+                                     (re.compile(">"), "&gt;"),
+                                     (re.compile("\""), "&quot;")]
+
+ENTITY_NORMALIZATION_EXPRESSIONS_SOFT = [ (re.compile("&(?!\#)"), "&amp;"),
                                      (re.compile("<"), "&lt;"),
                                      (re.compile(">"), "&gt;"),
                                      (re.compile("\""), "&quot;")]
@@ -122,9 +127,14 @@ class Document :
     def toxml (self) :
         return self.documentElement.toxml()
 
-    def normalizeEntities(self, text) :
+    def normalizeEntities(self, text, avoidDoubleNormalizing=False) :
 
-        for regexp, substitution in ENTITY_NORMALIZATION_EXPRESSIONS :
+        if avoidDoubleNormalizing :
+            regexps = ENTITY_NORMALIZATION_EXPRESSIONS_SOFT
+        else :
+            regexps = ENTITY_NORMALIZATION_EXPRESSIONS
+
+        for regexp, substitution in regexps :
             text = regexp.sub(substitution, text)
         return text
 
@@ -213,7 +223,7 @@ class Element :
         buffer += "<" + self.nodeName
         for attr in self.attributes :
             value = self.attribute_values[attr]
-            value = self.doc.normalizeEntities(value)
+            value = self.doc.normalizeEntities(value, avoidDoubleNormalizing=True)
             buffer += ' %s="%s"' % (attr, value)
         if self.childNodes or self.nodeName in ['blockquote']:
             buffer += ">"
@@ -399,6 +409,7 @@ class HtmlBlockPreprocessor (Preprocessor):
 
     
     def run (self, lines) :
+
         new_blocks = []
         text = "\n".join(lines)
         text = text.split("\n\n")
@@ -635,12 +646,12 @@ class LinkPattern (Pattern):
         parts = m.group(9).split('"')
         # We should now have [], [href], or [href, title]
         if parts :
-            el.setAttribute('href', parts[0])
+            el.setAttribute('href', parts[0].strip())
         else :
             el.setAttribute('href', "")
         if len(parts) > 1 :
             # we also got a title
-            title = " ".join(parts[1:]).strip()
+            title = '"' + '"'.join(parts[1:]).strip()
             title = dequote(title) #.replace('"', "&quot;")
             el.setAttribute('title', title)
         return el
@@ -1388,6 +1399,9 @@ class Markdown:
                     # (ideally this should be recursive.
                     # here we only go one level deep)
 
+                    if x.nodeName in ["code", "pre"] :
+                        break
+
                     j = 0
                     while j < len(x.childNodes):
                         child = x.childNodes[j]
author	Yuri Takhteyev <yuri@freewisdom.org>	2007-03-19 02:03:30 +0000
committer	Yuri Takhteyev <yuri@freewisdom.org>	2007-03-19 02:03:30 +0000
commit	56cf5f0beaa82d78d6a9b13af545dd099e095ccf (patch)
tree	a8a2734ebc5492e9129550d5f191d9d0f3c7ef39 /markdown.py
parent	6734c7c896365533898d321b2eae4745c380310e (diff)
download	markdown-56cf5f0beaa82d78d6a9b13af545dd099e095ccf.tar.gz markdown-56cf5f0beaa82d78d6a9b13af545dd099e095ccf.tar.bz2 markdown-56cf5f0beaa82d78d6a9b13af545dd099e095ccf.zip