Version 1.5

author: Yuri Takhteyev <yuri@freewisdom.org> 2006-05-16 05:20:45 +0000
committer: Yuri Takhteyev <yuri@freewisdom.org> 2006-05-16 05:20:45 +0000
commit: eef424db7e58721d83985ea87aba17690ec2e1b7 (patch)
tree: 67f73d16d8b9158cde1370b6d84926acbaca8208 /markdown.py
parent: a7378d6f209e99955c4911e129d4a311c265b4a4 (diff)
download: markdown-eef424db7e58721d83985ea87aba17690ec2e1b7.tar.gz markdown-eef424db7e58721d83985ea87aba17690ec2e1b7.tar.bz2 markdown-eef424db7e58721d83985ea87aba17690ec2e1b7.zip
1 files changed, 130 insertions, 37 deletions
diff --git a/markdown.py b/markdown.py
index a187990..6b3e57a 100644
--- a/markdown.py
+++ b/markdown.py
@@ -1,5 +1,7 @@
 #!/usr/bin/env python
 
+SPEED_TEST = 0
+
 """
 ====================================================================
 IF YOU ARE LOOKING TO EXTEND MARKDOWN, SEE THE "FOOTNOTES" SECTION
@@ -21,7 +23,7 @@ Contact: yuri [at] freewisdom.org
 
 License: GPL 2 (http://www.gnu.org/copyleft/gpl.html) or BSD
 
-Version: 1.3 (Feb. 28, 2006)
+Version: 1.5 (May 15, 2006)
 
 For changelog, see end of file
 """
@@ -187,7 +189,7 @@ class Element :
             value = self.attribute_values[attr]
             value = self.doc.normalizeEntities(value)
             buffer += ' %s="%s"' % (attr, value)
-        if self.childNodes :
+        if self.childNodes or self.nodeName in ['blockquote']:
             buffer += ">"
             for child in self.childNodes :
                 buffer += child.toxml()
@@ -271,6 +273,9 @@ class HeaderPreprocessor :
             if not lines[i] :
                 continue
 
+            if lines[i].startswith("#") :
+                lines.insert(i+1, "\n")
+
             if (i+1 <= len(lines)
                   and lines[i+1]
                   and lines[i+1][0] in ['-', '=']) :
@@ -798,7 +803,9 @@ class Markdown:
         """Creates a new Markdown instance.
 
            @param source: The text in Markdown format. """
-
+        
+        if isinstance(source, unicode):
+            source = source.encode('utf8')
         self.source = source
         self.blockGuru = BlockGuru()
         self.registeredExtensions = []
@@ -816,6 +823,9 @@ class Markdown:
         self.postprocessors = [] # a footnote postprocessor will get
                                  # inserted later
 
+        self.prePatterns = []
+        
+
         self.inlinePatterns = [ DOUBLE_BACKTICK_PATTERN,
                                 BACKTICK_PATTERN,
                                 ESCAPE_PATTERN,
@@ -889,7 +899,18 @@ class Markdown:
             self.lines = prep.run(self.lines)
 
         # Create a NanoDom tree from the lines and attach it to Document
-        self._processSection(self.top_element, self.lines)
+
+
+        buffer = []
+        for line in self.lines :
+            if line.startswith("#") :
+                self._processSection(self.top_element, buffer)
+                buffer = [line]
+            else :
+                buffer.append(line)
+        self._processSection(self.top_element, buffer)
+        
+        #self._processSection(self.top_element, self.lines)
 
         # Not sure why I put this in but let's leave it for now.
         self.top_element.appendChild(self.doc.createTextNode('\n'))
@@ -972,14 +993,14 @@ class Markdown:
                     level = len(m.group(1))
                     h = self.doc.createElement("h%d" % level)
                     parent_elem.appendChild(h)
-                    for item in self._handleInlineWrapper(m.group(2)) :
+                    for item in self._handleInlineWrapper2(m.group(2).strip()) :
                         h.appendChild(item)
                 else :
                     message(CRITICAL, "We've got a problem header!")
 
             elif paragraph :
 
-                list = self._handleInlineWrapper("\n".join(paragraph))
+                list = self._handleInlineWrapper2("\n".join(paragraph))
 
                 if ( parent_elem.nodeName == 'li'
                      and not (looseList or parent_elem.childNodes)):
@@ -1052,7 +1073,8 @@ class Markdown:
                     break
 
                 # Check if the next non-blank line is still a part of the list
-                if ( RE.regExp[listexpr].match(next) or
+                if ( RE.regExp['ul'].match(next) or
+                     RE.regExp['ol'].match(next) or 
                      RE.regExp['tabbed'].match(next) ):
                     # get rid of any white space in the line
                     items[item].append(line.strip())
@@ -1064,11 +1086,11 @@ class Markdown:
             # Now we need to detect list items (at the current level)
             # while also detabing child elements if necessary
 
-            for expr in [listexpr, 'tabbed']:
+            for expr in ['ul', 'ol', 'tabbed']:
 
                 m = RE.regExp[expr].match(line)
                 if m :
-                    if expr == listexpr :  # We are looking at a new item
+                    if expr in ['ul', 'ol'] :  # We are looking at a new item
                         if m.group(1) :
                             items.append([m.group(1)])
                             item += 1
@@ -1164,25 +1186,65 @@ class Markdown:
         self._processSection(parent_elem, theRest, inList)
 
 
+    def _handleInlineWrapper2 (self, line) :
+
+
+        parts = [line]
+
+        #if not(line):
+        #    return [self.doc.createTextNode(' ')]
+
+        for pattern in self.inlinePatterns :
+
+            #print
+            #print self.inlinePatterns.index(pattern)
+
+            i = 0
+
+            #print parts
+            while i < len(parts) :
+                
+                x = parts[i]
+                #print i
+                if isinstance(x, (str, unicode)) :
+                    result = self._applyPattern(x, pattern)
+                    #print result
+                    #print result
+                    #print parts, i
+                    if result :
+                        i -= 1
+                        parts.remove(x)
+                        for y in result :
+                            parts.insert(i+1,y)
+                
+                i += 1
+
+        for i in range(len(parts)) :
+            x = parts[i]
+            if isinstance(x, (str, unicode)) :
+                parts[i] = self.doc.createTextNode(x)
+
+        return parts
+        
+
+
     def _handleInlineWrapper (self, line) :
 
         # A wrapper around _handleInline to avoid recursion
 
-        strtype = type("string")
         parts = [line]
-        dirty = 1
-
-        while dirty:
-            dirty = 0
-            for x in parts :
-                if type(x) == strtype :
-                    i = parts.index(x)
-                    parts.remove(x)
-                    result = self._handleInline(x)
-                    result.reverse()
-                    for y in result :
-                        parts.insert(i,y)
-                    dirty = 1
+
+        i = 0
+        
+        while i < len(parts) :
+            x = parts[i]
+            if isinstance(x, (str, unicode)) :
+                parts.remove(x)
+                result = self._handleInline(x)
+                for y in result :
+                    parts.insert(i,y)
+            else :
+                i += 1
 
         return parts
 
@@ -1194,16 +1256,10 @@ class Markdown:
         See notes on inline patterns above.
 
         @param item: A block of Markdown text
-        @return: A list of NanoDomnodes """
+        @return: A list of NanoDom nodes """
+
         if not(line):
             return [self.doc.createTextNode(' ')]
-        # two spaces at the end of the line denote a <br/>
-        #if line.endswith('  '):
-        #    list = self._handleInline( line.rstrip())
-        #    list.append(self.doc.createElement('br'))
-        #    return list
-        #
-        # ::TODO:: Replace with a preprocessor
 
         for pattern in self.inlinePatterns :
             list = self._applyPattern( line, pattern)
@@ -1235,9 +1291,11 @@ class Markdown:
         node = pattern.handleMatch(m, self.doc)
 
         if node :
-            return [m.group(1),     # the string to the right of the match
-                    node,           # the new node
-                    m.groups()[-1]] # the string to the left
+            # Those are in the reverse order!
+            return ( m.groups()[-1], # the string to the left
+                     node,           # the new node
+                     m.group(1))     # the string to the right of the match
+
         else :
             return None
 
@@ -1266,6 +1324,9 @@ class Markdown:
         if self.stripTopLevelTags :
             xml = xml.strip()[23:-7]
 
+        if isinstance(xml, unicode) :
+            xml = xml.encode("utf8")
+
         return xml
 
 
@@ -1294,6 +1355,8 @@ class FootnoteExtension :
     DEF_RE = re.compile(r'(\ ?\ ?\ ?)\[\^([^\]]*)\]:\s*(.*)')
     SHORT_USE_RE = re.compile(r'\[\^([^\]]*)\]', re.M) # [^a]
 
+    FN_PLACE_MARKER = "///Footnotes Go Here///"
+
     def __init__ (self) :
         self.reset()
 
@@ -1316,7 +1379,11 @@ class FootnoteExtension :
         md.inlinePatterns.insert(index, FootnotePattern(FOOTNOTE_RE, self))
 
         # Insert a post-processor that would actually add the footnote div
-        md.postprocessors.append(FootnotePostprocessor(self))
+        postprocessor = FootnotePostprocessor(self)
+        postprocessor.extension = self
+        
+        md.postprocessors.append(postprocessor)
+
 
     def reset(self) :
         # May be called by Markdown is state reset is desired
@@ -1325,6 +1392,17 @@ class FootnoteExtension :
         self.used_footnotes={}
         self.footnotes = {}
 
+    def findFootnotesPlaceholder(self, doc) :
+        def findFootnotePlaceholderFn(node=None, indent=0):
+            if node.type == 'text':
+                if node.value.find(self.FN_PLACE_MARKER) > -1 :
+                    return True
+
+        fn_div_list = doc.find(findFootnotePlaceholderFn)
+        if fn_div_list :
+            return fn_div_list[0]
+
+
     def setFootnote(self, id, text) :
         self.footnotes[id] = text
 
@@ -1475,13 +1553,17 @@ class FootnotePostprocessor :
     def run(self, doc) :
         footnotesDiv = self.footnotes.makeFootnotesDiv(doc)
         if footnotesDiv :
-            doc.documentElement.appendChild(footnotesDiv)
+            fnPlaceholder = self.extension.findFootnotesPlaceholder(doc)
+            if fnPlaceholder :
+                fnPlaceholder.parent.replaceChild(fnPlaceholder, footnotesDiv)
+            else :
+                doc.documentElement.appendChild(footnotesDiv)
 
 # ====================================================================
 
 def markdown(text) :
     message(VERBOSE, "in markdown.py, received text:\n%s" % text)
-    return str(Markdown(text))
+    return Markdown(text).toString()
 
 def markdownWithFootnotes(text):
     message(VERBOSE, "Running markdown with footnotes, "
@@ -1697,12 +1779,23 @@ if __name__ == '__main__':
     if testing:
         test_markdown(args)
     else:
+        import time
+        t0 = time.time()
+        #for x in range(10) :
         cmd_line(args)
+        #import profile
+        #profile.run('cmd_line(args)', 'profile')
+        t1 = time.time()
+        #print "Time: %f - %f = %f" % (t1, t0, t1-t0)
 
 """
 CHANGELOG
 =========
 
+May 15, 2006: A bug with lists, recursion on block-level elements,
+run-in headers, spaces before headers, unicode input (thanks to Aaron
+Swartz). Sourceforge tracker #s: 1489313, 1489312, 1489311, 1488370,
+1485178, 1485176. (v. 1.5)
 
 Mar. 24, 2006: Switched to a not-so-recursive algorithm with
 _handleInline.  (Version 1.4)
author	Yuri Takhteyev <yuri@freewisdom.org>	2006-05-16 05:20:45 +0000
committer	Yuri Takhteyev <yuri@freewisdom.org>	2006-05-16 05:20:45 +0000
commit	eef424db7e58721d83985ea87aba17690ec2e1b7 (patch)
tree	67f73d16d8b9158cde1370b6d84926acbaca8208 /markdown.py
parent	a7378d6f209e99955c4911e129d4a311c265b4a4 (diff)
download	markdown-eef424db7e58721d83985ea87aba17690ec2e1b7.tar.gz markdown-eef424db7e58721d83985ea87aba17690ec2e1b7.tar.bz2 markdown-eef424db7e58721d83985ea87aba17690ec2e1b7.zip