Updated definition list extension to work with new block parser.

author: Waylan Limberg <waylan@gmail.com> 2008-11-22 22:38:40 -0500
committer: Waylan Limberg <waylan@gmail.com> 2008-11-22 22:40:33 -0500
commit: 3158c3467fe521422e211c8937033018f942d549 (patch)
tree: 4da108c1fe3827cc16b07d1c5a850d19cdffce2f
parent: 461dbb80de64cdfc0c29c3625f0c70b9653b7370 (diff)
download: markdown-3158c3467fe521422e211c8937033018f942d549.tar.gz
markdown-3158c3467fe521422e211c8937033018f942d549.tar.bz2
markdown-3158c3467fe521422e211c8937033018f942d549.zip
2 files changed, 68 insertions, 170 deletions
diff --git a/markdown/extensions/def_list.py b/markdown/extensions/def_list.py
index 831e840..616f808 100644
--- a/markdown/extensions/def_list.py
+++ b/markdown/extensions/def_list.py
@@ -20,185 +20,83 @@ Copyright 2008 - [Waylan Limberg](http://achinghead.com)
 """
 
 import markdown, re
-from markdown import etree, CORE_RE
-
-DEF_RE = re.compile(r'^[ ]{0,3}:[ ]{1,3}(.*)$')
-
-class DefListParser(markdown.MarkdownParser):
-    """ Subclass of MarkdownParser which adds definition list parsing. """
-
-    def parseChunk(self, parent_elem, lines, inList=0, looseList=0):
-        """Process a chunk of markdown-formatted text and attach the parse to
-        an ElementTree node.
-
-        Process a section of a source document, looking for high
-        level structural elements like lists, block quotes, code
-        segments, html blocks, etc.  Some those then get stripped
-        of their high level markup (e.g. get unindented) and the
-        lower-level markup is processed recursively.
-
-        Keyword arguments:
-
-        * parent_elem: The ElementTree element to which the content will be
-                       added.
-        * lines: a list of lines
-        * inList: a level
-
-        Returns: None
-
-        """
-        # Loop through lines until none left.
-        while lines:
-            # Skipping empty line
-            if not lines[0]:
-                lines = lines[1:]
-                continue
-
-            # Check if this section starts with a list, a blockquote or
-            # a code block.  If so, process them.
-            processFn = { 'ul':     self._MarkdownParser__processUList,
-                          'ol':     self._MarkdownParser__processOList,
-                          'quoted': self._MarkdownParser__processQuote,
-                          'tabbed': self._MarkdownParser__processCodeBlock}
-            for regexp in ['ul', 'ol', 'quoted', 'tabbed']:
-                m = CORE_RE[regexp].match(lines[0])
-                if m:
-                    processFn[regexp](parent_elem, lines, inList)
-                    return
-
-            # We are NOT looking at one of the high-level structures like
-            # lists or blockquotes.  So, it's just a regular paragraph
-            # (though perhaps nested inside a list or something else).  If
-            # we are NOT inside a list, we just need to look for a blank
-            # line to find the end of the block.  If we ARE inside a
-            # list, however, we need to consider that a sublist does not
-            # need to be separated by a blank line.  Rather, the following
-            # markup is legal:
-            #
-            # * The top level list item
-            #
-            #     Another paragraph of the list.  This is where we are now.
-            #     * Underneath we might have a sublist.
-            #
-
-            if inList:
-                start, lines  = self._MarkdownParser__linesUntil(lines, (lambda line:
-                                 CORE_RE['ul'].match(line)
-                                 or CORE_RE['ol'].match(line)
-                                                  or not line.strip()))
-                self.parseChunk(parent_elem, start, inList-1,
-                                looseList=looseList)
-                inList = inList-1
-
-            else: # Ok, so it's just a simple block
-                test = lambda line: not line.strip() or line[0] == '>'
-                paragraph, lines = self._MarkdownParser__linesUntil(lines, test)
-                if len(paragraph) and paragraph[0].startswith('#'):
-                    self._MarkdownParser__processHeader(parent_elem, paragraph)
-                elif len(paragraph) and CORE_RE["isline3"].match(paragraph[0]):
-                    self._MarkdownParser__processHR(parent_elem)
-                    lines = paragraph[1:] + lines
-                elif paragraph:
-                    paragraph, lines, looseList = self._processDefs(parent_elem,
-                                                                    paragraph, 
-                                                                    lines, 
-                                                                    looseList)
-                    if len(paragraph):
-                        self._MarkdownParser__processParagraph(parent_elem, 
-                                                               paragraph,
-                                                               inList, 
-                                                               looseList)
-
-            if lines and not lines[0].strip():
-                lines = lines[1:]  # skip the first (blank) line
-
-
-    def _processDefs(self, parentElem, paragraph, lines, looseList):
-        """ Check a paragraph for definition lists and process. """
-        terms = []
-        defs = []
-        i = 0
-        while i < len(paragraph):
-            m = DEF_RE.match(paragraph[i])
-            if m:
-                d, theRest  = self.detectTabbed(paragraph[i+1:])
-                d.insert(0, m.group(1))
-                if d:
-                    defs.append(d)
-                    i += len(d)
-            else:
-                terms.append(paragraph[i])
-                i += 1
-        if defs:
-            if not terms:
-                # The previous paragraph must contain the terms
-                c = parentElem.getchildren()
-                if c and c[-1].tag == "p" and c[-1].text:
-                    terms = c[-1].text.split("\n")
-                    parentElem.remove(c[-1])
-                    looseList = 1
-            # check for extra paragraphs of a def
-            extradef, lines = self.detectTabbed(lines)
-            if extradef:
-                 looseList = 1
-                 defs[-1].extend(extradef)
-            # Build a tree from the terms and defs
-            c = parentElem.getchildren()
-            if c and c[-1].tag == "dl":
-                dl = c[-1]
-            else:
-                dl = etree.SubElement(parentElem, "dl")
-            for term in terms:
-                dt = etree.SubElement(dl, "dt")
-                dt.text = term
-            for d in defs:
-                dd = etree.SubElement(dl, "dd")
-                self.parseChunk(dd, d, looseList = looseList)
-            return [], lines, looseList
-        else:
-            return terms, lines, looseList
+from markdown import etree
+
+
+class DefListProcessor(markdown.blockprocessors.BlockProcessor):
+    """ Process Definition Lists. """
 
-    def _MarkdownParser__processParagraph(self, parentElem, paragraph, 
-                                          inList, looseList):
+    RE = re.compile(r'(^|\n)[ ]{0,3}:[ ]{1,3}(.*?)(\n|$)')
 
-        if ((parentElem.tag == 'li' or parentElem.tag == 'dd')
-                and not (looseList or parentElem.getchildren())):
+    def test(self, parent, block):
+        return bool(self.RE.search(block))
+
+    def run(self, parent, blocks):
+        block = blocks.pop(0)
+        m = self.RE.search(block)
+        terms = [l.strip() for l in block[:m.start()].split('\n') if l.strip()]
+        d, theRest = self.detab(block[m.end():])
+        if d:
+            d = '%s\n%s' % (m.group(2), d)
+        else:
+            d = m.group(2)
+        #import ipdb; ipdb.set_trace()
+        sibling = self.lastChild(parent)
+        if not terms and sibling.tag == 'p':
+            # The previous paragraph contains the terms
+            state = 'looselist'
+            terms = sibling.text.split('\n')
+            parent.remove(sibling)
+            # Aquire new sibling
+            sibling = self.lastChild(parent)
+        else:
+            state = 'list'
 
-            # If this is the first paragraph inside "li", don't
-            # put <p> around it - append the paragraph bits directly
-            # onto parentElem
-            el = parentElem
+        if sibling and sibling.tag == 'dl':
+            # This is another item on an existing list
+            dl = sibling
+            if len(dl) and dl[-1].tag == 'dd' and len(dl[-1]):
+                state = 'looselist'
         else:
-            # Otherwise make a "p" element
-            el = etree.SubElement(parentElem, "p")
-
-        dump = []
-
-        # Searching for hr or header
-        for line in paragraph:
-            # it's hr
-            if CORE_RE["isline3"].match(line):
-                el.text = "\n".join(dump)
-                self._MarkdownParser__processHR(el)
-                dump = []
-            # it's header
-            elif line.startswith("#"):
-                el.text = "\n".join(dump)
-                self._MarkdownParser__processHeader(parentElem, [line])
-                dump = []
-            else:
-                dump.append(line)
-        if dump:
-            text = "\n".join(dump)
-            el.text = text
+            # This is a new list
+            dl = etree.SubElement(parent, 'dl')
+        # Add terms
+        for term in terms:
+            dt = etree.SubElement(dl, 'dt')
+            dt.text = term
+        # Add definition
+        self.parser.state.set(state)
+        dd = etree.SubElement(dl, 'dd')
+        self.parser.parseBlocks(dd, [d])
+        self.parser.state.reset()
+
+        if theRest:
+            blocks.insert(0, theRest)
+
+class DefListIndentProcessor(markdown.blockprocessors.ListIndentProcessor):
+    """ Process indented children of definition list items. """
+
+    ITEM_TYPES = ['dd']
+    LIST_TYPES = ['dl']
+
+    def create_item(parent, block):
+        """ Create a new dd and parse the block with it as the parent. """
+        dd = markdown.etree.SubElement(sibling, 'dd')
+        self.parser.parseBlocks(dd, [block])
+ 
 
 
 class DefListExtension(markdown.Extension):
     """ Add definition lists to Markdown. """
 
     def extendMarkdown(self, md, md_globals):
-        """ Set the core parser to an instance of DefListParser. """
-        md.parser = DefListParser()
+        """ Add an instance of DefListProcessor to BlockParser. """
+        md.parser.blockprocessors.add('defindent',
+                                      DefListIndentProcessor(md.parser),
+                                      '>indent')
+        md.parser.blockprocessors.add('deflist', 
+                                      DefListProcessor(md.parser),
+                                      '>ulist')
 
 
 def makeExtension(configs={}):
diff --git a/test-markdown.py b/test-markdown.py
index 6c033b0..3d9ef66 100755
--- a/test-markdown.py
+++ b/test-markdown.py
@@ -375,7 +375,7 @@ testDirectory("tests/safe_mode", measure_time=True, safe_mode="escape")
 testDirectory("tests/extensions-x-codehilite")
 testDirectory("tests/extensions-x-wikilinks")
 testDirectory("tests/extensions-x-toc")
-#testDirectory("tests/extensions-x-def_list")
+testDirectory("tests/extensions-x-def_list")
 testDirectory("tests/extensions-x-abbr")
 
 print "\n### Final result ###"
author	Waylan Limberg <waylan@gmail.com>	2008-11-22 22:38:40 -0500
committer	Waylan Limberg <waylan@gmail.com>	2008-11-22 22:40:33 -0500
commit	3158c3467fe521422e211c8937033018f942d549 (patch)
tree	4da108c1fe3827cc16b07d1c5a850d19cdffce2f
parent	461dbb80de64cdfc0c29c3625f0c70b9653b7370 (diff)
download	markdown-3158c3467fe521422e211c8937033018f942d549.tar.gz markdown-3158c3467fe521422e211c8937033018f942d549.tar.bz2 markdown-3158c3467fe521422e211c8937033018f942d549.zip