Added Definition List extension. This is a first draft. A few more advanced features in PHP's implementation don't work yet. Most notably a blank line between a term and its first definition will break things. See the included test for what works.

author: Waylan Limberg <waylan@gmail.com> 2008-11-04 17:37:43 -0500
committer: Waylan Limberg <waylan@gmail.com> 2008-11-04 17:37:43 -0500
commit: a4106db7aff9e961f0c40a738f19ccf06a7cae6c (patch)
tree: b403e1af3332899d3d92898bc37ea117383c2e1e
parent: cbd702c752971b0f55e88cdb08ccf6965061bbf4 (diff)
download: markdown-a4106db7aff9e961f0c40a738f19ccf06a7cae6c.tar.gz
markdown-a4106db7aff9e961f0c40a738f19ccf06a7cae6c.tar.bz2
markdown-a4106db7aff9e961f0c40a738f19ccf06a7cae6c.zip
4 files changed, 271 insertions, 0 deletions
diff --git a/markdown_extensions/def_list.py b/markdown_extensions/def_list.py
new file mode 100644
index 0000000..a988e13
--- /dev/null
+++ b/markdown_extensions/def_list.py
@@ -0,0 +1,198 @@
+#!/usr/bin/env Python
+"""
+Definition List Extension for Python-Markdown
+=============================================
+
+Added parsing of Definition Lists to Python-Markdown.
+
+A simple example:
+
+    Apple
+    :   Pomaceous fruit of plants of the genus Malus in 
+        the family Rosaceae.
+    :   An american computer company.
+
+    Orange
+    :   The fruit of an evergreen tree of the genus Citrus.
+
+Copyright 2008 - [Waylan Limberg](http://achinghead.com)
+
+"""
+
+import markdown
+from markdown import etree, CORE_RE
+
+class DefListParser(markdown.MarkdownParser):
+    """ Subclass of MarkdownParser which adds definition list parsing. """
+
+    def parseChunk(self, parent_elem, lines, inList=0, looseList=0):
+        """Process a chunk of markdown-formatted text and attach the parse to
+        an ElementTree node.
+
+        Process a section of a source document, looking for high
+        level structural elements like lists, block quotes, code
+        segments, html blocks, etc.  Some those then get stripped
+        of their high level markup (e.g. get unindented) and the
+        lower-level markup is processed recursively.
+
+        Keyword arguments:
+
+        * parent_elem: The ElementTree element to which the content will be
+                       added.
+        * lines: a list of lines
+        * inList: a level
+
+        Returns: None
+
+        """
+        # Loop through lines until none left.
+        while lines:
+            # Skipping empty line
+            if not lines[0]:
+                lines = lines[1:]
+                continue
+
+            # Check if this section starts with a list, a blockquote or
+            # a code block.  If so, process them.
+            processFn = { 'ul':     self._MarkdownParser__processUList,
+                          'ol':     self._MarkdownParser__processOList,
+                          'quoted': self._MarkdownParser__processQuote,
+                          'tabbed': self._MarkdownParser__processCodeBlock}
+            for regexp in ['ul', 'ol', 'quoted', 'tabbed']:
+                m = CORE_RE[regexp].match(lines[0])
+                if m:
+                    processFn[regexp](parent_elem, lines, inList)
+                    return
+
+            # We are NOT looking at one of the high-level structures like
+            # lists or blockquotes.  So, it's just a regular paragraph
+            # (though perhaps nested inside a list or something else).  If
+            # we are NOT inside a list, we just need to look for a blank
+            # line to find the end of the block.  If we ARE inside a
+            # list, however, we need to consider that a sublist does not
+            # need to be separated by a blank line.  Rather, the following
+            # markup is legal:
+            #
+            # * The top level list item
+            #
+            #     Another paragraph of the list.  This is where we are now.
+            #     * Underneath we might have a sublist.
+            #
+
+            if inList:
+                start, lines  = self._MarkdownParser__linesUntil(lines, (lambda line:
+                                 CORE_RE['ul'].match(line)
+                                 or CORE_RE['ol'].match(line)
+                                                  or not line.strip()))
+                self.parseChunk(parent_elem, start, inList-1,
+                                looseList=looseList)
+                inList = inList-1
+
+            else: # Ok, so it's just a simple block
+                test = lambda line: not line.strip() or line[0] == '>'
+                paragraph, lines = self._MarkdownParser__linesUntil(lines, test)
+                if len(paragraph) and paragraph[0].startswith('#'):
+                    self._MarkdownParser__processHeader(parent_elem, paragraph)
+                elif len(paragraph) and CORE_RE["isline3"].match(paragraph[0]):
+                    self._MarkdownParser__processHR(parent_elem)
+                    lines = paragraph[1:] + lines
+                elif paragraph:
+                    terms, defs, paragraph = self._getDefs(paragraph)
+                    if defs:
+                        # check for extra paragraphs of a def
+                        extradef, lines = self.detectTabbed(lines)
+                        defs[-1].extend(extradef)
+                        self._processDef(parent_elem, terms, defs)
+                    if len(paragraph):
+                        self._MarkdownParser__processParagraph(parent_elem, 
+                                                               paragraph,
+                                                               inList, 
+                                                               looseList)
+
+            if lines and not lines[0].strip():
+                lines = lines[1:]  # skip the first (blank) line
+
+
+    def _getDefs(self, lines):
+        terms = []
+        defs = []
+        i = 0
+        while i < len(lines):
+            if lines[i].startswith(':   '):
+                d  = self._getDef(lines[i:])
+                if d:
+                    defs.append(d)
+                    i += len(d)
+            else:
+                terms.append(lines[i])
+                i += 1
+        if defs:
+            return terms, defs, []
+        else:
+            return None, None, terms
+
+    def _getDef(self, lines):
+        if lines[0].startswith(':   '):
+            Def, theRest  = self.detectTabbed(lines[1:])
+            Def.insert(0, lines[0][4:])
+            return Def
+        return []
+
+    def _processDef(self, parentElem, terms, defs):
+        children = parentElem.getchildren()
+        if children and children[-1].tag == "dl":
+            dl = children[-1]
+        else:
+            dl = etree.SubElement(parentElem, "dl")
+        for term in terms:
+            dt = etree.SubElement(dl, "dt")
+            dt.text = term
+        for d in defs:
+            dd = etree.SubElement(dl, "dd")
+            self.parseChunk(dd, d)
+
+    def _MarkdownParser__processParagraph(self, parentElem, paragraph, inList, looseList):
+
+        if ( parentElem.tag == 'li'
+                and not (looseList or parentElem.getchildren())):
+
+            # If this is the first paragraph inside "li", don't
+            # put <p> around it - append the paragraph bits directly
+            # onto parentElem
+            el = parentElem
+        else:
+            # Otherwise make a "p" element
+            el = etree.SubElement(parentElem, "p")
+
+        dump = []
+
+        # Searching for hr or header
+        for line in paragraph:
+            # it's hr
+            if CORE_RE["isline3"].match(line):
+                el.text = "\n".join(dump)
+                self.__processHR(el)
+                dump = []
+            # it's header
+            elif line.startswith("#"):
+                el.text = "\n".join(dump)
+                self.__processHeader(parentElem, [line])
+                dump = []
+            else:
+                dump.append(line)
+        if dump:
+            text = "\n".join(dump)
+            el.text = text
+
+
+class DefListExtension(markdown.Extension):
+    """ Add definition lists to Markdown. """
+
+    def extendMarkdown(self, md, md_globals):
+        """ Set the core parser to an instance of DefListParser. """
+        md.parser = DefListParser()
+
+
+def makeExtension(configs={}):
+    return DefListExtension(configs=configs)
+
diff --git a/test-markdown.py b/test-markdown.py
index 3d89e3b..1445799 100755
--- a/test-markdown.py
+++ b/test-markdown.py
@@ -375,6 +375,7 @@ testDirectory("tests/safe_mode", measure_time=True, safe_mode="escape")
 testDirectory("tests/extensions-x-codehilite")
 testDirectory("tests/extensions-x-wikilinks")
 testDirectory("tests/extensions-x-toc")
+testDirectory("tests/extensions-x-def_list")
 
 print "\n### Final result ###"
 if len(failedTests):
diff --git a/tests/extensions-x-def_list/simple_def-lists.html b/tests/extensions-x-def_list/simple_def-lists.html
new file mode 100644
index 0000000..7cb4d81
--- /dev/null
+++ b/tests/extensions-x-def_list/simple_def-lists.html
@@ -0,0 +1,43 @@
+<p>Some text</p>
+<dl>
+<dt>term1</dt>
+<dd>
+<p>Def1</p>
+</dd>
+<dt>term2-1</dt>
+<dt>term2-2</dt>
+<dd>
+<p>Def2-1</p>
+</dd>
+<dd>
+<p>Def2-2</p>
+</dd>
+</dl>
+<p>more text</p>
+<dl>
+<dt>term <em>3</em></dt>
+<dd>
+<p>def 3
+line <strong>2</strong> of def 3</p>
+<p>paragraph 2 of def 3.</p>
+</dd>
+<dd>
+<p>def 3-2</p>
+<pre><code># A code block in a def
+</code></pre>
+<blockquote>
+<p>a blockquote</p>
+</blockquote>
+<ul>
+<li>
+<p>a list item</p>
+</li>
+<li>
+<blockquote>
+<p>blockquote in list</p>
+</blockquote>
+</li>
+</ul>
+</dd>
+</dl>
+<p>final text.</p>
+\ No newline at end of file
diff --git a/tests/extensions-x-def_list/simple_def-lists.txt b/tests/extensions-x-def_list/simple_def-lists.txt
new file mode 100644
index 0000000..20c028a
--- /dev/null
+++ b/tests/extensions-x-def_list/simple_def-lists.txt
@@ -0,0 +1,29 @@
+Some text
+
+term1
+:   Def1
+
+term2-1
+term2-2
+:   Def2-1
+:   Def2-2
+
+more text
+
+term *3*
+:   def 3
+    line __2__ of def 3
+
+    paragraph 2 of def 3.
+
+:   def 3-2
+
+        # A code block in a def
+
+    > a blockquote
+
+    * a list item
+
+    * > blockquote in list
+
+final text.
author	Waylan Limberg <waylan@gmail.com>	2008-11-04 17:37:43 -0500
committer	Waylan Limberg <waylan@gmail.com>	2008-11-04 17:37:43 -0500
commit	a4106db7aff9e961f0c40a738f19ccf06a7cae6c (patch)
tree	b403e1af3332899d3d92898bc37ea117383c2e1e
parent	cbd702c752971b0f55e88cdb08ccf6965061bbf4 (diff)
download	markdown-a4106db7aff9e961f0c40a738f19ccf06a7cae6c.tar.gz markdown-a4106db7aff9e961f0c40a738f19ccf06a7cae6c.tar.bz2 markdown-a4106db7aff9e961f0c40a738f19ccf06a7cae6c.zip