diff options
author | Waylan Limberg <waylan@gmail.com> | 2008-11-04 17:37:43 -0500 |
---|---|---|
committer | Waylan Limberg <waylan@gmail.com> | 2008-11-04 17:37:43 -0500 |
commit | a4106db7aff9e961f0c40a738f19ccf06a7cae6c (patch) | |
tree | b403e1af3332899d3d92898bc37ea117383c2e1e | |
parent | cbd702c752971b0f55e88cdb08ccf6965061bbf4 (diff) | |
download | markdown-a4106db7aff9e961f0c40a738f19ccf06a7cae6c.tar.gz markdown-a4106db7aff9e961f0c40a738f19ccf06a7cae6c.tar.bz2 markdown-a4106db7aff9e961f0c40a738f19ccf06a7cae6c.zip |
Added Definition List extension. This is a first draft. A few of the more advanced features in PHP's implementation don't work yet. Most notably, a blank line between a term and its first definition will break things. See the included test for what works.
-rw-r--r-- | markdown_extensions/def_list.py | 198 | ||||
-rwxr-xr-x | test-markdown.py | 1 | ||||
-rw-r--r-- | tests/extensions-x-def_list/simple_def-lists.html | 43 | ||||
-rw-r--r-- | tests/extensions-x-def_list/simple_def-lists.txt | 29 |
4 files changed, 271 insertions, 0 deletions
diff --git a/markdown_extensions/def_list.py b/markdown_extensions/def_list.py new file mode 100644 index 0000000..a988e13 --- /dev/null +++ b/markdown_extensions/def_list.py @@ -0,0 +1,198 @@ +#!/usr/bin/env Python +""" +Definition List Extension for Python-Markdown +============================================= + +Added parsing of Definition Lists to Python-Markdown. + +A simple example: + + Apple + : Pomaceous fruit of plants of the genus Malus in + the family Rosaceae. + : An american computer company. + + Orange + : The fruit of an evergreen tree of the genus Citrus. + +Copyright 2008 - [Waylan Limberg](http://achinghead.com) + +""" + +import markdown +from markdown import etree, CORE_RE + +class DefListParser(markdown.MarkdownParser): + """ Subclass of MarkdownParser which adds definition list parsing. """ + + def parseChunk(self, parent_elem, lines, inList=0, looseList=0): + """Process a chunk of markdown-formatted text and attach the parse to + an ElementTree node. + + Process a section of a source document, looking for high + level structural elements like lists, block quotes, code + segments, html blocks, etc. Some those then get stripped + of their high level markup (e.g. get unindented) and the + lower-level markup is processed recursively. + + Keyword arguments: + + * parent_elem: The ElementTree element to which the content will be + added. + * lines: a list of lines + * inList: a level + + Returns: None + + """ + # Loop through lines until none left. + while lines: + # Skipping empty line + if not lines[0]: + lines = lines[1:] + continue + + # Check if this section starts with a list, a blockquote or + # a code block. If so, process them. 
+ processFn = { 'ul': self._MarkdownParser__processUList, + 'ol': self._MarkdownParser__processOList, + 'quoted': self._MarkdownParser__processQuote, + 'tabbed': self._MarkdownParser__processCodeBlock} + for regexp in ['ul', 'ol', 'quoted', 'tabbed']: + m = CORE_RE[regexp].match(lines[0]) + if m: + processFn[regexp](parent_elem, lines, inList) + return + + # We are NOT looking at one of the high-level structures like + # lists or blockquotes. So, it's just a regular paragraph + # (though perhaps nested inside a list or something else). If + # we are NOT inside a list, we just need to look for a blank + # line to find the end of the block. If we ARE inside a + # list, however, we need to consider that a sublist does not + # need to be separated by a blank line. Rather, the following + # markup is legal: + # + # * The top level list item + # + # Another paragraph of the list. This is where we are now. + # * Underneath we might have a sublist. + # + + if inList: + start, lines = self._MarkdownParser__linesUntil(lines, (lambda line: + CORE_RE['ul'].match(line) + or CORE_RE['ol'].match(line) + or not line.strip())) + self.parseChunk(parent_elem, start, inList-1, + looseList=looseList) + inList = inList-1 + + else: # Ok, so it's just a simple block + test = lambda line: not line.strip() or line[0] == '>' + paragraph, lines = self._MarkdownParser__linesUntil(lines, test) + if len(paragraph) and paragraph[0].startswith('#'): + self._MarkdownParser__processHeader(parent_elem, paragraph) + elif len(paragraph) and CORE_RE["isline3"].match(paragraph[0]): + self._MarkdownParser__processHR(parent_elem) + lines = paragraph[1:] + lines + elif paragraph: + terms, defs, paragraph = self._getDefs(paragraph) + if defs: + # check for extra paragraphs of a def + extradef, lines = self.detectTabbed(lines) + defs[-1].extend(extradef) + self._processDef(parent_elem, terms, defs) + if len(paragraph): + self._MarkdownParser__processParagraph(parent_elem, + paragraph, + inList, + looseList) + 
+ if lines and not lines[0].strip(): + lines = lines[1:] # skip the first (blank) line + + + def _getDefs(self, lines): + terms = [] + defs = [] + i = 0 + while i < len(lines): + if lines[i].startswith(': '): + d = self._getDef(lines[i:]) + if d: + defs.append(d) + i += len(d) + else: + terms.append(lines[i]) + i += 1 + if defs: + return terms, defs, [] + else: + return None, None, terms + + def _getDef(self, lines): + if lines[0].startswith(': '): + Def, theRest = self.detectTabbed(lines[1:]) + Def.insert(0, lines[0][4:]) + return Def + return [] + + def _processDef(self, parentElem, terms, defs): + children = parentElem.getchildren() + if children and children[-1].tag == "dl": + dl = children[-1] + else: + dl = etree.SubElement(parentElem, "dl") + for term in terms: + dt = etree.SubElement(dl, "dt") + dt.text = term + for d in defs: + dd = etree.SubElement(dl, "dd") + self.parseChunk(dd, d) + + def _MarkdownParser__processParagraph(self, parentElem, paragraph, inList, looseList): + + if ( parentElem.tag == 'li' + and not (looseList or parentElem.getchildren())): + + # If this is the first paragraph inside "li", don't + # put <p> around it - append the paragraph bits directly + # onto parentElem + el = parentElem + else: + # Otherwise make a "p" element + el = etree.SubElement(parentElem, "p") + + dump = [] + + # Searching for hr or header + for line in paragraph: + # it's hr + if CORE_RE["isline3"].match(line): + el.text = "\n".join(dump) + self.__processHR(el) + dump = [] + # it's header + elif line.startswith("#"): + el.text = "\n".join(dump) + self.__processHeader(parentElem, [line]) + dump = [] + else: + dump.append(line) + if dump: + text = "\n".join(dump) + el.text = text + + +class DefListExtension(markdown.Extension): + """ Add definition lists to Markdown. """ + + def extendMarkdown(self, md, md_globals): + """ Set the core parser to an instance of DefListParser. 
""" + md.parser = DefListParser() + + +def makeExtension(configs={}): + return DefListExtension(configs=configs) + diff --git a/test-markdown.py b/test-markdown.py index 3d89e3b..1445799 100755 --- a/test-markdown.py +++ b/test-markdown.py @@ -375,6 +375,7 @@ testDirectory("tests/safe_mode", measure_time=True, safe_mode="escape") testDirectory("tests/extensions-x-codehilite") testDirectory("tests/extensions-x-wikilinks") testDirectory("tests/extensions-x-toc") +testDirectory("tests/extensions-x-def_list") print "\n### Final result ###" if len(failedTests): diff --git a/tests/extensions-x-def_list/simple_def-lists.html b/tests/extensions-x-def_list/simple_def-lists.html new file mode 100644 index 0000000..7cb4d81 --- /dev/null +++ b/tests/extensions-x-def_list/simple_def-lists.html @@ -0,0 +1,43 @@ +<p>Some text</p> +<dl> +<dt>term1</dt> +<dd> +<p>Def1</p> +</dd> +<dt>term2-1</dt> +<dt>term2-2</dt> +<dd> +<p>Def2-1</p> +</dd> +<dd> +<p>Def2-2</p> +</dd> +</dl> +<p>more text</p> +<dl> +<dt>term <em>3</em></dt> +<dd> +<p>def 3 +line <strong>2</strong> of def 3</p> +<p>paragraph 2 of def 3.</p> +</dd> +<dd> +<p>def 3-2</p> +<pre><code># A code block in a def +</code></pre> +<blockquote> +<p>a blockquote</p> +</blockquote> +<ul> +<li> +<p>a list item</p> +</li> +<li> +<blockquote> +<p>blockquote in list</p> +</blockquote> +</li> +</ul> +</dd> +</dl> +<p>final text.</p>
\ No newline at end of file diff --git a/tests/extensions-x-def_list/simple_def-lists.txt b/tests/extensions-x-def_list/simple_def-lists.txt new file mode 100644 index 0000000..20c028a --- /dev/null +++ b/tests/extensions-x-def_list/simple_def-lists.txt @@ -0,0 +1,29 @@ +Some text + +term1 +: Def1 + +term2-1 +term2-2 +: Def2-1 +: Def2-2 + +more text + +term *3* +: def 3 + line __2__ of def 3 + + paragraph 2 of def 3. + +: def 3-2 + + # A code block in a def + + > a blockquote + + * a list item + + * > blockquote in list + +final text. |