Attempting a refactoring, breaking markdown into multiple files.

author: Yuri Takhteyev <yuri@freewisdom.org> 2008-11-17 00:17:15 -0800
committer: Yuri Takhteyev <yuri@freewisdom.org> 2008-11-17 00:17:15 -0800
commit: 159a274a977c496434dbc484a1b253663cde4eed (patch)
tree: 53c9a6d3c69cbb8be3e6b47ea0d35cab075f03c9 /markdown/blockparser.py
parent: 3dfcbc8d7900aa0f07124c9d7598cb7ecc2ff41b (diff)
download: markdown-159a274a977c496434dbc484a1b253663cde4eed.tar.gz
markdown-159a274a977c496434dbc484a1b253663cde4eed.tar.bz2
markdown-159a274a977c496434dbc484a1b253663cde4eed.zip
1 files changed, 105 insertions, 0 deletions
diff --git a/markdown/blockparser.py b/markdown/blockparser.py
new file mode 100644
index 0000000..9e8c18f
--- /dev/null
+++ b/markdown/blockparser.py
@@ -0,0 +1,105 @@
+
+import markdown
+
+class State(list):
+    """ Track the current and nested state of the parser. 
+    
+    This utility class is used to track the state of the BlockParser and 
+    support multiple levels of nesting. It's just a simple API wrapped around
+    a list. Each time a state is set, that state is appended to the end of the
+    list. Each time a state is reset, that state is removed from the end of
+    the list.
+
+    Therefore, each time a state is set for a nested block, that state must be 
+    reset when we back out of that level of nesting or the state could be
+    corrupted.
+
+    While all the methods of a list object are available, only the three
+    defined below need be used.
+
+    """
+
+    def set(self, state):
+        """ Set a new state. """
+        self.append(state)
+
+    def reset(self):
+        """ Step back one step in nested state. """
+        self.pop()
+
+    def isstate(self, state):
+        """ Test that top (current) level is of given state. """
+        if len(self):
+            return self[-1] == state
+        else:
+            return False
+
+class BlockParser:
+    """ Parse Markdown blocks into an ElementTree object. 
+    
+    A wrapper class that stitches the various BlockProcessors together,
+    looping through them and creating an ElementTree object.
+    """
+
+    def __init__(self):
+        self.blockprocessors = markdown.OrderedDict()
+        self.blockprocessors['empty'] = markdown.blockprocessors.EmptyBlockProcessor(self)
+        self.blockprocessors['indent'] = markdown.blockprocessors.ListIndentProcessor(self)
+        self.blockprocessors['code'] = markdown.blockprocessors.CodeBlockProcessor(self)
+        self.blockprocessors['hashheader'] = markdown.blockprocessors.HashHeaderProcessor(self)
+        self.blockprocessors['setextheader'] = markdown.blockprocessors.SetextHeaderProcessor(self)
+        self.blockprocessors['hr'] = markdown.blockprocessors.HRProcessor(self)
+        self.blockprocessors['olist'] = markdown.blockprocessors.OListProcessor(self)
+        self.blockprocessors['ulist'] = markdown.blockprocessors.UListProcessor(self)
+        self.blockprocessors['quote'] = markdown.blockprocessors.BlockQuoteProcessor(self)
+        self.blockprocessors['paragraph'] = markdown.blockprocessors.ParagraphProcessor(self)
+        self.state = State()
+
+    def parseDocument(self, lines):
+        """ Parse a markdown document into an ElementTree. 
+        
+        Given a list of lines, an ElementTree object (not just a parent Element)
+        is created and the root element is passed to the parser as the parent.
+        The ElementTree object is returned.
+        
+        This should only be called on an entire document, not pieces.
+
+        """
+        # Create an ElementTree from the lines
+        root = markdown.etree.Element("div")
+        self.parseChunk(root, '\n'.join(lines))
+        return markdown.etree.ElementTree(root)
+
+    def parseChunk(self, parent, text):
+        """ Parse a chunk of markdown text and attach to given etree node. 
+        
+        While the ``text`` argument is generally assumed to contain multiple
+        blocks which will be split on blank lines, it could contain only one
+        block. Generally, this method would be called by extensions when
+        block parsing is required. 
+        
+        The ``parent`` etree Element passed in is altered in place. 
+        Nothing is returned.
+
+        """
+        self.parseBlocks(parent, text.split('\n\n'))
+
+    def parseBlocks(self, parent, blocks):
+        """ Process blocks of markdown text and attach to given etree node. 
+        
+        Given a list of ``blocks``, each blockprocessor is stepped through
+        until there are no blocks left. While an extension could potentially
+        call this method directly, it's generally expected to be used internally.
+
+        This is a public method as an extension may need to add/alter additional
+        BlockProcessors which call this method to recursively parse a nested
+        block.
+
+        """
+        while blocks:
+           for processor in self.blockprocessors.values():
+               if processor.test(parent, blocks[0]):
+                   processor.run(parent, blocks)
+                   break
+
+
author	Yuri Takhteyev <yuri@freewisdom.org>	2008-11-17 00:17:15 -0800
committer	Yuri Takhteyev <yuri@freewisdom.org>	2008-11-17 00:17:15 -0800
commit	159a274a977c496434dbc484a1b253663cde4eed (patch)
tree	53c9a6d3c69cbb8be3e6b47ea0d35cab075f03c9 /markdown/blockparser.py
parent	3dfcbc8d7900aa0f07124c9d7598cb7ecc2ff41b (diff)
download	markdown-159a274a977c496434dbc484a1b253663cde4eed.tar.gz markdown-159a274a977c496434dbc484a1b253663cde4eed.tar.bz2 markdown-159a274a977c496434dbc484a1b253663cde4eed.zip