# markdown/blockparser.py
#
# Introduced by commit 159a274a977c496434dbc484a1b253663cde4eed
# (Yuri Takhteyev, Mon, 17 Nov 2008): "Attempting a refactoring, breaking
# markdown into multiple files."

import markdown


class State(list):
    """ Track the current and nested state of the parser.

    This utility class is used to track the state of the BlockParser and
    support multiple levels of nesting. It's just a simple API wrapped around
    a list. Each time a state is set, that state is appended to the end of the
    list. Each time a state is reset, that state is removed from the end of
    the list.

    Therefore, each time a state is set for a nested block, that state must be
    reset when we back out of that level of nesting or the state could be
    corrupted.

    While all the methods of a list object are available, only the three
    defined below need be used.

    """

    def set(self, state):
        """ Set a new state by pushing it onto the end of the list. """
        self.append(state)

    def reset(self):
        """ Step back one step in nested state.

        Removes the most recently set state. As with ``list.pop``, calling
        this when no state is set raises ``IndexError``.

        """
        self.pop()

    def isstate(self, state):
        """ Test that top (current) level is of given state.

        Returns ``False`` when no state has been set at all.

        """
        # An empty list short-circuits to False; otherwise compare the
        # innermost (last pushed) state with the one asked about.
        return bool(self) and self[-1] == state


class BlockParser:
    """ Parse Markdown blocks into an ElementTree object.

    A wrapper class that stitches the various BlockProcessors together,
    looping through them and creating an ElementTree object.

    Attributes:

    * ``blockprocessors``: a ``markdown.OrderedDict`` mapping a short name to
      a BlockProcessor instance. ``parseBlocks`` tries the processors in the
      order ``values()`` yields them and uses the first whose ``test``
      accepts the block (presumably registration order -- confirm against
      ``markdown.OrderedDict``).
    * ``state``: a ``State`` instance shared with the processors for tracking
      nesting.

    """

    def __init__(self):
        # The dict is attached to the parser *before* any processor is
        # constructed, since every processor receives this parser instance.
        self.blockprocessors = markdown.OrderedDict()
        processors = markdown.blockprocessors
        for name, processor_class in (
                ('empty', processors.EmptyBlockProcessor),
                ('indent', processors.ListIndentProcessor),
                ('code', processors.CodeBlockProcessor),
                ('hashheader', processors.HashHeaderProcessor),
                ('setextheader', processors.SetextHeaderProcessor),
                ('hr', processors.HRProcessor),
                ('olist', processors.OListProcessor),
                ('ulist', processors.UListProcessor),
                ('quote', processors.BlockQuoteProcessor),
                ('paragraph', processors.ParagraphProcessor)):
            self.blockprocessors[name] = processor_class(self)
        self.state = State()

    def parseDocument(self, lines):
        """ Parse a markdown document into an ElementTree.

        Given a list of lines, an ElementTree object (not just a parent
        Element) is created and the root element is passed to the parser as
        the parent. The ElementTree object is returned.

        This should only be called on an entire document, not pieces.

        """
        # Everything parsed from the document hangs off a single root <div>.
        root = markdown.etree.Element("div")
        self.parseChunk(root, '\n'.join(lines))
        return markdown.etree.ElementTree(root)

    def parseChunk(self, parent, text):
        """ Parse a chunk of markdown text and attach to given etree node.

        While the ``text`` argument is generally assumed to contain multiple
        blocks which will be split on blank lines, it could contain only one
        block. Generally, this method would be called by extensions when
        block parsing is required.

        The ``parent`` etree Element passed in is altered in place.
        Nothing is returned.

        """
        # Blocks are separated by one blank line, i.e. two newlines in a row.
        self.parseBlocks(parent, text.split('\n\n'))

    def parseBlocks(self, parent, blocks):
        """ Process blocks of markdown text and attach to given etree node.

        Given a list of ``blocks``, each blockprocessor is stepped through
        until there are no blocks left. While an extension could potentially
        call this method directly, it's generally expected to be used
        internally.

        This is a public method as an extension may need to add/alter
        additional BlockProcessors which call this method to recursively
        parse a nested block.

        The first processor whose ``test`` accepts the leading block has its
        ``run`` called with the whole ``blocks`` list; this loop never removes
        blocks itself, so ``run`` is relied upon to consume what it handles.

        Raises ``ValueError`` if no registered processor accepts the leading
        block. Without that check the loop would spin forever, because
        nothing would ever shorten ``blocks``.

        """
        while blocks:
            for processor in self.blockprocessors.values():
                if processor.test(parent, blocks[0]):
                    processor.run(parent, blocks)
                    break
            else:
                # No processor claimed the block: fail loudly rather than
                # retrying the same unchanged block indefinitely.
                raise ValueError(
                    "No block processor accepted block: %r" % (blocks[0],))