path: root/markdown.py
diff options
authorWaylan Limberg <waylan@gmail.com>2008-10-19 23:22:05 -0400
committerWaylan Limberg <waylan@gmail.com>2008-10-19 23:22:05 -0400
commit53f95cde694975e556db7569d3bd89f84459e7bb (patch)
treeebc2fe47208170c20be63d690fc5a19364f8f7e1 /markdown.py
parent15224bd352bc6c06ae05ffd78d5ecee9ea07f6ef (diff)
Made InlineProcessor a TreeProcessor. Now an extension can manipulate the tree either before or after InlinePatterns are run. Updated docs as well. This change should not affect existing extensions.
Diffstat (limited to 'markdown.py')
1 files changed, 281 insertions, 285 deletions
diff --git a/markdown.py b/markdown.py
index ae179b4..437530c 100755
--- a/markdown.py
+++ b/markdown.py
@@ -577,277 +577,6 @@ class MarkdownParser:
-This class handles basic Markdown parsing. It doesn't concern itself with
-inline elements such as **bold** or *italics*, but rather just catches blocks,
-lists, quotes, etc.
-class InlineProcessor:
- """
- An auxiliary class to traverse a Markdown tree, applying inline patterns.
- """
- def __init__ (self, patterns):
- self.__inlinePatterns = patterns
- self.__placeholder_prefix = INLINE_PLACEHOLDER_PREFIX
- self.__placeholder_suffix = ETX
- self.__placeholder_length = 4 + len(self.__placeholder_prefix) \
- + len(self.__placeholder_suffix)
- self.__placeholder_re = re.compile(INLINE_PLACEHOLDER % r'([0-9]{4})')
- def __makePlaceholder(self, type):
- """ Generate a placeholder """
- id = "%04d" % len(self.stashed_nodes)
- return hash, id
- def __findPlaceholder(self, data, index):
- """
- Extract id from data string, start from index
- Keyword arguments:
- * data: string
- * index: index, from which we start search
- Returns: placeholder id and string index, after the found placeholder.
- """
- m = self.__placeholder_re.search(data, index)
- if m:
- return m.group(1), m.end()
- else:
- return None, index + 1
- def __stashNode(self, node, type):
- """ Add node to stash """
- placeholder, id = self.__makePlaceholder(type)
- self.stashed_nodes[id] = node
- return placeholder
- def __handleInline(self, data, patternIndex=0):
- """
- Process string with inline patterns and replace it
- with placeholders
- Keyword arguments:
- * data: A line of Markdown text
- * patternIndex: The index of the inlinePattern to start with
- Returns: String with placeholders.
- """
- if not isinstance(data, AtomicString):
- startIndex = 0
- while patternIndex < len(self.__inlinePatterns):
- data, matched, startIndex = self.__applyPattern(
- self.__inlinePatterns[patternIndex],
- data, patternIndex, startIndex)
- if not matched:
- patternIndex += 1
- return data
- def __processElementText(self, node, subnode, isText=True):
- """
- Process placeholders in Element.text or Element.tail
- of Elements popped from self.stashed_nodes.
- Keywords arguments:
- * node: parent node
- * subnode: processing node
- * isText: bool variable, True - it's text, False - it's tail
- Returns: None
- """
- if isText:
- text = subnode.text
- subnode.text = None
- else:
- text = subnode.tail
- subnode.tail = None
- childResult = self.__processPlaceholders(text, subnode)
- if not isText and node is not subnode:
- pos = node.getchildren().index(subnode)
- node.remove(subnode)
- else:
- pos = 0
- childResult.reverse()
- for newChild in childResult:
- node.insert(pos, newChild)
- def __processPlaceholders(self, data, parent):
- """
- Process string with placeholders and generate ElementTree tree.
- Keyword arguments:
- * data: string with placeholders instead of ElementTree elements.
- * parent: Element, which contains processing inline data
- Returns: list with ElementTree elements with applied inline patterns.
- """
- def linkText(text):
- if text:
- if result:
- if result[-1].tail:
- result[-1].tail += text
- else:
- result[-1].tail = text
- else:
- if parent.text:
- parent.text += text
- else:
- parent.text = text
- result = []
- strartIndex = 0
- while data:
- index = data.find(self.__placeholder_prefix, strartIndex)
- if index != -1:
- id, phEndIndex = self.__findPlaceholder(data, index)
- if self.stashed_nodes.has_key(id):
- node = self.stashed_nodes.get(id)
- if index > 0:
- text = data[strartIndex:index]
- linkText(text)
- if not isString(node): # it's Element
- for child in [node] + node.getchildren():
- if child.tail:
- if child.tail.strip():
- self.__processElementText(node, child, False)
- if child.text:
- if child.text.strip():
- self.__processElementText(child, child)
- else: # it's just a string
- linkText(node)
- strartIndex = phEndIndex
- continue
- strartIndex = phEndIndex
- result.append(node)
- else: # wrong placeholder
- end = index + len(prefix)
- linkText(data[strartIndex:end])
- strartIndex = end
- else:
- text = data[strartIndex:]
- linkText(text)
- data = ""
- return result
- def __applyPattern(self, pattern, data, patternIndex, startIndex=0):
- """
- Check if the line fits the pattern, create the necessary
- elements, add it to stashed_nodes.
- Keyword arguments:
- * data: the text to be processed
- * pattern: the pattern to be checked
- * patternIndex: index of current pattern
- * startIndex: string index, from which we starting search
- Returns: String with placeholders instead of ElementTree elements.
- """
- match = pattern.getCompiledRegExp().match(data[startIndex:])
- leftData = data[:startIndex]
- if not match:
- return data, False, 0
- node = pattern.handleMatch(match)
- if node is None:
- return data, True, len(leftData) + match.span(len(match.groups()))[0]
- if not isString(node):
- if not isinstance(node.text, AtomicString):
- # We need to process current node too
- for child in [node] + node.getchildren():
- if not isString(node):
- if child.text:
- child.text = self.__handleInline(child.text,
- patternIndex + 1)
- if child.tail:
- child.tail = self.__handleInline(child.tail,
- patternIndex)
- placeholder = self.__stashNode(node, pattern.type())
- return "%s%s%s%s" % (leftData,
- match.group(1),
- placeholder, match.groups()[-1]), True, 0
- def applyInlinePatterns(self, markdownTree):
- """Apply inline patterns to a parsed Markdown tree.
- Iterate over ElementTree, find elements with inline tag, apply inline
- patterns and append newly created Elements to tree. If you don't
- want process your data with inline paterns, instead of normal string,
- use subclass AtomicString:
- node.text = AtomicString("data won't be processed with inline patterns")
- Arguments:
- * markdownTree: ElementTree object, representing Markdown tree.
- Returns: ElementTree object with applied inline patterns.
- """
- self.stashed_nodes = {}
- stack = [markdownTree.getroot()]
- while stack:
- currElement = stack.pop()
- insertQueue = []
- for child in currElement.getchildren():
- if child.text and not isinstance(child.text, AtomicString):
- text = child.text
- child.text = None
- lst = self.__processPlaceholders(self.__handleInline(
- text), child)
- stack += lst
- insertQueue.append((child, lst))
- if child.getchildren():
- stack.append(child)
- for element, lst in insertQueue:
- if element.text:
- element.text = handleAttributes(element.text, element)
- i = 0
- for newChild in lst:
- # Processing attributes
- if newChild.tail:
- newChild.tail = handleAttributes(newChild.tail,
- element)
- if newChild.text:
- newChild.text = handleAttributes(newChild.text,
- newChild)
- element.insert(i, newChild)
- i += 1
- return markdownTree
@@ -1482,25 +1211,266 @@ class Treeprocessor(Processor):
-class Postprocessor(Processor):
+class InlineProcessor(Treeprocessor):
+ """
+ A Treeprocessor that traverses a tree, applying inline patterns.
- Postprocessors are run after the ElementTree it converted back into text.
- Each Postprocessor implements a "run" method that takes a pointer to a
- text string, modifies it as necessary and returns a text string.
+ def __init__ (self, md):
+ #self.__inlinePatterns = patterns
+ self.__placeholder_prefix = INLINE_PLACEHOLDER_PREFIX
+ self.__placeholder_suffix = ETX
+ self.__placeholder_length = 4 + len(self.__placeholder_prefix) \
+ + len(self.__placeholder_suffix)
+ self.__placeholder_re = re.compile(INLINE_PLACEHOLDER % r'([0-9]{4})')
- Postprocessors must extend markdown.Postprocessor.
+ def __makePlaceholder(self, type):
+ """ Generate a placeholder """
+ id = "%04d" % len(self.stashed_nodes)
+ return hash, id
- """
+ def __findPlaceholder(self, data, index):
+ """
+ Extract id from data string, start from index
- def run(self, text):
+ Keyword arguments:
+ * data: string
+ * index: index, from which we start search
+ Returns: placeholder id and string index, after the found placeholder.
- Subclasses of Postprocessor should implement a `run` method, which
- takes the html document as a single text string and returns a
- (possibly modified) string.
+ m = self.__placeholder_re.search(data, index)
+ if m:
+ return m.group(1), m.end()
+ else:
+ return None, index + 1
+ def __stashNode(self, node, type):
+ """ Add node to stash """
+ placeholder, id = self.__makePlaceholder(type)
+ self.stashed_nodes[id] = node
+ return placeholder
+ def __handleInline(self, data, patternIndex=0):
- pass
+ Process string with inline patterns and replace it
+ with placeholders
+ Keyword arguments:
+ * data: A line of Markdown text
+ * patternIndex: The index of the inlinePattern to start with
+ Returns: String with placeholders.
+ """
+ if not isinstance(data, AtomicString):
+ startIndex = 0
+ while patternIndex < len(self.patterns):
+ data, matched, startIndex = self.__applyPattern(
+ self.patterns[patternIndex],
+ data, patternIndex, startIndex)
+ if not matched:
+ patternIndex += 1
+ return data
+ def __processElementText(self, node, subnode, isText=True):
+ """
+ Process placeholders in Element.text or Element.tail
+ of Elements popped from self.stashed_nodes.
+ Keyword arguments:
+ * node: parent node
+ * subnode: processing node
+ * isText: bool variable, True - it's text, False - it's tail
+ Returns: None
+ """
+ if isText:
+ text = subnode.text
+ subnode.text = None
+ else:
+ text = subnode.tail
+ subnode.tail = None
+ childResult = self.__processPlaceholders(text, subnode)
+ if not isText and node is not subnode:
+ pos = node.getchildren().index(subnode)
+ node.remove(subnode)
+ else:
+ pos = 0
+ childResult.reverse()
+ for newChild in childResult:
+ node.insert(pos, newChild)
+ def __processPlaceholders(self, data, parent):
+ """
+ Process string with placeholders and generate ElementTree tree.
+ Keyword arguments:
+ * data: string with placeholders instead of ElementTree elements.
+ * parent: Element, which contains processing inline data
+ Returns: list with ElementTree elements with applied inline patterns.
+ """
+ def linkText(text):
+ if text:
+ if result:
+ if result[-1].tail:
+ result[-1].tail += text
+ else:
+ result[-1].tail = text
+ else:
+ if parent.text:
+ parent.text += text
+ else:
+ parent.text = text
+ result = []
+ strartIndex = 0
+ while data:
+ index = data.find(self.__placeholder_prefix, strartIndex)
+ if index != -1:
+ id, phEndIndex = self.__findPlaceholder(data, index)
+ if self.stashed_nodes.has_key(id):
+ node = self.stashed_nodes.get(id)
+ if index > 0:
+ text = data[strartIndex:index]
+ linkText(text)
+ if not isString(node): # it's Element
+ for child in [node] + node.getchildren():
+ if child.tail:
+ if child.tail.strip():
+ self.__processElementText(node, child, False)
+ if child.text:
+ if child.text.strip():
+ self.__processElementText(child, child)
+ else: # it's just a string
+ linkText(node)
+ strartIndex = phEndIndex
+ continue
+ strartIndex = phEndIndex
+ result.append(node)
+ else: # wrong placeholder
+ end = index + len(prefix)
+ linkText(data[strartIndex:end])
+ strartIndex = end
+ else:
+ text = data[strartIndex:]
+ linkText(text)
+ data = ""
+ return result
+ def __applyPattern(self, pattern, data, patternIndex, startIndex=0):
+ """
+ Check if the line fits the pattern, create the necessary
+ elements, add it to stashed_nodes.
+ Keyword arguments:
+ * data: the text to be processed
+ * pattern: the pattern to be checked
+ * patternIndex: index of current pattern
+ * startIndex: string index, from which we starting search
+ Returns: String with placeholders instead of ElementTree elements.
+ """
+ match = pattern.getCompiledRegExp().match(data[startIndex:])
+ leftData = data[:startIndex]
+ if not match:
+ return data, False, 0
+ node = pattern.handleMatch(match)
+ if node is None:
+ return data, True, len(leftData) + match.span(len(match.groups()))[0]
+ if not isString(node):
+ if not isinstance(node.text, AtomicString):
+ # We need to process current node too
+ for child in [node] + node.getchildren():
+ if not isString(node):
+ if child.text:
+ child.text = self.__handleInline(child.text,
+ patternIndex + 1)
+ if child.tail:
+ child.tail = self.__handleInline(child.tail,
+ patternIndex)
+ placeholder = self.__stashNode(node, pattern.type())
+ return "%s%s%s%s" % (leftData,
+ match.group(1),
+ placeholder, match.groups()[-1]), True, 0
+ def run(self, tree):
+ """Apply inline patterns to a parsed Markdown tree.
+ Iterate over ElementTree, find elements with inline tag, apply inline
+ patterns and append newly created Elements to tree. If you don't
+ want to process your data with inline patterns, instead of a normal
+ string, use the AtomicString subclass:
+ node.text = AtomicString("data won't be processed with inline patterns")
+ Arguments:
+ * tree: root element of the parsed Markdown tree.
+ Returns: the same element, with inline patterns applied.
+ """
+ self.stashed_nodes = {}
+ stack = [tree]
+ while stack:
+ currElement = stack.pop()
+ insertQueue = []
+ for child in currElement.getchildren():
+ if child.text and not isinstance(child.text, AtomicString):
+ text = child.text
+ child.text = None
+ lst = self.__processPlaceholders(self.__handleInline(
+ text), child)
+ stack += lst
+ insertQueue.append((child, lst))
+ if child.getchildren():
+ stack.append(child)
+ for element, lst in insertQueue:
+ if element.text:
+ element.text = handleAttributes(element.text, element)
+ i = 0
+ for newChild in lst:
+ # Processing attributes
+ if newChild.tail:
+ newChild.tail = handleAttributes(newChild.tail,
+ element)
+ if newChild.text:
+ newChild.text = handleAttributes(newChild.text,
+ newChild)
+ element.insert(i, newChild)
+ i += 1
+ return tree
class PrettifyTreeprocessor(Treeprocessor):
@@ -1533,6 +1503,28 @@ class PrettifyTreeprocessor(Treeprocessor):
br.tail = '\n%s' % br.tail
+class Postprocessor(Processor):
+ """
+ Postprocessors are run after the ElementTree is converted back into text.
+ Each Postprocessor implements a "run" method that takes a pointer to a
+ text string, modifies it as necessary and returns a text string.
+ Postprocessors must extend markdown.Postprocessor.
+ """
+ def run(self, text):
+ """
+ Subclasses of Postprocessor should implement a `run` method, which
+ takes the html document as a single text string and returns a
+ (possibly modified) string.
+ """
+ pass
class RawHtmlPostprocessor(Postprocessor):
""" Restore raw html to the document. """
@@ -1801,6 +1793,7 @@ class Markdown:
# footnote preprocessor will be inserted with "<reference"
self.treeprocessors = Treap()
+ self.treeprocessors.add("inline", InlineProcessor(self))
self.treeprocessors.add("prettify", PrettifyTreeprocessor(self))
self.postprocessors = Treap()
@@ -1841,6 +1834,9 @@ class Markdown:
configs = extension_configs)
+ # Sort and add patterns only after all extensions are loaded.
+ self.treeprocessors['inline'].patterns = self.inlinePatterns.heapsorted()
def registerExtensions(self, extensions, configs):
Register extensions with this instance of Markdown.
@@ -1915,11 +1911,11 @@ class Markdown:
self.lines = prep.run(self.lines)
# Parse the high-level elements.
- tree = self.parser.parseDocument(self.lines)
+ root = self.parser.parseDocument(self.lines).getroot()
# Apply inline patterns
- inlineProcessor = InlineProcessor(self.inlinePatterns.heapsorted())
- root = inlineProcessor.applyInlinePatterns(tree).getroot()
+ #inlineProcessor = InlineProcessor(self.inlinePatterns.heapsorted())
+ #root = inlineProcessor.applyInlinePatterns(tree).getroot()
# Run the tree-processors
for treeprocessor in self.treeprocessors.heapsorted():