about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Yuri Takhteyev <yuri@freewisdom.org> 2008-10-12 19:37:20 -0700
committer Yuri Takhteyev <yuri@freewisdom.org> 2008-10-12 19:37:20 -0700
commit 8b6b7b0a39321dadfcab4d0a16053377c4715bee (patch)
tree eba5691da176c5e93b41beb1a33843330cd82e32
parent 8e7e2a254859f61ec2632a11725d99c5ea0c9f09 (diff)
download markdown-8b6b7b0a39321dadfcab4d0a16053377c4715bee.tar.gz
markdown-8b6b7b0a39321dadfcab4d0a16053377c4715bee.tar.bz2
markdown-8b6b7b0a39321dadfcab4d0a16053377c4715bee.zip
Refactored markdown tree traversing logic into a separate class
(InlineProcessor).
-rwxr-xr-x markdown.py 391
1 file changed, 189 insertions, 202 deletions
diff --git a/markdown.py b/markdown.py
index 0aa530d..ae8dc10 100755
--- a/markdown.py
+++ b/markdown.py
@@ -1401,28 +1401,32 @@ class HtmlStash:
self.html_counter += 1
return placeholder
- def rest(self):
+ def reset(self):
self.html_counter = 0
self.rawHtmlBlocks = []
-
-class InlineStash:
-
- def __init__(self):
- """ Create a InlineStash. """
- self.prefix = INLINE_PLACEHOLDER_PREFIX
- self.suffix = ETX
- self._nodes = {}
- self.phLength = 4 + len(self.prefix) + len(self.suffix)
- self._placeholder_re = re.compile(INLINE_PLACEHOLDER % r'([0-9]{4})')
+
+class InlineProcessor:
+ """
+ An auxiliary class to traverse a Markdown tree, applying inline patterns.
+ """
+
+ def __init__ (self, patterns):
+ self.inlinePatterns = patterns
+
+ self.__placeholder_prefix = INLINE_PLACEHOLDER_PREFIX
+ self.__placeholder_suffix = ETX
+ self.__placeholder_length = 4 + len(self.__placeholder_prefix) \
+ + len(self.__placeholder_suffix)
+ self.__placeholder_re = re.compile(INLINE_PLACEHOLDER % r'([0-9]{4})')
- def _genPlaceholder(self, type):
+ def __makePlaceholder(self, type):
""" Generate a placeholder """
- id = "%04d" % len(self._nodes)
+ id = "%04d" % len(self.stashed_nodes)
hash = INLINE_PLACEHOLDER % id
return hash, id
- def extractId(self, data, index):
+ def __findPlaceholder(self, data, index):
"""
Extract id from data string, start from index
@@ -1434,164 +1438,19 @@ class InlineStash:
Returns: placeholder id and string index, after
found placeholder
"""
- m = self._placeholder_re.search(data, index)
+ m = self.__placeholder_re.search(data, index)
if m:
return m.group(1), m.end()
else:
return None, index + 1
- def isin(self, id):
- """ Check if node with given id exists in stash """
- return self._nodes.has_key(id)
-
- def get(self, id):
- """ Return node by id """
- return self._nodes.get(id)
-
- def add(self, node, type):
+ def __stashNode(self, node, type):
""" Add node to stash """
- pholder, id = self._genPlaceholder(type)
- self._nodes[id] = node
- return pholder
-
- def rest(self):
- """ Reset instance """
- self._nodes = {}
+ placeholder, id = self.__makePlaceholder(type)
+ self.stashed_nodes[id] = node
+ return placeholder
-
-
-class Markdown:
- """Convert Markdown to HTML."""
-
- def __init__(self,
- extensions=[],
- extension_configs={},
- safe_mode = False):
- """
- Creates a new Markdown instance.
-
- Keyword arguments:
-
- * extensions: A list of extensions.
- If they are of type string, the module mdx_name.py will be loaded.
- If they are a subclass of markdown.Extension, they will be used
- as-is.
- * extension-configs: Configuration setting for extensions.
- * safe_mode: Disallow raw html. One of "remove", "replace" or "escape".
-
- """
- self.parser = MarkdownParser()
- self.safeMode = safe_mode
- self.registeredExtensions = []
- self.docType = ""
- self.stripTopLevelTags = True
-
- self.textPreprocessors = [HTML_BLOCK_PREPROCESSOR]
-
- self.preprocessors = [HEADER_PREPROCESSOR,
- LINE_PREPROCESSOR,
- # A footnote preprocessor will
- # get inserted here
- REFERENCE_PREPROCESSOR]
-
-
- self.postprocessors = [PRETTIFYPOSTPROCESSOR,
- # a footnote postprocessor will get
- # inserted later
- ]
-
- self.textPostprocessors = [# a footnote postprocessor will get
- # inserted here
- RAWHTMLTEXTPOSTPROCESSOR,
- AMPSUBSTITUTETEXTPOSTPROCESSOR]
-
- self.prePatterns = []
-
- self.inlinePatterns = [
- BACKTICK_PATTERN,
- ESCAPE_PATTERN,
- REFERENCE_PATTERN,
- LINK_PATTERN,
- IMAGE_LINK_PATTERN,
- IMAGE_REFERENCE_PATTERN,
- AUTOLINK_PATTERN,
- AUTOMAIL_PATTERN,
- LINE_BREAK_PATTERN_2,
- LINE_BREAK_PATTERN,
- HTML_PATTERN,
- ENTITY_PATTERN,
- NOT_STRONG_PATTERN,
- STRONG_EM_PATTERN,
- STRONG_PATTERN,
- EMPHASIS_PATTERN,
- EMPHASIS_PATTERN_2
- # The order of the handlers matters!!!
- ]
-
- self.inlineStash = InlineStash()
- self.references = {}
- self.htmlStash = HtmlStash()
-
-
- self.registerExtensions(extensions = extensions,
- configs = extension_configs)
-
- self.reset()
-
-
- def registerExtensions(self, extensions, configs):
- """
- Register extensions with this instance of Markdown.
-
- Keyword aurguments:
-
- * extensions: A list of extensions, which can either
- be strings or objects. See the docstring on Markdown.
- * configs: A dictionary mapping module names to config options.
-
- """
- for ext in extensions:
- if isinstance(ext, basestring):
- ext = load_extension(ext, configs.get(ext, []))
- elif hasattr(ext, 'extendMarkdown'):
- # Looks like an Extension.
- # Nothing to do here.
- pass
- else:
- message(ERROR, "Incorrect type! Extension '%s' is "
- "neither a string or an Extension." %(repr(ext)))
- continue
- ext.extendMarkdown(self, globals())
-
- def registerExtension(self, extension):
- """ This gets called by the extension """
- self.registeredExtensions.append(extension)
-
- def reset(self):
- """
- Resets all state variables so that we can start with a new text.
- """
- self.inlineStash.rest()
- self.htmlStash.rest()
- self.references.clear()
-
- HTML_BLOCK_PREPROCESSOR.stash = self.htmlStash
- LINE_PREPROCESSOR.stash = self.htmlStash
- REFERENCE_PREPROCESSOR.references = self.references
- HTML_PATTERN.stash = self.htmlStash
- ENTITY_PATTERN.stash = self.htmlStash
- REFERENCE_PATTERN.references = self.references
- IMAGE_REFERENCE_PATTERN.references = self.references
- RAWHTMLTEXTPOSTPROCESSOR.stash = self.htmlStash
- RAWHTMLTEXTPOSTPROCESSOR.safeMode = self.safeMode
-
- for extension in self.registeredExtensions:
- extension.reset()
-
- for pattern in self.inlinePatterns:
- pattern.safe_mode = self.safeMode
-
- def _handleInline(self, data, patternIndex=0):
+ def __handleInline(self, data, patternIndex=0):
"""
Process string with inline patterns and replace it
with placeholders
@@ -1604,23 +1463,20 @@ class Markdown:
Returns: String with placeholders.
"""
- if isinstance(data, AtomicString):
- return data
-
- startIndex = 0
- while patternIndex < len(self.inlinePatterns):
- data, matched, startIndex = self._applyInline(
- self.inlinePatterns[patternIndex],
- data, patternIndex, startIndex)
- if not matched:
- patternIndex += 1
+ if not isinstance(data, AtomicString):
+ startIndex = 0
+ while patternIndex < len(self.inlinePatterns):
+ data, matched, startIndex = self.__applyPattern(
+ self.inlinePatterns[patternIndex],
+ data, patternIndex, startIndex)
+ if not matched:
+ patternIndex += 1
return data
-
- def _processElementText(self, node, subnode, isText=True):
+ def __processElementText(self, node, subnode, isText=True):
"""
Process placeholders in Element.text or Element.tail
- of Elements popped from InlineStash
+ of Elements popped from self.stashed_nodes.
Keywords arguments:
@@ -1638,7 +1494,7 @@ class Markdown:
text = subnode.tail
subnode.tail = None
- childResult = self._processPlaceholders(text, subnode)
+ childResult = self.__processPlaceholders(text, subnode)
if not isText and node is not subnode:
pos = node.getchildren().index(subnode)
@@ -1650,7 +1506,7 @@ class Markdown:
for newChild in childResult:
node.insert(pos, newChild)
- def _processPlaceholders(self, data, parent):
+ def __processPlaceholders(self, data, parent):
"""
Process string with placeholders and generate ElementTree tree.
@@ -1675,16 +1531,14 @@ class Markdown:
parent.text = text
result = []
- prefix = self.inlineStash.prefix
- strartIndex = 0
-
+ strartIndex = 0
while data:
- index = data.find(prefix, strartIndex)
+ index = data.find(self.__placeholder_prefix, strartIndex)
if index != -1:
- id, phEndIndex = self.inlineStash.extractId(data, index)
+ id, phEndIndex = self.__findPlaceholder(data, index)
- if self.inlineStash.isin(id):
- node = self.inlineStash.get(id)
+ if self.stashed_nodes.has_key(id):
+ node = self.stashed_nodes.get(id)
if index > 0:
text = data[strartIndex:index]
@@ -1694,10 +1548,10 @@ class Markdown:
for child in [node] + node.getchildren():
if child.tail:
if child.tail.strip():
- self._processElementText(node, child, False)
+ self.__processElementText(node, child, False)
if child.text:
if child.text.strip():
- self._processElementText(child, child)
+ self.__processElementText(child, child)
else: # it's just a string
linkText(node)
strartIndex = phEndIndex
@@ -1718,10 +1572,10 @@ class Markdown:
return result
- def _applyInline(self, pattern, data, patternIndex, startIndex=0):
+ def __applyPattern(self, pattern, data, patternIndex, startIndex=0):
"""
Check if the line fits the pattern, create the necessary
- elements, add it to InlineStash
+ elements, add it to stashed_nodes.
Keyword arguments:
@@ -1749,17 +1603,17 @@ class Markdown:
for child in [node] + node.getchildren():
if not isString(node):
if child.text:
- child.text = self._handleInline(child.text,
+ child.text = self.__handleInline(child.text,
patternIndex + 1)
if child.tail:
- child.tail = self._handleInline(child.tail,
+ child.tail = self.__handleInline(child.tail,
patternIndex)
- pholder = self.inlineStash.add(node, pattern.type())
+ placeholder = self.__stashNode(node, pattern.type())
return "%s%s%s%s" % (leftData,
match.group(1),
- pholder, match.groups()[-1]), True, 0
+ placeholder, match.groups()[-1]), True, 0
def applyInlinePatterns(self, markdownTree):
@@ -1777,6 +1631,8 @@ class Markdown:
Returns: ElementTree object with applied inline patterns.
"""
+ self.stashed_nodes = {}
+
stack = [markdownTree.getroot()]
while stack:
@@ -1786,7 +1642,7 @@ class Markdown:
if child.text and not isinstance(child.text, AtomicString):
text = child.text
child.text = None
- lst = self._processPlaceholders(self._handleInline(
+ lst = self.__processPlaceholders(self.__handleInline(
text), child)
stack += lst
insertQueue.append((child, lst))
@@ -1811,6 +1667,139 @@ class Markdown:
return markdownTree
+
+
+
+class Markdown:
+ """Convert Markdown to HTML."""
+
+ def __init__(self,
+ extensions=[],
+ extension_configs={},
+ safe_mode = False):
+ """
+ Creates a new Markdown instance.
+
+ Keyword arguments:
+
+ * extensions: A list of extensions.
+ If they are of type string, the module mdx_name.py will be loaded.
+ If they are a subclass of markdown.Extension, they will be used
+ as-is.
+ * extension-configs: Configuration setting for extensions.
+ * safe_mode: Disallow raw html. One of "remove", "replace" or "escape".
+
+ """
+ self.parser = MarkdownParser()
+ self.safeMode = safe_mode
+ self.registeredExtensions = []
+ self.docType = ""
+ self.stripTopLevelTags = True
+
+ self.textPreprocessors = [HTML_BLOCK_PREPROCESSOR]
+
+ self.preprocessors = [HEADER_PREPROCESSOR,
+ LINE_PREPROCESSOR,
+ # A footnote preprocessor will
+ # get inserted here
+ REFERENCE_PREPROCESSOR]
+
+
+ self.postprocessors = [PRETTIFYPOSTPROCESSOR,
+ # a footnote postprocessor will get
+ # inserted later
+ ]
+
+ self.textPostprocessors = [# a footnote postprocessor will get
+ # inserted here
+ RAWHTMLTEXTPOSTPROCESSOR,
+ AMPSUBSTITUTETEXTPOSTPROCESSOR]
+
+ self.prePatterns = []
+
+ self.inlinePatterns = [
+ BACKTICK_PATTERN,
+ ESCAPE_PATTERN,
+ REFERENCE_PATTERN,
+ LINK_PATTERN,
+ IMAGE_LINK_PATTERN,
+ IMAGE_REFERENCE_PATTERN,
+ AUTOLINK_PATTERN,
+ AUTOMAIL_PATTERN,
+ LINE_BREAK_PATTERN_2,
+ LINE_BREAK_PATTERN,
+ HTML_PATTERN,
+ ENTITY_PATTERN,
+ NOT_STRONG_PATTERN,
+ STRONG_EM_PATTERN,
+ STRONG_PATTERN,
+ EMPHASIS_PATTERN,
+ EMPHASIS_PATTERN_2
+ # The order of the handlers matters!!!
+ ]
+
+ self.inlineProcessor = InlineProcessor(self.inlinePatterns)
+ self.references = {}
+ self.htmlStash = HtmlStash()
+
+
+ self.registerExtensions(extensions = extensions,
+ configs = extension_configs)
+
+ self.reset()
+
+
+ def registerExtensions(self, extensions, configs):
+ """
+ Register extensions with this instance of Markdown.
+
+ Keyword aurguments:
+
+ * extensions: A list of extensions, which can either
+ be strings or objects. See the docstring on Markdown.
+ * configs: A dictionary mapping module names to config options.
+
+ """
+ for ext in extensions:
+ if isinstance(ext, basestring):
+ ext = load_extension(ext, configs.get(ext, []))
+ elif hasattr(ext, 'extendMarkdown'):
+ # Looks like an Extension.
+ # Nothing to do here.
+ pass
+ else:
+ message(ERROR, "Incorrect type! Extension '%s' is "
+ "neither a string or an Extension." %(repr(ext)))
+ continue
+ ext.extendMarkdown(self, globals())
+
+ def registerExtension(self, extension):
+ """ This gets called by the extension """
+ self.registeredExtensions.append(extension)
+
+ def reset(self):
+ """
+ Resets all state variables so that we can start with a new text.
+ """
+ self.htmlStash.reset()
+ self.references.clear()
+
+ HTML_BLOCK_PREPROCESSOR.stash = self.htmlStash
+ LINE_PREPROCESSOR.stash = self.htmlStash
+ REFERENCE_PREPROCESSOR.references = self.references
+ HTML_PATTERN.stash = self.htmlStash
+ ENTITY_PATTERN.stash = self.htmlStash
+ REFERENCE_PATTERN.references = self.references
+ IMAGE_REFERENCE_PATTERN.references = self.references
+ RAWHTMLTEXTPOSTPROCESSOR.stash = self.htmlStash
+ RAWHTMLTEXTPOSTPROCESSOR.safeMode = self.safeMode
+
+ for extension in self.registeredExtensions:
+ extension.reset()
+
+ for pattern in self.inlinePatterns:
+ pattern.safe_mode = self.safeMode
+
def convert (self, source):
"""Convert markdown to serialized XHTML."""
@@ -1823,10 +1812,8 @@ class Markdown:
message(CRITICAL, 'UnicodeDecodeError: Markdown only accepts unicode or ascii input.')
return u""
- source = source.replace(STX, "")
- source = source.replace(ETX, "")
- source = source.replace("\r\n", "\n").replace("\r", "\n")
- source += "\n\n"
+ source = source.replace(STX, "").replace(ETX, "")
+ source = source.replace("\r\n", "\n").replace("\r", "\n") + "\n\n"
source = source.expandtabs(TAB_LENGTH)
# Run the text preprocessors
@@ -1841,7 +1828,8 @@ class Markdown:
# Parse the high-level elements.
tree = self.parser.parseDocument(self.lines)
- root = self.applyInlinePatterns(tree).getroot()
+ # Apply inline patterns
+ root = self.inlineProcessor.applyInlinePatterns(tree).getroot()
# Run the post-processors
for postprocessor in self.postprocessors:
@@ -2042,7 +2030,6 @@ def markdownFromFile(input = None,
encoding = None,
safe = False):
-
md = Markdown(extensions=load_extensions(extensions),
safe_mode = safe_mode)
md.convertFile(input, output, encoding)