From a33a04439905851b5b1a5db4104ec3a11b4ab1d3 Mon Sep 17 00:00:00 2001
From: Waylan Limberg <waylan@gmail.com>
Date: Wed, 7 Jul 2010 10:11:45 -0400
Subject: Factored out the building of the various processors and patterns into
 utility functions called by a build_parser method on the Markdown class. 
 Editing of the processors and patterns now happens in one file for each
 type.  Additionally, a subclass of Markdown could potentially override the
 build_parser method and build a parser for a completely different markup
 language without first building the default and then overriding it.

---
 markdown/__init__.py        | 118 ++++++--------------------------------------
 markdown/blockprocessors.py |  18 +++++++
 markdown/inlinepatterns.py  |  28 +++++++++++
 markdown/postprocessors.py  |   9 ++++
 markdown/preprocessors.py   |  10 +++-
 markdown/treeprocessors.py  |  10 ++++
 6 files changed, 90 insertions(+), 103 deletions(-)
diff --git a/markdown/__init__.py b/markdown/__init__.py
index 6d16b0e..ef6603e 100644
--- a/markdown/__init__.py
+++ b/markdown/__init__.py
@@ -69,13 +69,11 @@ Those steps are put together by the Markdown() class.
 from logging import DEBUG, INFO, WARN, ERROR, CRITICAL
 from md_logging import message
 import util
-import preprocessors
-import blockprocessors
-import treeprocessors
-import inlinepatterns
-import postprocessors
-import blockparser
-import odict
+from preprocessors import build_preprocessors
+from blockprocessors import build_block_parser
+from treeprocessors import build_treeprocessors
+from inlinepatterns import build_inlinepatterns
+from postprocessors import build_postprocessors
 from extensions import Extension, load_extension, load_extensions
 
 # For backwards compatibility in the 2.0.x series
@@ -93,7 +91,6 @@ class Markdown:
     TAB_LENGTH = 4               # expand tabs to this many spaces
     ENABLE_ATTRIBUTES = True     # @id = xyz -> <... id="xyz">
     SMART_EMPHASIS = True        # this_or_that does not become this<i>or</i>that
-    DEFAULT_OUTPUT_FORMAT = 'xhtml1'     # xhtml or html4 output
     HTML_REMOVED_TEXT = "[HTML_REMOVED]" # text used instead of HTML in safe mode
     DOC_TAG = "div"     # Element used to wrap document - later removed
 
@@ -129,98 +126,7 @@ class Markdown:
         self.docType = ""
         self.stripTopLevelTags = True
 
-        # Preprocessors
-        self.preprocessors = odict.OrderedDict()
-        self.preprocessors["html_block"] = \
-                preprocessors.HtmlBlockPreprocessor(self)
-        self.preprocessors["reference"] = \
-                preprocessors.ReferencePreprocessor(self)
-        # footnote preprocessor will be inserted with "<reference"
-
-        # Block processors - ran by the parser
-        self.parser = blockparser.BlockParser(self)
-        self.parser.blockprocessors['empty'] = \
-                blockprocessors.EmptyBlockProcessor(self.parser)
-        self.parser.blockprocessors['indent'] = \
-                blockprocessors.ListIndentProcessor(self.parser)
-        self.parser.blockprocessors['code'] = \
-                blockprocessors.CodeBlockProcessor(self.parser)
-        self.parser.blockprocessors['hashheader'] = \
-                blockprocessors.HashHeaderProcessor(self.parser)
-        self.parser.blockprocessors['setextheader'] = \
-                blockprocessors.SetextHeaderProcessor(self.parser)
-        self.parser.blockprocessors['hr'] = \
-                blockprocessors.HRProcessor(self.parser)
-        self.parser.blockprocessors['olist'] = \
-                blockprocessors.OListProcessor(self.parser)
-        self.parser.blockprocessors['ulist'] = \
-                blockprocessors.UListProcessor(self.parser)
-        self.parser.blockprocessors['quote'] = \
-                blockprocessors.BlockQuoteProcessor(self.parser)
-        self.parser.blockprocessors['paragraph'] = \
-                blockprocessors.ParagraphProcessor(self.parser)
-
-
-        #self.prePatterns = []
-
-        # Inline patterns - Run on the tree
-        self.inlinePatterns = odict.OrderedDict()
-        self.inlinePatterns["backtick"] = \
-                inlinepatterns.BacktickPattern(inlinepatterns.BACKTICK_RE)
-        self.inlinePatterns["escape"] = \
-                inlinepatterns.SimpleTextPattern(inlinepatterns.ESCAPE_RE)
-        self.inlinePatterns["reference"] = \
-            inlinepatterns.ReferencePattern(inlinepatterns.REFERENCE_RE, self)
-        self.inlinePatterns["link"] = \
-                inlinepatterns.LinkPattern(inlinepatterns.LINK_RE, self)
-        self.inlinePatterns["image_link"] = \
-                inlinepatterns.ImagePattern(inlinepatterns.IMAGE_LINK_RE, self)
-        self.inlinePatterns["image_reference"] = \
-            inlinepatterns.ImageReferencePattern(inlinepatterns.IMAGE_REFERENCE_RE, self)
-        self.inlinePatterns["autolink"] = \
-            inlinepatterns.AutolinkPattern(inlinepatterns.AUTOLINK_RE, self)
-        self.inlinePatterns["automail"] = \
-            inlinepatterns.AutomailPattern(inlinepatterns.AUTOMAIL_RE, self)
-        self.inlinePatterns["linebreak2"] = \
-            inlinepatterns.SubstituteTagPattern(inlinepatterns.LINE_BREAK_2_RE, 'br')
-        self.inlinePatterns["linebreak"] = \
-            inlinepatterns.SubstituteTagPattern(inlinepatterns.LINE_BREAK_RE, 'br')
-        self.inlinePatterns["html"] = \
-                inlinepatterns.HtmlPattern(inlinepatterns.HTML_RE, self)
-        self.inlinePatterns["entity"] = \
-                inlinepatterns.HtmlPattern(inlinepatterns.ENTITY_RE, self)
-        self.inlinePatterns["not_strong"] = \
-                inlinepatterns.SimpleTextPattern(inlinepatterns.NOT_STRONG_RE)
-        self.inlinePatterns["strong_em"] = \
-            inlinepatterns.DoubleTagPattern(inlinepatterns.STRONG_EM_RE, 'strong,em')
-        self.inlinePatterns["strong"] = \
-            inlinepatterns.SimpleTagPattern(inlinepatterns.STRONG_RE, 'strong')
-        self.inlinePatterns["emphasis"] = \
-            inlinepatterns.SimpleTagPattern(inlinepatterns.EMPHASIS_RE, 'em')
-        if self.SMART_EMPHASIS:
-            self.inlinePatterns["emphasis2"] = \
-                inlinepatterns.SimpleTagPattern( \
-                                        inlinepatterns.SMART_EMPHASIS_RE, 'em')
-        else:
-            self.inlinePatterns["emphasis2"] = \
-                inlinepatterns.SimpleTagPattern( \
-                                        inlinepatterns.EMPHASIS_2_RE, 'em')
-        # The order of the handlers matters!!!
-
-
-        # Tree processors - run once we have a basic parse.
-        self.treeprocessors = odict.OrderedDict()
-        self.treeprocessors["inline"] = treeprocessors.InlineProcessor(self)
-        self.treeprocessors["prettify"] = \
-                treeprocessors.PrettifyTreeprocessor(self)
-
-        # Postprocessors - finishing touches.
-        self.postprocessors = odict.OrderedDict()
-        self.postprocessors["raw_html"] = \
-                postprocessors.RawHtmlPostprocessor(self)
-        self.postprocessors["amp_substitute"] = \
-                postprocessors.AndSubstitutePostprocessor()
-        # footnote postprocessor will be inserted with ">amp_substitute"
+        self.build_parser()
 
         # Map format keys to serializers
         self.output_formats = {
@@ -237,6 +143,14 @@ class Markdown:
         self.set_output_format(output_format)
         self.reset()
 
+    def build_parser(self):
+        """ Build the parser from the various parts. """
+        self.preprocessors = build_preprocessors(self)
+        self.parser = build_block_parser(self) 
+        self.inlinePatterns = build_inlinepatterns(self)
+        self.treeprocessors = build_treeprocessors(self)
+        self.postprocessors = build_postprocessors(self)
+
     def registerExtensions(self, extensions, configs):
         """
         Register extensions with this instance of Markdown.
@@ -276,10 +190,10 @@ class Markdown:
             if hasattr(extension, 'reset'):
                 extension.reset()
 
-    def set_output_format(self, format):
+    def set_output_format(self, format=None):
         """ Set the output format for the class instance. """
         if format is None:
-            format = self.DEFAULT_OUTPUT_FORMAT
+            format = 'xhtml1' #DEFAULT
         try:
             self.serializer = self.output_formats[format.lower()]
         except KeyError:
diff --git a/markdown/blockprocessors.py b/markdown/blockprocessors.py
index 4e39200..399a523 100644
--- a/markdown/blockprocessors.py
+++ b/markdown/blockprocessors.py
@@ -14,9 +14,27 @@ as they need to alter how markdown blocks are parsed.
 
 import re
 import util
+from blockparser import BlockParser
 from logging import CRITICAL
 from md_logging import message
 
+
+def build_block_parser(md_instance, **kwargs):
+    """ Build the default block parser used by Markdown. """
+    parser = BlockParser(md_instance)
+    parser.blockprocessors['empty'] = EmptyBlockProcessor(parser)
+    parser.blockprocessors['indent'] = ListIndentProcessor(parser)
+    parser.blockprocessors['code'] = CodeBlockProcessor(parser)
+    parser.blockprocessors['hashheader'] = HashHeaderProcessor(parser)
+    parser.blockprocessors['setextheader'] = SetextHeaderProcessor(parser)
+    parser.blockprocessors['hr'] = HRProcessor(parser)
+    parser.blockprocessors['olist'] = OListProcessor(parser)
+    parser.blockprocessors['ulist'] = UListProcessor(parser)
+    parser.blockprocessors['quote'] = BlockQuoteProcessor(parser)
+    parser.blockprocessors['paragraph'] = ParagraphProcessor(parser)
+    return parser
+
+
 class BlockProcessor:
     """ Base class for block processors. 
     
diff --git a/markdown/inlinepatterns.py b/markdown/inlinepatterns.py
index a718b7e..7a78b5e 100644
--- a/markdown/inlinepatterns.py
+++ b/markdown/inlinepatterns.py
@@ -42,6 +42,7 @@ So, we apply the expressions in the following order:
 """
 
 import util
+import odict
 import re
 from urlparse import urlparse, urlunparse
 import sys
@@ -50,6 +51,33 @@ if sys.version >= "3.0":
 else:
     import htmlentitydefs
 
+
+def build_inlinepatterns(md_instance, **kwargs):
+    """ Build the default set of inline patterns for Markdown. """
+    inlinePatterns = odict.OrderedDict()
+    inlinePatterns["backtick"] = BacktickPattern(BACKTICK_RE)
+    inlinePatterns["escape"] = SimpleTextPattern(ESCAPE_RE)
+    inlinePatterns["reference"] = ReferencePattern(REFERENCE_RE, md_instance)
+    inlinePatterns["link"] = LinkPattern(LINK_RE, md_instance)
+    inlinePatterns["image_link"] = ImagePattern(IMAGE_LINK_RE, md_instance)
+    inlinePatterns["image_reference"] = \
+            ImageReferencePattern(IMAGE_REFERENCE_RE, md_instance)
+    inlinePatterns["autolink"] = AutolinkPattern(AUTOLINK_RE, md_instance)
+    inlinePatterns["automail"] = AutomailPattern(AUTOMAIL_RE, md_instance)
+    inlinePatterns["linebreak2"] = SubstituteTagPattern(LINE_BREAK_2_RE, 'br')
+    inlinePatterns["linebreak"] = SubstituteTagPattern(LINE_BREAK_RE, 'br')
+    inlinePatterns["html"] = HtmlPattern(HTML_RE, md_instance)
+    inlinePatterns["entity"] = HtmlPattern(ENTITY_RE, md_instance)
+    inlinePatterns["not_strong"] = SimpleTextPattern(NOT_STRONG_RE)
+    inlinePatterns["strong_em"] = DoubleTagPattern(STRONG_EM_RE, 'strong,em')
+    inlinePatterns["strong"] = SimpleTagPattern(STRONG_RE, 'strong')
+    inlinePatterns["emphasis"] = SimpleTagPattern(EMPHASIS_RE, 'em')
+    if md_instance.SMART_EMPHASIS:
+        inlinePatterns["emphasis2"] = SimpleTagPattern(SMART_EMPHASIS_RE, 'em')
+    else:
+        inlinePatterns["emphasis2"] = SimpleTagPattern(EMPHASIS_2_RE, 'em')
+    return inlinePatterns
+
 """
 The actual regular expressions for patterns
 -----------------------------------------------------------------------------
diff --git a/markdown/postprocessors.py b/markdown/postprocessors.py
index 41c34fc..4d93082 100644
--- a/markdown/postprocessors.py
+++ b/markdown/postprocessors.py
@@ -9,6 +9,15 @@ processing.
 """
 
 import util
+import odict
+
+def build_postprocessors(md_instance, **kwargs):
+    """ Build the default postprocessors for Markdown. """
+    postprocessors = odict.OrderedDict()
+    postprocessors["raw_html"] = RawHtmlPostprocessor(md_instance)
+    postprocessors["amp_substitute"] = AndSubstitutePostprocessor()
+    return postprocessors
+
 
 class Postprocessor(util.Processor):
     """
diff --git a/markdown/preprocessors.py b/markdown/preprocessors.py
index 567621b..a83a78a 100644
--- a/markdown/preprocessors.py
+++ b/markdown/preprocessors.py
@@ -1,4 +1,3 @@
-
 """
 PRE-PROCESSORS
 =============================================================================
@@ -9,6 +8,15 @@ complicated.
 
 import re
 import util
+import odict
+
+
+def build_preprocessors(md_instance, **kwargs):
+    """ Build the default set of preprocessors used by Markdown. """
+    preprocessors = odict.OrderedDict()
+    preprocessors["html_block"] = HtmlBlockPreprocessor(md_instance)
+    preprocessors["reference"] = ReferencePreprocessor(md_instance)
+    return preprocessors
 
 
 class Preprocessor(util.Processor):
diff --git a/markdown/treeprocessors.py b/markdown/treeprocessors.py
index 6aeb142..50df486 100644
--- a/markdown/treeprocessors.py
+++ b/markdown/treeprocessors.py
@@ -2,6 +2,16 @@ import re
 
 import inlinepatterns
 import util
+import odict
+
+
+def build_treeprocessors(md_instance, **kwargs):
+    """ Build the default treeprocessors for Markdown. """
+    treeprocessors = odict.OrderedDict()
+    treeprocessors["inline"] = InlineProcessor(md_instance)
+    treeprocessors["prettify"] = PrettifyTreeprocessor(md_instance)
+    return treeprocessors
+
 
 def isString(s):
     """ Check if it's string """
-- 
cgit v1.2.3