diff options
author | Waylan Limberg <waylan@gmail.com> | 2010-07-07 10:11:45 -0400 |
---|---|---|
committer | Waylan Limberg <waylan@gmail.com> | 2010-07-07 10:11:45 -0400 |
commit | a33a04439905851b5b1a5db4104ec3a11b4ab1d3 (patch) | |
tree | 3b52768f0b0d928a688c79adcd531059ec7bc005 | |
parent | 018aa73e82941288a1178ded751cf29d9bc13581 (diff) | |
download | markdown-a33a04439905851b5b1a5db4104ec3a11b4ab1d3.tar.gz markdown-a33a04439905851b5b1a5db4104ec3a11b4ab1d3.tar.bz2 markdown-a33a04439905851b5b1a5db4104ec3a11b4ab1d3.zip |
Factored out the building of the various processors and patterns into utility functions called by a build_parser method on the Markdown class. Editing of the processors and patterns now all happens in one file for each type. Additionally, a subclass of Markdown could potentially override the build_parser method and build a parser for a completely different markup language without first building the default and then overriding it.
-rw-r--r-- | markdown/__init__.py | 118 | ||||
-rw-r--r-- | markdown/blockprocessors.py | 18 | ||||
-rw-r--r-- | markdown/inlinepatterns.py | 28 | ||||
-rw-r--r-- | markdown/postprocessors.py | 9 | ||||
-rw-r--r-- | markdown/preprocessors.py | 10 | ||||
-rw-r--r-- | markdown/treeprocessors.py | 10 |
6 files changed, 90 insertions, 103 deletions
diff --git a/markdown/__init__.py b/markdown/__init__.py index 6d16b0e..ef6603e 100644 --- a/markdown/__init__.py +++ b/markdown/__init__.py @@ -69,13 +69,11 @@ Those steps are put together by the Markdown() class. from logging import DEBUG, INFO, WARN, ERROR, CRITICAL from md_logging import message import util -import preprocessors -import blockprocessors -import treeprocessors -import inlinepatterns -import postprocessors -import blockparser -import odict +from preprocessors import build_preprocessors +from blockprocessors import build_block_parser +from treeprocessors import build_treeprocessors +from inlinepatterns import build_inlinepatterns +from postprocessors import build_postprocessors from extensions import Extension, load_extension, load_extensions # For backwards compatibility in the 2.0.x series @@ -93,7 +91,6 @@ class Markdown: TAB_LENGTH = 4 # expand tabs to this many spaces ENABLE_ATTRIBUTES = True # @id = xyz -> <... id="xyz"> SMART_EMPHASIS = True # this_or_that does not become this<i>or</i>that - DEFAULT_OUTPUT_FORMAT = 'xhtml1' # xhtml or html4 output HTML_REMOVED_TEXT = "[HTML_REMOVED]" # text used instead of HTML in safe mode DOC_TAG = "div" # Element used to wrap document - later removed @@ -129,98 +126,7 @@ class Markdown: self.docType = "" self.stripTopLevelTags = True - # Preprocessors - self.preprocessors = odict.OrderedDict() - self.preprocessors["html_block"] = \ - preprocessors.HtmlBlockPreprocessor(self) - self.preprocessors["reference"] = \ - preprocessors.ReferencePreprocessor(self) - # footnote preprocessor will be inserted with "<reference" - - # Block processors - ran by the parser - self.parser = blockparser.BlockParser(self) - self.parser.blockprocessors['empty'] = \ - blockprocessors.EmptyBlockProcessor(self.parser) - self.parser.blockprocessors['indent'] = \ - blockprocessors.ListIndentProcessor(self.parser) - self.parser.blockprocessors['code'] = \ - blockprocessors.CodeBlockProcessor(self.parser) - 
self.parser.blockprocessors['hashheader'] = \ - blockprocessors.HashHeaderProcessor(self.parser) - self.parser.blockprocessors['setextheader'] = \ - blockprocessors.SetextHeaderProcessor(self.parser) - self.parser.blockprocessors['hr'] = \ - blockprocessors.HRProcessor(self.parser) - self.parser.blockprocessors['olist'] = \ - blockprocessors.OListProcessor(self.parser) - self.parser.blockprocessors['ulist'] = \ - blockprocessors.UListProcessor(self.parser) - self.parser.blockprocessors['quote'] = \ - blockprocessors.BlockQuoteProcessor(self.parser) - self.parser.blockprocessors['paragraph'] = \ - blockprocessors.ParagraphProcessor(self.parser) - - - #self.prePatterns = [] - - # Inline patterns - Run on the tree - self.inlinePatterns = odict.OrderedDict() - self.inlinePatterns["backtick"] = \ - inlinepatterns.BacktickPattern(inlinepatterns.BACKTICK_RE) - self.inlinePatterns["escape"] = \ - inlinepatterns.SimpleTextPattern(inlinepatterns.ESCAPE_RE) - self.inlinePatterns["reference"] = \ - inlinepatterns.ReferencePattern(inlinepatterns.REFERENCE_RE, self) - self.inlinePatterns["link"] = \ - inlinepatterns.LinkPattern(inlinepatterns.LINK_RE, self) - self.inlinePatterns["image_link"] = \ - inlinepatterns.ImagePattern(inlinepatterns.IMAGE_LINK_RE, self) - self.inlinePatterns["image_reference"] = \ - inlinepatterns.ImageReferencePattern(inlinepatterns.IMAGE_REFERENCE_RE, self) - self.inlinePatterns["autolink"] = \ - inlinepatterns.AutolinkPattern(inlinepatterns.AUTOLINK_RE, self) - self.inlinePatterns["automail"] = \ - inlinepatterns.AutomailPattern(inlinepatterns.AUTOMAIL_RE, self) - self.inlinePatterns["linebreak2"] = \ - inlinepatterns.SubstituteTagPattern(inlinepatterns.LINE_BREAK_2_RE, 'br') - self.inlinePatterns["linebreak"] = \ - inlinepatterns.SubstituteTagPattern(inlinepatterns.LINE_BREAK_RE, 'br') - self.inlinePatterns["html"] = \ - inlinepatterns.HtmlPattern(inlinepatterns.HTML_RE, self) - self.inlinePatterns["entity"] = \ - 
inlinepatterns.HtmlPattern(inlinepatterns.ENTITY_RE, self) - self.inlinePatterns["not_strong"] = \ - inlinepatterns.SimpleTextPattern(inlinepatterns.NOT_STRONG_RE) - self.inlinePatterns["strong_em"] = \ - inlinepatterns.DoubleTagPattern(inlinepatterns.STRONG_EM_RE, 'strong,em') - self.inlinePatterns["strong"] = \ - inlinepatterns.SimpleTagPattern(inlinepatterns.STRONG_RE, 'strong') - self.inlinePatterns["emphasis"] = \ - inlinepatterns.SimpleTagPattern(inlinepatterns.EMPHASIS_RE, 'em') - if self.SMART_EMPHASIS: - self.inlinePatterns["emphasis2"] = \ - inlinepatterns.SimpleTagPattern( \ - inlinepatterns.SMART_EMPHASIS_RE, 'em') - else: - self.inlinePatterns["emphasis2"] = \ - inlinepatterns.SimpleTagPattern( \ - inlinepatterns.EMPHASIS_2_RE, 'em') - # The order of the handlers matters!!! - - - # Tree processors - run once we have a basic parse. - self.treeprocessors = odict.OrderedDict() - self.treeprocessors["inline"] = treeprocessors.InlineProcessor(self) - self.treeprocessors["prettify"] = \ - treeprocessors.PrettifyTreeprocessor(self) - - # Postprocessors - finishing touches. - self.postprocessors = odict.OrderedDict() - self.postprocessors["raw_html"] = \ - postprocessors.RawHtmlPostprocessor(self) - self.postprocessors["amp_substitute"] = \ - postprocessors.AndSubstitutePostprocessor() - # footnote postprocessor will be inserted with ">amp_substitute" + self.build_parser() # Map format keys to serializers self.output_formats = { @@ -237,6 +143,14 @@ class Markdown: self.set_output_format(output_format) self.reset() + def build_parser(self): + """ Build the parser from the various parts. """ + self.preprocessors = build_preprocessors(self) + self.parser = build_block_parser(self) + self.inlinePatterns = build_inlinepatterns(self) + self.treeprocessors = build_treeprocessors(self) + self.postprocessors = build_postprocessors(self) + def registerExtensions(self, extensions, configs): """ Register extensions with this instance of Markdown. 
@@ -276,10 +190,10 @@ class Markdown: if hasattr(extension, 'reset'): extension.reset() - def set_output_format(self, format): + def set_output_format(self, format=None): """ Set the output format for the class instance. """ if format is None: - format = self.DEFAULT_OUTPUT_FORMAT + format = 'xhtml1' #DEFAULT try: self.serializer = self.output_formats[format.lower()] except KeyError: diff --git a/markdown/blockprocessors.py b/markdown/blockprocessors.py index 4e39200..399a523 100644 --- a/markdown/blockprocessors.py +++ b/markdown/blockprocessors.py @@ -14,9 +14,27 @@ as they need to alter how markdown blocks are parsed. import re import util +from blockparser import BlockParser from logging import CRITICAL from md_logging import message + +def build_block_parser(md_instance, **kwargs): + """ Build the default block parser used by Markdown. """ + parser = BlockParser(md_instance) + parser.blockprocessors['empty'] = EmptyBlockProcessor(parser) + parser.blockprocessors['indent'] = ListIndentProcessor(parser) + parser.blockprocessors['code'] = CodeBlockProcessor(parser) + parser.blockprocessors['hashheader'] = HashHeaderProcessor(parser) + parser.blockprocessors['setextheader'] = SetextHeaderProcessor(parser) + parser.blockprocessors['hr'] = HRProcessor(parser) + parser.blockprocessors['olist'] = OListProcessor(parser) + parser.blockprocessors['ulist'] = UListProcessor(parser) + parser.blockprocessors['quote'] = BlockQuoteProcessor(parser) + parser.blockprocessors['paragraph'] = ParagraphProcessor(parser) + return parser + + class BlockProcessor: """ Base class for block processors. 
diff --git a/markdown/inlinepatterns.py b/markdown/inlinepatterns.py index a718b7e..7a78b5e 100644 --- a/markdown/inlinepatterns.py +++ b/markdown/inlinepatterns.py @@ -42,6 +42,7 @@ So, we apply the expressions in the following order: """ import util +import odict import re from urlparse import urlparse, urlunparse import sys @@ -50,6 +51,33 @@ if sys.version >= "3.0": else: import htmlentitydefs + +def build_inlinepatterns(md_instance, **kwargs): + """ Build the default set of inline patterns for Markdown. """ + inlinePatterns = odict.OrderedDict() + inlinePatterns["backtick"] = BacktickPattern(BACKTICK_RE) + inlinePatterns["escape"] = SimpleTextPattern(ESCAPE_RE) + inlinePatterns["reference"] = ReferencePattern(REFERENCE_RE, md_instance) + inlinePatterns["link"] = LinkPattern(LINK_RE, md_instance) + inlinePatterns["image_link"] = ImagePattern(IMAGE_LINK_RE, md_instance) + inlinePatterns["image_reference"] = \ + ImageReferencePattern(IMAGE_REFERENCE_RE, md_instance) + inlinePatterns["autolink"] = AutolinkPattern(AUTOLINK_RE, md_instance) + inlinePatterns["automail"] = AutomailPattern(AUTOMAIL_RE, md_instance) + inlinePatterns["linebreak2"] = SubstituteTagPattern(LINE_BREAK_2_RE, 'br') + inlinePatterns["linebreak"] = SubstituteTagPattern(LINE_BREAK_RE, 'br') + inlinePatterns["html"] = HtmlPattern(HTML_RE, md_instance) + inlinePatterns["entity"] = HtmlPattern(ENTITY_RE, md_instance) + inlinePatterns["not_strong"] = SimpleTextPattern(NOT_STRONG_RE) + inlinePatterns["strong_em"] = DoubleTagPattern(STRONG_EM_RE, 'strong,em') + inlinePatterns["strong"] = SimpleTagPattern(STRONG_RE, 'strong') + inlinePatterns["emphasis"] = SimpleTagPattern(EMPHASIS_RE, 'em') + if md_instance.SMART_EMPHASIS: + inlinePatterns["emphasis2"] = SimpleTagPattern(SMART_EMPHASIS_RE, 'em') + else: + inlinePatterns["emphasis2"] = SimpleTagPattern(EMPHASIS_2_RE, 'em') + return inlinePatterns + """ The actual regular expressions for patterns 
----------------------------------------------------------------------------- diff --git a/markdown/postprocessors.py b/markdown/postprocessors.py index 41c34fc..4d93082 100644 --- a/markdown/postprocessors.py +++ b/markdown/postprocessors.py @@ -9,6 +9,15 @@ processing. """ import util +import odict + +def build_postprocessors(md_instance, **kwargs): + """ Build the default postprocessors for Markdown. """ + postprocessors = odict.OrderedDict() + postprocessors["raw_html"] = RawHtmlPostprocessor(md_instance) + postprocessors["amp_substitute"] = AndSubstitutePostprocessor() + return postprocessors + class Postprocessor(util.Processor): """ diff --git a/markdown/preprocessors.py b/markdown/preprocessors.py index 567621b..a83a78a 100644 --- a/markdown/preprocessors.py +++ b/markdown/preprocessors.py @@ -1,4 +1,3 @@ - """ PRE-PROCESSORS ============================================================================= @@ -9,6 +8,15 @@ complicated. import re import util +import odict + + +def build_preprocessors(md_instance, **kwargs): + """ Build the default set of preprocessors used by Markdown. """ + preprocessors = odict.OrderedDict() + preprocessors["html_block"] = HtmlBlockPreprocessor(md_instance) + preprocessors["reference"] = ReferencePreprocessor(md_instance) + return preprocessors class Preprocessor(util.Processor): diff --git a/markdown/treeprocessors.py b/markdown/treeprocessors.py index 6aeb142..50df486 100644 --- a/markdown/treeprocessors.py +++ b/markdown/treeprocessors.py @@ -2,6 +2,16 @@ import re import inlinepatterns import util +import odict + + +def build_treeprocessors(md_instance, **kwargs): + """ Build the default treeprocessors for Markdown. """ + treeprocessors = odict.OrderedDict() + treeprocessors["inline"] = InlineProcessor(md_instance) + treeprocessors["prettify"] = PrettifyTreeprocessor(md_instance) + return treeprocessors + def isString(s): """ Check if it's string """ |