From 6d5b96eac3a9bbba4898352ccce82b784540ed01 Mon Sep 17 00:00:00 2001 From: Waylan Limberg Date: Wed, 13 Feb 2008 04:20:34 +0000 Subject: Added better documentation to pre & post proccessors and fixed inheritance issues. --- markdown.py | 218 +++++++++++++++++++++++++++++++++++------------------------- 1 file changed, 128 insertions(+), 90 deletions(-) diff --git a/markdown.py b/markdown.py index e282088..2ad1495 100644 --- a/markdown.py +++ b/markdown.py @@ -405,85 +405,43 @@ class EntityReference: Preprocessors munge source text before we start doing anything too complicated. -Each preprocessor implements a "run" method that takes a pointer to a -list of lines of the document, modifies it as necessary and returns -either the same pointer or a pointer to a new list. Preprocessors -must extend markdown.Preprocessor. +There are two types of preprocessors: TextPreprocessor and Preprocessor. """ -class Preprocessor: - pass - - -class HeaderPreprocessor (Preprocessor): - - """ - Replaces underlined headers with hashed headers to avoid - the nead for lookahead later. - """ - - def run (self, lines): - - i = -1 - while i+1 < len(lines): - i = i+1 - if not lines[i].strip(): - continue - - if lines[i].startswith("#"): - lines.insert(i+1, "\n") - - if (i+1 <= len(lines) - and lines[i+1] - and lines[i+1][0] in ['-', '=']): - - underline = lines[i+1].strip() - - if underline == "="*len(underline): - lines[i] = "# " + lines[i].strip() - lines[i+1] = "" - elif underline == "-"*len(underline): - lines[i] = "## " + lines[i].strip() - lines[i+1] = "" - - return lines - -HEADER_PREPROCESSOR = HeaderPreprocessor() - -class LinePreprocessor (Preprocessor): - """Deals with HR lines (needs to be done before processing lists)""" +class TextPreprocessor: + ''' + TextPreprocessors are run before the text is broken into lines. 
+ + Each TextPreprocessor implements a "run" method that takes a pointer to a + text string of the document, modifies it as necessary and returns + either the same pointer or a pointer to a new string. + + TextPreprocessors must extend markdown.TextPreprocessor. + ''' - blockquote_re = re.compile(r'^(> )+') + def run(self, text): + pass - def run (self, lines): - for i in range(len(lines)): - prefix = '' - m = self.blockquote_re.search(lines[i]) - if m : prefix = m.group(0) - if self._isLine(lines[i][len(prefix):]): - lines[i] = prefix + self.stash.store("
", safe=True) - return lines - def _isLine(self, block): - """Determines if a block should be replaced with an
""" - if block.startswith(" "): return 0 # a code block - text = "".join([x for x in block if not x.isspace()]) - if len(text) <= 2: - return 0 - for pattern in ['isline1', 'isline2', 'isline3']: - m = RE.regExp[pattern].match(text) - if (m and m.group(1)): - return 1 - else: - return 0 +class Preprocessor: + ''' + Preprocessors are run after the text is broken into lines. -LINE_PREPROCESSOR = LinePreprocessor() + Each preprocessor implements a "run" method that takes a pointer to a + list of lines of the document, modifies it as necessary and returns + either the same pointer or a pointer to a new list. + + Preprocessors must extend markdown.Preprocessor. + ''' + def run(self, lines): + pass + -class HtmlBlockPreprocessor (Preprocessor): - """Removes html blocks from self.lines""" +class HtmlBlockPreprocessor(TextPreprocessor): + """Removes html blocks from the source text and stores them.""" def _get_left_tag(self, block): return block[1:].replace(">", " ", 1).split()[0].lower() @@ -510,7 +468,7 @@ class HtmlBlockPreprocessor (Preprocessor): return (tag in ['hr', 'hr/']) - def run (self, text): + def run(self, text): new_blocks = [] text = text.split("\n\n") @@ -580,7 +538,76 @@ class HtmlBlockPreprocessor (Preprocessor): HTML_BLOCK_PREPROCESSOR = HtmlBlockPreprocessor() -class ReferencePreprocessor (Preprocessor): +class HeaderPreprocessor(Preprocessor): + + """ + Replaces underlined headers with hashed headers to avoid + the need for lookahead later. 
+ """ + + def run (self, lines): + + i = -1 + while i+1 < len(lines): + i = i+1 + if not lines[i].strip(): + continue + + if lines[i].startswith("#"): + lines.insert(i+1, "\n") + + if (i+1 <= len(lines) + and lines[i+1] + and lines[i+1][0] in ['-', '=']): + + underline = lines[i+1].strip() + + if underline == "="*len(underline): + lines[i] = "# " + lines[i].strip() + lines[i+1] = "" + elif underline == "-"*len(underline): + lines[i] = "## " + lines[i].strip() + lines[i+1] = "" + + return lines + +HEADER_PREPROCESSOR = HeaderPreprocessor() + + +class LinePreprocessor(Preprocessor): + """Deals with HR lines (needs to be done before processing lists)""" + + blockquote_re = re.compile(r'^(> )+') + + def run (self, lines): + for i in range(len(lines)): + prefix = '' + m = self.blockquote_re.search(lines[i]) + if m : prefix = m.group(0) + if self._isLine(lines[i][len(prefix):]): + lines[i] = prefix + self.stash.store("
", safe=True) + return lines + + def _isLine(self, block): + """Determines if a block should be replaced with an
""" + if block.startswith(" "): return 0 # a code block + text = "".join([x for x in block if not x.isspace()]) + if len(text) <= 2: + return 0 + for pattern in ['isline1', 'isline2', 'isline3']: + m = RE.regExp[pattern].match(text) + if (m and m.group(1)): + return 1 + else: + return 0 + +LINE_PREPROCESSOR = LinePreprocessor() + + +class ReferencePreprocessor(Preprocessor): + ''' + Removes reference definitions from the text and stores them for later use. + ''' def run (self, lines): @@ -889,36 +916,47 @@ AUTOMAIL_PATTERN = AutomailPattern(AUTOMAIL_RE) ====================================================================== Markdown also allows post-processors, which are similar to -preprocessors in that they need to implement a "run" method. Unlike -pre-processors, they take a NanoDom document as a parameter and work -with that. - -Post-Processor should extend markdown.Postprocessor. +preprocessors in that they need to implement a "run" method. However, +they are run after core processing. -There are currently no standard post-processors, but the footnote -extension below uses one. +There are two types of post-processors: Postprocessor and TextPostprocessor """ + class Postprocessor: - pass + ''' + Postprocessors are run before the dom is converted back into text. + + Each Postprocessor implements a "run" method that takes a pointer to a + NanoDom document, modifies it as necessary and returns a NanoDom + document. + + Postprocessors must extend markdown.Postprocessor. + There are currently no standard post-processors, but the footnote + extension uses one. + ''' -""" -====================================================================== -======================== TEXT-POST-PROCESSORS ======================== -====================================================================== + def run(self, dom): + pass -Markdown also allows text-post-processors, which are similar to -textpreprocessors in that they need to implement a "run" method. 
-Unlike post-processors, they take a text string as a parameter and -should return a string. -Text-Post-Processors should extend markdown.Postprocessor. -""" +class TextPostprocessor: + ''' + TextPostprocessors are run after the dom is converted back into text. + + Each TextPostprocessor implements a "run" method that takes a pointer to a + text string, modifies it as necessary and returns a text string. + + TextPostprocessors must extend markdown.TextPostprocessor. + ''' + + def run(self, text): + pass -class RawHtmlTextPostprocessor(Postprocessor): +class RawHtmlTextPostprocessor(TextPostprocessor): def __init__(self): pass -- cgit v1.2.3