diff options
author | Waylan Limberg <waylan@gmail.com> | 2013-02-08 09:02:33 -0500 |
---|---|---|
committer | Waylan Limberg <waylan@gmail.com> | 2013-02-08 09:02:33 -0500 |
commit | 3a1806b3b77dbcd01e351c3e28d8083bd3661ea3 (patch) | |
tree | 45ac0f4211784a31a471a539298200f4104c74ef /markdown/preprocessors.py | |
parent | 62547c53a1f40235c83542992573cc435aa82f36 (diff) | |
download | markdown-3a1806b3b77dbcd01e351c3e28d8083bd3661ea3.tar.gz markdown-3a1806b3b77dbcd01e351c3e28d8083bd3661ea3.tar.bz2 markdown-3a1806b3b77dbcd01e351c3e28d8083bd3661ea3.zip |
Moved whitespace normalization to a preprocessor.
Fixes #150 - at least as much as I'm willing to. This allows whitespace
normalization to be overridable by the extension API. Yes, I realize that most
other processors will also probably need to be overridden to work with any
different whitespace normalization - but I'm okay with that.
As pointed out in #150, some processors have the tab length hardcoded in
regexes. I'm willing to accept a working patch that fixes that - and keeps
the regexes easy to override in a subclass (the provided patch moved them
inside the __init__ method - which is not so easy to override in a subclass).
However, that is about the only additional change I'm willing to consider for
this issue.
Diffstat (limited to 'markdown/preprocessors.py')
-rw-r--r-- | markdown/preprocessors.py | 13 |
1 files changed, 13 insertions, 0 deletions
diff --git a/markdown/preprocessors.py b/markdown/preprocessors.py index e968580..3751264 100644 --- a/markdown/preprocessors.py +++ b/markdown/preprocessors.py @@ -14,6 +14,7 @@ import odict def build_preprocessors(md_instance, **kwargs): """ Build the default set of preprocessors used by Markdown. """ preprocessors = odict.OrderedDict() + preprocessors['normalize_whitespace'] = NormalizeWhitespace(md_instance) if md_instance.safeMode != 'escape': preprocessors["html_block"] = HtmlBlockPreprocessor(md_instance) preprocessors["reference"] = ReferencePreprocessor(md_instance) @@ -41,6 +42,18 @@ class Preprocessor(util.Processor): pass +class NormalizeWhitespace(Preprocessor): + """ Normalize whitespace for consistant parsing. """ + + def run(self, lines): + source = '\n'.join(lines) + source = source.replace(util.STX, "").replace(util.ETX, "") + source = source.replace("\r\n", "\n").replace("\r", "\n") + "\n\n" + source = source.expandtabs(self.markdown.tab_length) + source = re.sub(r'\n +\n', '\n\n', source) + return source.split('\n') + + class HtmlBlockPreprocessor(Preprocessor): """Remove html blocks from the text and store them for later retrieval.""" |