Moved whitespace normalization to a preprocessor.

Fixes #150 - at least as much as I'm willing to. This allows whitespace normalization to be overridable by the extension API. Yes, I realize that most other processors will also probably need to be overridden to work with any different whitespace normalization - but I'm okay with that. As pointed out in #150, some processors have the tab length hardcoded in regexes. I'm willing to accept a working patch that fixes that - and keeps the regexes easy to override in a subclass (the provided patch moved them inside the __init__ method - which is not so easy to override in a subclass). However, that is about the only additional change I'm willing to consider for this issue.
author: Waylan Limberg <waylan@gmail.com> 2013-02-08 09:02:33 -0500
committer: Waylan Limberg <waylan@gmail.com> 2013-02-08 09:02:33 -0500
commit: 3a1806b3b77dbcd01e351c3e28d8083bd3661ea3 (patch)
tree: 45ac0f4211784a31a471a539298200f4104c74ef /markdown/preprocessors.py
parent: 62547c53a1f40235c83542992573cc435aa82f36 (diff)
download: markdown-3a1806b3b77dbcd01e351c3e28d8083bd3661ea3.tar.gz
markdown-3a1806b3b77dbcd01e351c3e28d8083bd3661ea3.tar.bz2
markdown-3a1806b3b77dbcd01e351c3e28d8083bd3661ea3.zip
1 files changed, 13 insertions, 0 deletions
diff --git a/markdown/preprocessors.py b/markdown/preprocessors.py
index e968580..3751264 100644
--- a/markdown/preprocessors.py
+++ b/markdown/preprocessors.py
@@ -14,6 +14,7 @@ import odict
 def build_preprocessors(md_instance, **kwargs):
     """ Build the default set of preprocessors used by Markdown. """
     preprocessors = odict.OrderedDict()
+    preprocessors['normalize_whitespace'] = NormalizeWhitespace(md_instance)
     if md_instance.safeMode != 'escape':
         preprocessors["html_block"] = HtmlBlockPreprocessor(md_instance)
     preprocessors["reference"] = ReferencePreprocessor(md_instance)
@@ -41,6 +42,18 @@ class Preprocessor(util.Processor):
         pass
 
 
+class NormalizeWhitespace(Preprocessor):
+    """ Normalize whitespace for consistant parsing. """
+
+    def run(self, lines):
+        source = '\n'.join(lines)
+        source = source.replace(util.STX, "").replace(util.ETX, "")
+        source = source.replace("\r\n", "\n").replace("\r", "\n") + "\n\n"
+        source = source.expandtabs(self.markdown.tab_length)
+        source = re.sub(r'\n +\n', '\n\n', source)
+        return source.split('\n')
+
+
 class HtmlBlockPreprocessor(Preprocessor):
     """Remove html blocks from the text and store them for later retrieval."""
author	Waylan Limberg <waylan@gmail.com>	2013-02-08 09:02:33 -0500
committer	Waylan Limberg <waylan@gmail.com>	2013-02-08 09:02:33 -0500
commit	3a1806b3b77dbcd01e351c3e28d8083bd3661ea3 (patch)
tree	45ac0f4211784a31a471a539298200f4104c74ef /markdown/preprocessors.py
parent	62547c53a1f40235c83542992573cc435aa82f36 (diff)
download	markdown-3a1806b3b77dbcd01e351c3e28d8083bd3661ea3.tar.gz markdown-3a1806b3b77dbcd01e351c3e28d8083bd3661ea3.tar.bz2 markdown-3a1806b3b77dbcd01e351c3e28d8083bd3661ea3.zip