aboutsummaryrefslogtreecommitdiffstats
path: root/markdown/preprocessors.py
diff options
context:
space:
mode:
authorWaylan Limberg <waylan@gmail.com>2013-02-08 09:02:33 -0500
committerWaylan Limberg <waylan@gmail.com>2013-02-08 09:02:33 -0500
commit3a1806b3b77dbcd01e351c3e28d8083bd3661ea3 (patch)
tree45ac0f4211784a31a471a539298200f4104c74ef /markdown/preprocessors.py
parent62547c53a1f40235c83542992573cc435aa82f36 (diff)
downloadmarkdown-3a1806b3b77dbcd01e351c3e28d8083bd3661ea3.tar.gz
markdown-3a1806b3b77dbcd01e351c3e28d8083bd3661ea3.tar.bz2
markdown-3a1806b3b77dbcd01e351c3e28d8083bd3661ea3.zip
Moved whitespace normalization to a preprocessor.
Fixes #150 - at least as much as I'm willing to. This allows whitespace normalization to be overridable by the extension API. Yes, I realize that most other processors will also probably need to be overridden to work with any different whitespace normalization - but I'm okay with that. As pointed out in #150, some processors have the tab length hardcoded in regexes. I'm willing to accept a working patch that fixes that - and keeps the regexes easy to override in a subclass (the provided patch moved them inside the __init__ method - which is not so easy to override in a subclass). However, that is about the only additional change I'm willing to consider for this issue.
Diffstat (limited to 'markdown/preprocessors.py')
-rw-r--r--markdown/preprocessors.py13
1 files changed, 13 insertions, 0 deletions
diff --git a/markdown/preprocessors.py b/markdown/preprocessors.py
index e968580..3751264 100644
--- a/markdown/preprocessors.py
+++ b/markdown/preprocessors.py
@@ -14,6 +14,7 @@ import odict
def build_preprocessors(md_instance, **kwargs):
""" Build the default set of preprocessors used by Markdown. """
preprocessors = odict.OrderedDict()
+ preprocessors['normalize_whitespace'] = NormalizeWhitespace(md_instance)
if md_instance.safeMode != 'escape':
preprocessors["html_block"] = HtmlBlockPreprocessor(md_instance)
preprocessors["reference"] = ReferencePreprocessor(md_instance)
@@ -41,6 +42,18 @@ class Preprocessor(util.Processor):
pass
+class NormalizeWhitespace(Preprocessor):
+ """ Normalize whitespace for consistent parsing. """
+
+ def run(self, lines):
+ source = '\n'.join(lines)
+ source = source.replace(util.STX, "").replace(util.ETX, "")
+ source = source.replace("\r\n", "\n").replace("\r", "\n") + "\n\n"
+ source = source.expandtabs(self.markdown.tab_length)
+ source = re.sub(r'\n +\n', '\n\n', source)
+ return source.split('\n')
+
+
class HtmlBlockPreprocessor(Preprocessor):
"""Remove html blocks from the text and store them for later retrieval."""