diff options
author | Waylan Limberg <waylan@gmail.com> | 2013-02-08 09:02:33 -0500 |
---|---|---|
committer | Waylan Limberg <waylan@gmail.com> | 2013-02-08 09:02:33 -0500 |
commit | 3a1806b3b77dbcd01e351c3e28d8083bd3661ea3 (patch) | |
tree | 45ac0f4211784a31a471a539298200f4104c74ef | |
parent | 62547c53a1f40235c83542992573cc435aa82f36 (diff) | |
download | markdown-3a1806b3b77dbcd01e351c3e28d8083bd3661ea3.tar.gz markdown-3a1806b3b77dbcd01e351c3e28d8083bd3661ea3.tar.bz2 markdown-3a1806b3b77dbcd01e351c3e28d8083bd3661ea3.zip |
Moved whitespace normalization to a preprocessor.
Fixes #150 - at least as much as I'm willing to. This allows whitespace
normalization to be overridable by the extension API. Yes, I realize that most
other processors will also probably need to be overridden to work with any
different whitespace normalization - but I'm okay with that.
As pointed out in #150, some processors have the tab length hardcoded in
regexes. I'm willing to accept a working patch that fixes that - and keeps
the regexes easy to override in a subclass (the provided patch moved them
inside the __init__ method - which is not so easy to override in a subclass).
However, that is about the only additional change I'm willing to consider for
this issue.
-rw-r--r-- | markdown/__init__.py | 8 | ||||
-rw-r--r-- | markdown/extensions/fenced_code.py | 2 | ||||
-rw-r--r-- | markdown/preprocessors.py | 13 |
3 files changed, 18 insertions, 5 deletions
diff --git a/markdown/__init__.py b/markdown/__init__.py index fbd2879..e66141d 100644 --- a/markdown/__init__.py +++ b/markdown/__init__.py @@ -281,10 +281,10 @@ class Markdown: e.reason += '. -- Note: Markdown only accepts unicode input!' raise - source = source.replace(util.STX, "").replace(util.ETX, "") - source = source.replace("\r\n", "\n").replace("\r", "\n") + "\n\n" - source = source.expandtabs(self.tab_length) - source = re.sub(r'\n +\n', '\n\n', source) + #source = source.replace(util.STX, "").replace(util.ETX, "") + #source = source.replace("\r\n", "\n").replace("\r", "\n") + "\n\n" + #source = source.expandtabs(self.tab_length) + #source = re.sub(r'\n +\n', '\n\n', source) # Split into lines and run the line preprocessors. self.lines = source.split("\n") diff --git a/markdown/extensions/fenced_code.py b/markdown/extensions/fenced_code.py index 9a1284f..76d644f 100644 --- a/markdown/extensions/fenced_code.py +++ b/markdown/extensions/fenced_code.py @@ -95,7 +95,7 @@ class FencedCodeExtension(markdown.Extension): md.preprocessors.add('fenced_code_block', FencedBlockPreprocessor(md), - "_begin") + ">normalize_whitespace") class FencedBlockPreprocessor(markdown.preprocessors.Preprocessor): diff --git a/markdown/preprocessors.py b/markdown/preprocessors.py index e968580..3751264 100644 --- a/markdown/preprocessors.py +++ b/markdown/preprocessors.py @@ -14,6 +14,7 @@ import odict def build_preprocessors(md_instance, **kwargs): """ Build the default set of preprocessors used by Markdown. """ preprocessors = odict.OrderedDict() + preprocessors['normalize_whitespace'] = NormalizeWhitespace(md_instance) if md_instance.safeMode != 'escape': preprocessors["html_block"] = HtmlBlockPreprocessor(md_instance) preprocessors["reference"] = ReferencePreprocessor(md_instance) @@ -41,6 +42,18 @@ class Preprocessor(util.Processor): pass +class NormalizeWhitespace(Preprocessor): + """ Normalize whitespace for consistant parsing. 
""" + + def run(self, lines): + source = '\n'.join(lines) + source = source.replace(util.STX, "").replace(util.ETX, "") + source = source.replace("\r\n", "\n").replace("\r", "\n") + "\n\n" + source = source.expandtabs(self.markdown.tab_length) + source = re.sub(r'\n +\n', '\n\n', source) + return source.split('\n') + + class HtmlBlockPreprocessor(Preprocessor): """Remove html blocks from the text and store them for later retrieval.""" |