From 3a1806b3b77dbcd01e351c3e28d8083bd3661ea3 Mon Sep 17 00:00:00 2001 From: Waylan Limberg Date: Fri, 8 Feb 2013 09:02:33 -0500 Subject: Moved whitespace normalization to a preprocessor. Fixes #150 - at least as much as I'm willing to. This allows whitespace normalization to be overridable by the extension API. Yes, I realize that most other processors will also probably need to be overridden to work with any different whitespace normalization - but I'm okay with that. As pointed out in #150, some processors have the tab length hardcoded in regexes. I'm willing to accept a working patch that fixes that - and keeps the regexes easy to override in a subclass (the provided patch moved them inside the __init__ method - which is not so easy to override in a subclass). However, that is about the only additional change I'm willing to consider for this issue. --- markdown/__init__.py | 8 ++++---- markdown/extensions/fenced_code.py | 2 +- markdown/preprocessors.py | 13 +++++++++++++ 3 files changed, 18 insertions(+), 5 deletions(-) diff --git a/markdown/__init__.py b/markdown/__init__.py index fbd2879..e66141d 100644 --- a/markdown/__init__.py +++ b/markdown/__init__.py @@ -281,10 +281,10 @@ class Markdown: e.reason += '. -- Note: Markdown only accepts unicode input!' raise - source = source.replace(util.STX, "").replace(util.ETX, "") - source = source.replace("\r\n", "\n").replace("\r", "\n") + "\n\n" - source = source.expandtabs(self.tab_length) - source = re.sub(r'\n +\n', '\n\n', source) + #source = source.replace(util.STX, "").replace(util.ETX, "") + #source = source.replace("\r\n", "\n").replace("\r", "\n") + "\n\n" + #source = source.expandtabs(self.tab_length) + #source = re.sub(r'\n +\n', '\n\n', source) # Split into lines and run the line preprocessors. 
self.lines = source.split("\n") diff --git a/markdown/extensions/fenced_code.py b/markdown/extensions/fenced_code.py index 9a1284f..76d644f 100644 --- a/markdown/extensions/fenced_code.py +++ b/markdown/extensions/fenced_code.py @@ -95,7 +95,7 @@ class FencedCodeExtension(markdown.Extension): md.preprocessors.add('fenced_code_block', FencedBlockPreprocessor(md), - "_begin") + ">normalize_whitespace") class FencedBlockPreprocessor(markdown.preprocessors.Preprocessor): diff --git a/markdown/preprocessors.py b/markdown/preprocessors.py index e968580..3751264 100644 --- a/markdown/preprocessors.py +++ b/markdown/preprocessors.py @@ -14,6 +14,7 @@ import odict def build_preprocessors(md_instance, **kwargs): """ Build the default set of preprocessors used by Markdown. """ preprocessors = odict.OrderedDict() + preprocessors['normalize_whitespace'] = NormalizeWhitespace(md_instance) if md_instance.safeMode != 'escape': preprocessors["html_block"] = HtmlBlockPreprocessor(md_instance) preprocessors["reference"] = ReferencePreprocessor(md_instance) @@ -41,6 +42,18 @@ class Preprocessor(util.Processor): pass +class NormalizeWhitespace(Preprocessor): + """ Normalize whitespace for consistant parsing. """ + + def run(self, lines): + source = '\n'.join(lines) + source = source.replace(util.STX, "").replace(util.ETX, "") + source = source.replace("\r\n", "\n").replace("\r", "\n") + "\n\n" + source = source.expandtabs(self.markdown.tab_length) + source = re.sub(r'\n +\n', '\n\n', source) + return source.split('\n') + + class HtmlBlockPreprocessor(Preprocessor): """Remove html blocks from the text and store them for later retrieval.""" -- cgit v1.2.3