From 3a1806b3b77dbcd01e351c3e28d8083bd3661ea3 Mon Sep 17 00:00:00 2001
From: Waylan Limberg <waylan@gmail.com>
Date: Fri, 8 Feb 2013 09:02:33 -0500
Subject: Moved whitespace normalization to a preprocessor.

Fixes #150 - at least as much as I'm willing to. This allows whitespace
normalization to be overridable by the extension API. Yes, I realize that most
other processors will also probably need to be overridden to work with any
different whitespace normalization - but I'm okay with that.

As pointed out in #150, some processors have the tab length hardcoded in
regexes. I'm willing to accept a working patch that fixes that - and keeps
the regexes easy to override in a subclass (the provided patch moved them
inside the __init__ method - which is not so easy to override in a subclass).
However, that is about the only additional change I'm willing to consider for
this issue.
---
 markdown/__init__.py               |  8 ++++----
 markdown/extensions/fenced_code.py |  2 +-
 markdown/preprocessors.py          | 13 +++++++++++++
 3 files changed, 18 insertions(+), 5 deletions(-)

diff --git a/markdown/__init__.py b/markdown/__init__.py
index fbd2879..e66141d 100644
--- a/markdown/__init__.py
+++ b/markdown/__init__.py
@@ -281,10 +281,10 @@ class Markdown:
             e.reason += '. -- Note: Markdown only accepts unicode input!'
             raise
 
-        source = source.replace(util.STX, "").replace(util.ETX, "")
-        source = source.replace("\r\n", "\n").replace("\r", "\n") + "\n\n"
-        source = source.expandtabs(self.tab_length)
-        source = re.sub(r'\n +\n', '\n\n', source)
+        #source = source.replace(util.STX, "").replace(util.ETX, "")
+        #source = source.replace("\r\n", "\n").replace("\r", "\n") + "\n\n"
+        #source = source.expandtabs(self.tab_length)
+        #source = re.sub(r'\n +\n', '\n\n', source)
 
         # Split into lines and run the line preprocessors.
         self.lines = source.split("\n")
diff --git a/markdown/extensions/fenced_code.py b/markdown/extensions/fenced_code.py
index 9a1284f..76d644f 100644
--- a/markdown/extensions/fenced_code.py
+++ b/markdown/extensions/fenced_code.py
@@ -95,7 +95,7 @@ class FencedCodeExtension(markdown.Extension):
 
         md.preprocessors.add('fenced_code_block',
                                  FencedBlockPreprocessor(md),
-                                 "_begin")
+                                 ">normalize_whitespace")
 
 
 class FencedBlockPreprocessor(markdown.preprocessors.Preprocessor):
diff --git a/markdown/preprocessors.py b/markdown/preprocessors.py
index e968580..3751264 100644
--- a/markdown/preprocessors.py
+++ b/markdown/preprocessors.py
@@ -14,6 +14,7 @@ import odict
 def build_preprocessors(md_instance, **kwargs):
     """ Build the default set of preprocessors used by Markdown. """
     preprocessors = odict.OrderedDict()
+    preprocessors['normalize_whitespace'] = NormalizeWhitespace(md_instance)
     if md_instance.safeMode != 'escape':
         preprocessors["html_block"] = HtmlBlockPreprocessor(md_instance)
     preprocessors["reference"] = ReferencePreprocessor(md_instance)
@@ -41,6 +42,18 @@ class Preprocessor(util.Processor):
         pass
 
 
+class NormalizeWhitespace(Preprocessor):
+    """ Normalize whitespace for consistent parsing. """
+
+    def run(self, lines):
+        source = '\n'.join(lines)  # work on the whole document as one string
+        source = source.replace(util.STX, "").replace(util.ETX, "")  # drop any util.STX / util.ETX characters from the input
+        source = source.replace("\r\n", "\n").replace("\r", "\n") + "\n\n"  # CRLF and bare CR become LF; two newlines are appended
+        source = source.expandtabs(self.markdown.tab_length)  # tabs become spaces, using the instance's tab_length
+        source = re.sub(r'\n +\n', '\n\n', source)  # a line holding only spaces becomes an empty line
+        return source.split('\n')  # hand back a list of lines again
+
+
 class HtmlBlockPreprocessor(Preprocessor):
     """Remove html blocks from the text and store them for later retrieval."""
 
-- 
cgit v1.2.3