aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorWaylan Limberg <waylan@gmail.com>2013-02-08 09:02:33 -0500
committerWaylan Limberg <waylan@gmail.com>2013-02-08 09:02:33 -0500
commit3a1806b3b77dbcd01e351c3e28d8083bd3661ea3 (patch)
tree45ac0f4211784a31a471a539298200f4104c74ef
parent62547c53a1f40235c83542992573cc435aa82f36 (diff)
downloadmarkdown-3a1806b3b77dbcd01e351c3e28d8083bd3661ea3.tar.gz
markdown-3a1806b3b77dbcd01e351c3e28d8083bd3661ea3.tar.bz2
markdown-3a1806b3b77dbcd01e351c3e28d8083bd3661ea3.zip
Moved whitespace normalization to a preprocessor.
Fixes #150 - at least as much as I'm willing to. This allows whitespace normalization to be overridable by the extension API. Yes, I realize that most other processors will also probably need to be overridden to work with any different whitespace normalization - but I'm okay with that. As pointed out in #150, some processors have the tab length hardcoded in regexes. I'm willing to accept a working patch that fixes that - and keeps the regexes easy to override in a subclass (the provided patch moved them inside the __init__ method - which is not so easy to override in a subclass). However, that is about the only additional change I'm willing to consider for this issue.
-rw-r--r--markdown/__init__.py8
-rw-r--r--markdown/extensions/fenced_code.py2
-rw-r--r--markdown/preprocessors.py13
3 files changed, 18 insertions, 5 deletions
diff --git a/markdown/__init__.py b/markdown/__init__.py
index fbd2879..e66141d 100644
--- a/markdown/__init__.py
+++ b/markdown/__init__.py
@@ -281,10 +281,10 @@ class Markdown:
e.reason += '. -- Note: Markdown only accepts unicode input!'
raise
- source = source.replace(util.STX, "").replace(util.ETX, "")
- source = source.replace("\r\n", "\n").replace("\r", "\n") + "\n\n"
- source = source.expandtabs(self.tab_length)
- source = re.sub(r'\n +\n', '\n\n', source)
+ #source = source.replace(util.STX, "").replace(util.ETX, "")
+ #source = source.replace("\r\n", "\n").replace("\r", "\n") + "\n\n"
+ #source = source.expandtabs(self.tab_length)
+ #source = re.sub(r'\n +\n', '\n\n', source)
# Split into lines and run the line preprocessors.
self.lines = source.split("\n")
diff --git a/markdown/extensions/fenced_code.py b/markdown/extensions/fenced_code.py
index 9a1284f..76d644f 100644
--- a/markdown/extensions/fenced_code.py
+++ b/markdown/extensions/fenced_code.py
@@ -95,7 +95,7 @@ class FencedCodeExtension(markdown.Extension):
md.preprocessors.add('fenced_code_block',
FencedBlockPreprocessor(md),
- "_begin")
+ ">normalize_whitespace")
class FencedBlockPreprocessor(markdown.preprocessors.Preprocessor):
diff --git a/markdown/preprocessors.py b/markdown/preprocessors.py
index e968580..3751264 100644
--- a/markdown/preprocessors.py
+++ b/markdown/preprocessors.py
@@ -14,6 +14,7 @@ import odict
def build_preprocessors(md_instance, **kwargs):
""" Build the default set of preprocessors used by Markdown. """
preprocessors = odict.OrderedDict()
+ preprocessors['normalize_whitespace'] = NormalizeWhitespace(md_instance)
if md_instance.safeMode != 'escape':
preprocessors["html_block"] = HtmlBlockPreprocessor(md_instance)
preprocessors["reference"] = ReferencePreprocessor(md_instance)
@@ -41,6 +42,18 @@ class Preprocessor(util.Processor):
pass
+class NormalizeWhitespace(Preprocessor):
+ """ Normalize whitespace for consistant parsing. """
+
+ def run(self, lines):
+ source = '\n'.join(lines)
+ source = source.replace(util.STX, "").replace(util.ETX, "")
+ source = source.replace("\r\n", "\n").replace("\r", "\n") + "\n\n"
+ source = source.expandtabs(self.markdown.tab_length)
+ source = re.sub(r'\n +\n', '\n\n', source)
+ return source.split('\n')
+
+
class HtmlBlockPreprocessor(Preprocessor):
"""Remove html blocks from the text and store them for later retrieval."""