From 1656abd114d7aae059e15302a9cc36f81154fe01 Mon Sep 17 00:00:00 2001
From: Waylan Limberg Some inline stuff. Some inline stuff. Now some This line has two spaces at the end This line has two spaces at the end This list item has two spaces. This list item has two spaces. This has none.
-This line has three.
'<br />' tags with a linebreak. Also fixed a minor bug where the '<br />' tags
contained 2 spaces instead of one before the slash (i.e. '<br  />'). Note
that by moving to a Postprocessor, anyone can override with their own code
which does what they want (i.e. add indentation) with the extension api.
---
markdown.py | 65 ++++++++++++++++++++++++++++------------------
tests/misc/html.html | 3 ++-
tests/misc/two-spaces.html | 15 +++++++----
3 files changed, 52 insertions(+), 31 deletions(-)
diff --git a/markdown.py b/markdown.py
index 6089cea..9c1090b 100755
--- a/markdown.py
+++ b/markdown.py
@@ -98,22 +98,6 @@ in extensions use: `from markdown import etree`
to access to the ElemetTree module, do not import it by yourself"""
etree = importETree()
-def prettifyETree(elem):
- """ Add linebreaks to ElementTree before serialization """
-
- i = "\n"
- if isBlockLevel(elem.tag) and elem.tag not in ['code', 'pre']:
- if (not elem.text or not elem.text.strip()) \
- and len(elem) and isBlockLevel(elem[0].tag):
- elem.text = i
- for e in elem:
- if isBlockLevel(e.tag):
- prettifyETree(e)
- if not elem.tail or not elem.tail.strip():
- elem.tail = i
- if not elem.tail or not elem.tail.strip():
- elem.tail = i
-
# --------------- CONSTANTS YOU MIGHT WANT TO MODIFY -----------------
@@ -828,8 +812,8 @@ EMPHASIS_PATTERN_2 = SimpleTagPattern(EMPHASIS_2_RE, 'em')
STRONG_EM_PATTERN = DoubleTagPattern(STRONG_EM_RE, 'strong,em')
-LINE_BREAK_PATTERN = SubstituteTagPattern(LINE_BREAK_RE, 'br ')
-LINE_BREAK_PATTERN_2 = SubstituteTagPattern(LINE_BREAK_2_RE, 'br ')
+LINE_BREAK_PATTERN = SubstituteTagPattern(LINE_BREAK_RE, 'br')
+LINE_BREAK_PATTERN_2 = SubstituteTagPattern(LINE_BREAK_2_RE, 'br')
LINK_PATTERN = LinkPattern(LINK_RE)
IMAGE_LINK_PATTERN = ImagePattern(IMAGE_LINK_RE)
@@ -866,9 +850,6 @@ class Postprocessor:
Postprocessors must extend markdown.Postprocessor.
- There are currently no standard post-processors, but the footnote
- extension uses one.
-
"""
def run(self, root):
"""
@@ -900,6 +881,40 @@ class TextPostprocessor:
"""
pass
+class PrettifyPostprocessor(Postprocessor):
+    """ Add linebreaks to the html document.
+
+    Works on the ElementTree in place: newlines are written into the
+    ``text``/``tail`` of block-level elements and after every ``br``.
+    """
+
+    def _prettifyETree(self, elem):
+        """ Recursively add linebreaks to ElementTree children.
+
+        ``elem`` is modified in place; nothing is returned.
+        """
+
+        i = "\n"
+        if isBlockLevel(elem.tag) and elem.tag not in ['code', 'pre']:
+            # Only touch the leading text when it is empty/whitespace and
+            # the first child is itself a block-level element.
+            if (not elem.text or not elem.text.strip()) \
+                    and len(elem) and isBlockLevel(elem[0].tag):
+                elem.text = i
+            for e in elem:
+                if isBlockLevel(e.tag):
+                    # Recurse through the method: the module-level
+                    # prettifyETree() is removed by this change.
+                    self._prettifyETree(e)
+            if not elem.tail or not elem.tail.strip():
+                elem.tail = i
+        if not elem.tail or not elem.tail.strip():
+            elem.tail = i
+
+    def run(self, root):
+        """ Add linebreaks to ElementTree root object.
+
+        ``root`` is modified in place; nothing is returned.
+        """
+
+        self._prettifyETree(root)
+        # Do <br />'s separately as they are often in the middle of
+        # inline content and missed by _prettifyETree.
+        brs = root.getiterator('br')
+        for br in brs:
+            if not br.tail or not br.tail.strip():
+                # Empty or whitespace-only tail is replaced by a newline.
+                br.tail = '\n'
+            else:
+                br.tail = '\n%s' % br.tail
+
+PRETTIFYPOSTPROCESSOR = PrettifyPostprocessor()
+
class RawHtmlTextPostprocessor(TextPostprocessor):
""" Restore raw html to the document. """
@@ -1212,8 +1227,10 @@ class Markdown:
REFERENCE_PREPROCESSOR]
- self.postprocessors = [] # a footnote postprocessor will get
- # inserted later
+ self.postprocessors = [PRETTIFYPOSTPROCESSOR,
+ # a footnote postprocessor will get
+ # inserted later
+ ]
self.textPostprocessors = [# a footnote postprocessor will get
# inserted here
@@ -1955,8 +1972,6 @@ class Markdown:
if newRoot:
root = newRoot
- prettifyETree(root)
-
xml, length = codecs.utf_8_decode(etree.tostring(root, encoding="utf8"))
if self.stripTopLevelTags:
diff --git a/tests/misc/html.html b/tests/misc/html.html
index 84fe763..81ac5ee 100644
--- a/tests/misc/html.html
+++ b/tests/misc/html.html
@@ -1,6 +1,7 @@
Block level html
-
+
but this one has none
-but this line has three
and this is the second from last line
+
+but this one has none
+but this line has three
+and this is the second from last line
in this test message
+
This line has none.
-And this line two.
+This line has none.
+And this line two.
+
This line has none.