aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rwxr-xr-xmarkdown.py65
-rw-r--r--tests/misc/html.html3
-rw-r--r--tests/misc/two-spaces.html15
3 files changed, 52 insertions, 31 deletions
diff --git a/markdown.py b/markdown.py
index 6089cea..9c1090b 100755
--- a/markdown.py
+++ b/markdown.py
@@ -98,22 +98,6 @@ in extensions use: `from markdown import etree`
to access to the ElemetTree module, do not import it by yourself"""
etree = importETree()
-def prettifyETree(elem):
- """ Add linebreaks to ElementTree before serialization """
-
- i = "\n"
- if isBlockLevel(elem.tag) and elem.tag not in ['code', 'pre']:
- if (not elem.text or not elem.text.strip()) \
- and len(elem) and isBlockLevel(elem[0].tag):
- elem.text = i
- for e in elem:
- if isBlockLevel(e.tag):
- prettifyETree(e)
- if not elem.tail or not elem.tail.strip():
- elem.tail = i
- if not elem.tail or not elem.tail.strip():
- elem.tail = i
-
# --------------- CONSTANTS YOU MIGHT WANT TO MODIFY -----------------
@@ -828,8 +812,8 @@ EMPHASIS_PATTERN_2 = SimpleTagPattern(EMPHASIS_2_RE, 'em')
STRONG_EM_PATTERN = DoubleTagPattern(STRONG_EM_RE, 'strong,em')
-LINE_BREAK_PATTERN = SubstituteTagPattern(LINE_BREAK_RE, 'br ')
-LINE_BREAK_PATTERN_2 = SubstituteTagPattern(LINE_BREAK_2_RE, 'br ')
+LINE_BREAK_PATTERN = SubstituteTagPattern(LINE_BREAK_RE, 'br')
+LINE_BREAK_PATTERN_2 = SubstituteTagPattern(LINE_BREAK_2_RE, 'br')
LINK_PATTERN = LinkPattern(LINK_RE)
IMAGE_LINK_PATTERN = ImagePattern(IMAGE_LINK_RE)
@@ -866,9 +850,6 @@ class Postprocessor:
Postprocessors must extend markdown.Postprocessor.
- There are currently no standard post-processors, but the footnote
- extension uses one.
-
"""
def run(self, root):
"""
@@ -900,6 +881,40 @@ class TextPostprocessor:
"""
pass
+class PrettifyPostprocessor(Postprocessor):
+    """ Add linebreaks to the ElementTree before it is serialized to html. """
+
+    def _prettifyETree(self, elem):
+        """ Recursively set text/tail of block-level elements to a newline. """
+
+        i = "\n"
+        if isBlockLevel(elem.tag) and elem.tag not in ['code', 'pre']:
+            if (not elem.text or not elem.text.strip()) \
+                    and len(elem) and isBlockLevel(elem[0].tag):
+                elem.text = i
+            for e in elem:
+                if isBlockLevel(e.tag):
+                    self._prettifyETree(e)  # module-level prettifyETree is gone
+            if not elem.tail or not elem.tail.strip():
+                elem.tail = i
+        if not elem.tail or not elem.tail.strip():
+            elem.tail = i
+
+    def run(self, root):
+        """ Add linebreaks to the tree under `root`, then after every <br />. """
+
+        self._prettifyETree(root)
+        # Do <br />'s separately as they are often in the middle of
+        # inline content and missed by _prettifyETree.
+        brs = root.getiterator('br')
+        for br in brs:
+            if not br.tail or not br.tail.strip():
+                br.tail = '\n'
+            else:
+                br.tail = '\n%s' % br.tail
+
+PRETTIFYPOSTPROCESSOR = PrettifyPostprocessor()
+
class RawHtmlTextPostprocessor(TextPostprocessor):
""" Restore raw html to the document. """
@@ -1212,8 +1227,10 @@ class Markdown:
REFERENCE_PREPROCESSOR]
- self.postprocessors = [] # a footnote postprocessor will get
- # inserted later
+ self.postprocessors = [PRETTIFYPOSTPROCESSOR,
+ # a footnote postprocessor will get
+ # inserted later
+ ]
self.textPostprocessors = [# a footnote postprocessor will get
# inserted here
@@ -1955,8 +1972,6 @@ class Markdown:
if newRoot:
root = newRoot
- prettifyETree(root)
-
xml, length = codecs.utf_8_decode(etree.tostring(root, encoding="utf8"))
if self.stripTopLevelTags:
diff --git a/tests/misc/html.html b/tests/misc/html.html
index 84fe763..81ac5ee 100644
--- a/tests/misc/html.html
+++ b/tests/misc/html.html
@@ -1,6 +1,7 @@
<h1>Block level html</h1>
-<p>Some inline <b>stuff<b>.<br /></p>
+<p>Some inline <b>stuff<b>.<br />
+</p>
<p>Now some <arbitrary>arbitrary tags</arbitrary>.</p>
<div>More block level html.</div>
diff --git a/tests/misc/two-spaces.html b/tests/misc/two-spaces.html
index b0e872c..83837d2 100644
--- a/tests/misc/two-spaces.html
+++ b/tests/misc/two-spaces.html
@@ -1,14 +1,19 @@
-<p>This line has two spaces at the end<br />but this one has none
-but this line has three <br />and this is the second from last line
+<p>This line has two spaces at the end<br />
+but this one has none
+but this line has three <br />
+and this is the second from last line
in this test message</p>
<ul>
<li>
-<p>This list item has two spaces.<br /></p>
+<p>This list item has two spaces.<br />
+</p>
</li>
<li>
<p>This has none.
-This line has three. <br />This line has none.
-And this line two.<br /></p>
+This line has three. <br />
+This line has none.
+And this line two.<br />
+</p>
<p>This line has none.</p>
</li>
<li>