From 1656abd114d7aae059e15302a9cc36f81154fe01 Mon Sep 17 00:00:00 2001 From: Waylan Limberg Date: Thu, 4 Sep 2008 00:20:54 -0400 Subject: Moved prettifyETree into a Postprocessor and added code to append all '<br />' tags with a linebreak. Also fixed a minor bug where the '<br />' tags contained 2 spaces instead of one before the slash (i.e. '<br  />
'). Note that by moving to a Postprocessor, anyone can override with their own code which does what they want (i.e. add indentation) with the extension api. --- markdown.py | 65 ++++++++++++++++++++++++++++------------------ tests/misc/html.html | 3 ++- tests/misc/two-spaces.html | 15 +++++++---- 3 files changed, 52 insertions(+), 31 deletions(-) diff --git a/markdown.py b/markdown.py index 6089cea..9c1090b 100755 --- a/markdown.py +++ b/markdown.py @@ -98,22 +98,6 @@ in extensions use: `from markdown import etree` to access to the ElemetTree module, do not import it by yourself""" etree = importETree() -def prettifyETree(elem): - """ Add linebreaks to ElementTree before serialization """ - - i = "\n" - if isBlockLevel(elem.tag) and elem.tag not in ['code', 'pre']: - if (not elem.text or not elem.text.strip()) \ - and len(elem) and isBlockLevel(elem[0].tag): - elem.text = i - for e in elem: - if isBlockLevel(e.tag): - prettifyETree(e) - if not elem.tail or not elem.tail.strip(): - elem.tail = i - if not elem.tail or not elem.tail.strip(): - elem.tail = i - # --------------- CONSTANTS YOU MIGHT WANT TO MODIFY ----------------- @@ -828,8 +812,8 @@ EMPHASIS_PATTERN_2 = SimpleTagPattern(EMPHASIS_2_RE, 'em') STRONG_EM_PATTERN = DoubleTagPattern(STRONG_EM_RE, 'strong,em') -LINE_BREAK_PATTERN = SubstituteTagPattern(LINE_BREAK_RE, 'br ') -LINE_BREAK_PATTERN_2 = SubstituteTagPattern(LINE_BREAK_2_RE, 'br ') +LINE_BREAK_PATTERN = SubstituteTagPattern(LINE_BREAK_RE, 'br') +LINE_BREAK_PATTERN_2 = SubstituteTagPattern(LINE_BREAK_2_RE, 'br') LINK_PATTERN = LinkPattern(LINK_RE) IMAGE_LINK_PATTERN = ImagePattern(IMAGE_LINK_RE) @@ -866,9 +850,6 @@ class Postprocessor: Postprocessors must extend markdown.Postprocessor. - There are currently no standard post-processors, but the footnote - extension uses one. - """ def run(self, root): """ @@ -900,6 +881,40 @@ class TextPostprocessor: """ pass +class PrettifyPostprocessor(Postprocessor): + """ Add linebreaks to the html document. 
""" + + def _prettifyETree(self, elem): + """ Recursively add linebreaks to ElementTree children. """ + + i = "\n" + if isBlockLevel(elem.tag) and elem.tag not in ['code', 'pre']: + if (not elem.text or not elem.text.strip()) \ + and len(elem) and isBlockLevel(elem[0].tag): + elem.text = i + for e in elem: + if isBlockLevel(e.tag): + prettifyETree(e) + if not elem.tail or not elem.tail.strip(): + elem.tail = i + if not elem.tail or not elem.tail.strip(): + elem.tail = i + + def run(self, root): + """ Add linebreaks to ElementTree root object """ + + self._prettifyETree(root) + # Do
<br />'s seperately as they are often in the middle of + # inline content and missed by _prettifyETree. + brs = root.getiterator('br') + for br in brs: + if not br.tail or not br.tail.strip(): + br.tail = '\n' + else: + br.tail = '\n%s' % br.tail + +PRETTIFYPOSTPROCESSOR = PrettifyPostprocessor() + class RawHtmlTextPostprocessor(TextPostprocessor): """ Restore raw html to the document. """ @@ -1212,8 +1227,10 @@ class Markdown: REFERENCE_PREPROCESSOR] - self.postprocessors = [] # a footnote postprocessor will get - # inserted later + self.postprocessors = [PRETTIFYPOSTPROCESSOR, + # a footnote postprocessor will get + # inserted later + ] self.textPostprocessors = [# a footnote postprocessor will get # inserted here @@ -1955,8 +1972,6 @@ class Markdown: if newRoot: root = newRoot - prettifyETree(root) - xml, length = codecs.utf_8_decode(etree.tostring(root, encoding="utf8")) if self.stripTopLevelTags: diff --git a/tests/misc/html.html b/tests/misc/html.html index 84fe763..81ac5ee 100644 --- a/tests/misc/html.html +++ b/tests/misc/html.html @@ -1,6 +1,7 @@

Block level html

-

Some inline stuff.

+

Some inline stuff.
+

Now some arbitrary tags.

More block level html.
diff --git a/tests/misc/two-spaces.html b/tests/misc/two-spaces.html index b0e872c..83837d2 100644 --- a/tests/misc/two-spaces.html +++ b/tests/misc/two-spaces.html @@ -1,14 +1,19 @@ -

This line has two spaces at the end
but this one has none -but this line has three
and this is the second from last line +

This line has two spaces at the end
+but this one has none +but this line has three
+and this is the second from last line in this test message

  • -

    This list item has two spaces.

    +

    This list item has two spaces.
    +

  • This has none. -This line has three.
    This line has none. -And this line two.

    +This line has three.
    +This line has none. +And this line two.
    +

    This line has none.

  • -- cgit v1.2.3