From 06d672e913bbf998dd0533de8440190c639fb18f Mon Sep 17 00:00:00 2001
From: Artem
Date: Thu, 26 Jun 2008 20:52:20 +0500
Subject: Test suite cleanup. README for markdown2 tests. Separation into two
stages (markdown to tree, applying inline patterns).
---
.gitignore | 1 +
markdown.py | 119 +++++++++++++++++----
test-markdown.py | 8 +-
tests2/formater.py | 55 ----------
.../Code block in a list item.tags | 1 -
tests2/php-markdown-cases-new/Emphasis.tags | 1 -
.../Inline HTML (Simple).tags | 1 -
.../php-markdown-cases-new/Inline HTML (Span).tags | 1 -
tests2/php-markdown-cases-new/Ins & del.html~ | 24 -----
tests2/php-markdown-cases-new/Parens in URL.tags | 1 -
tests2/tm-cases-new/code_block_with_tabs.tags | 1 -
tests2/tm-cases-new/codespans_safe_mode.opts | 1 -
tests2/tm-cases-new/codespans_safe_mode.tags | 1 -
tests2/tm-cases-new/escapes.tags | 1 -
tests2/tm-cases-new/parens_in_url_4.tags | 1 -
tests2/tm-cases-new/sublist-para.tags | 1 -
16 files changed, 103 insertions(+), 115 deletions(-)
delete mode 100644 tests2/formater.py
delete mode 100644 tests2/php-markdown-cases-new/Code block in a list item.tags
delete mode 100644 tests2/php-markdown-cases-new/Emphasis.tags
delete mode 100644 tests2/php-markdown-cases-new/Inline HTML (Simple).tags
delete mode 100644 tests2/php-markdown-cases-new/Inline HTML (Span).tags
delete mode 100644 tests2/php-markdown-cases-new/Ins & del.html~
delete mode 100644 tests2/php-markdown-cases-new/Parens in URL.tags
delete mode 100644 tests2/tm-cases-new/code_block_with_tabs.tags
delete mode 100644 tests2/tm-cases-new/codespans_safe_mode.opts
delete mode 100644 tests2/tm-cases-new/codespans_safe_mode.tags
delete mode 100644 tests2/tm-cases-new/escapes.tags
delete mode 100644 tests2/tm-cases-new/parens_in_url_4.tags
delete mode 100644 tests2/tm-cases-new/sublist-para.tags
diff --git a/.gitignore b/.gitignore
index 1be516e..64c7154 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,3 +3,4 @@
*.tmp
tmp/*
__init__.py
+markdown_old.py
diff --git a/markdown.py b/markdown.py
index 131e318..4eba97a 100644
--- a/markdown.py
+++ b/markdown.py
@@ -179,6 +179,7 @@ class Document:
def __init__ (self):
""" Create a NanoDom document. """
self.bidi = "ltr"
+ self.stripTopLevelTags = True
def appendChild(self, child):
""" Add a dom element as a child of the document root. """
@@ -221,7 +222,10 @@ class Document:
def toxml (self):
""" Convert document to xml and return a string. """
- return self.documentElement.toxml()
+ xml = self.documentElement.toxml()
+ if self.stripTopLevelTags:
+ xml = xml.strip()[23:-7] + "\n"
+ return xml
def normalizeEntities(self, text, avoidDoubleNormalizing=False):
""" Return the given text as an html entity (i.e.: `<` => `>`). """
@@ -340,6 +344,8 @@ class Element:
if child.type == "element":
matched_nodes += child.find(test, depth+1)
return matched_nodes
+
+
def toxml(self):
""" Return the Element and all children as a string. """
@@ -423,6 +429,7 @@ class TextNode:
def toxml(self):
""" Return the TextNode as a string. """
text = self.value
+
self.parentNode.setBidi(getBidiType(text))
@@ -435,6 +442,7 @@ class TextNode:
and self.parentNode.childNodes[0]==self):
text = "\n " + text.replace("\n", "\n ")
+
text = self.doc.normalizeEntities(text)
return text
@@ -1357,7 +1365,6 @@ class Markdown:
self.safeMode = safe_mode
self.blockGuru = BlockGuru()
self.registeredExtensions = []
- self.stripTopLevelTags = 1
self.docType = ""
self.textPreprocessors = [HTML_BLOCK_PREPROCESSOR]
@@ -1492,10 +1499,12 @@ class Markdown:
buffer = []
for line in self.lines:
if line.startswith("#"):
+
self._processSection(self.top_element, buffer)
buffer = [line]
else:
buffer.append(line)
+
self._processSection(self.top_element, buffer)
#self._processSection(self.top_element, self.lines)
@@ -1511,7 +1520,7 @@ class Markdown:
def _processSection(self, parent_elem, lines,
- inList = 0, looseList = 0):
+ inList=0, looseList=0):
"""
Process a section of a source document, looking for high
level structural elements like lists, block quotes, code
@@ -1594,14 +1603,16 @@ class Markdown:
level = len(m.group(1))
h = self.doc.createElement("h%d" % level)
parent_elem.appendChild(h)
- for item in self._handleInline(m.group(2).strip()):
- h.appendChild(item)
+ h.appendChild(self.doc.createTextNode(m.group(2).strip()))
else:
message(CRITICAL, "We've got a problem header!")
def _processParagraph(self, parent_elem, paragraph, inList, looseList):
- list = self._handleInline("\n".join(paragraph))
+ #list = self._handleInline("\n".join(paragraph))
+
+
+
if ( parent_elem.nodeName == 'li'
and not (looseList or parent_elem.childNodes)):
@@ -1615,8 +1626,10 @@ class Markdown:
el = self.doc.createElement("p")
parent_elem.appendChild(el)
- for item in list:
- el.appendChild(item)
+ el.appendChild(self.doc.createTextNode("\n".join(paragraph)))
+
+ #for item in list:
+ #el.appendChild(item)
def _processUList(self, parent_elem, lines, inList):
@@ -1804,6 +1817,7 @@ class Markdown:
parent_elem.appendChild(pre)
pre.appendChild(code)
text = "\n".join(detabbed).rstrip()+"\n"
+
#text = text.replace("&", "&amp;")
code.appendChild(self.doc.createTextNode(text))
self._processSection(parent_elem, theRest, inList)
@@ -1931,31 +1945,80 @@ class Markdown:
else:
return None
+
+
+ def _processTree(self, el):
+
+ stack = [el]
+ while stack:
+ currElement = stack.pop()
+ insertQueue = []
+ for child in currElement.childNodes:
+
+ if child.type == "text":
- def convert (self, source=None):
+ lst = self._handleInline(child.value)
+
+ pos = currElement.childNodes.index(child)
+
+ insertQueue.append((pos, lst))
+
+ else:
+ stack.append(child)
+ for pos, lst in insertQueue:
+ del currElement.childNodes[pos]
+ for newChild in lst:
+ currElement.insertChild(pos, newChild)
+ pos += 1
+
+ def applyInlinePatterns(self, markdownTree):
"""
- Return the document in XHTML format.
+ Return the NanoDOM markdown tree with inline
+ patterns applied.
+
+ Keyword arguments:
+
+ * markdownTree: NanoDOM Document object representing the Markdown tree.
+ Returns: NanoDOM Document object.
+ """
+
+ self.doc = markdownTree
+
+ el = markdownTree.documentElement
+
+ self._processTree(el)
+
+ return self.doc
+
+
+
+
+
+
+ def markdownToTree(self, source=None):
+ """
+ Return the NanoDOM markdown tree without applying
+ inline patterns.
+
Keyword arguments:
* source: An ascii or unicode string of Markdown formated text.
- Returns: A serialized XHTML body.
-
+ Returns: NanoDOM document.
"""
-
if source is not None: #Allow blank string
self.source = source
-
+
if not self.source:
return u""
-
+
try:
self.source = unicode(self.source)
except UnicodeDecodeError:
message(CRITICAL, 'UnicodeDecodeError: Markdown only accepts unicode or ascii input.')
return u""
-
+
# Fixup the source text
self.source = self.source.replace("\r\n", "\n").replace("\r", "\n")
self.source += "\n\n"
@@ -1963,15 +2026,29 @@ class Markdown:
for pp in self.textPreprocessors:
self.source = pp.run(self.source)
+
+ markdownTree = self._transform()
+
+
+ return markdownTree
+
+
+
+ def convert (self, source=None):
+ """
+ Return the document in XHTML format.
- doc = self._transform()
- xml = doc.toxml()
+ Keyword arguments:
+
+ * source: An ascii or unicode string of Markdown-formatted text.
+ Returns: A serialized XHTML body.
- # Return everything but the top level tag
+ """
- if self.stripTopLevelTags:
- xml = xml.strip()[23:-7] + "\n"
+ tree = self.markdownToTree(source)
+
+ xml = self.applyInlinePatterns(tree).toxml()
for pp in self.textPostprocessors:
xml = pp.run(xml)
diff --git a/test-markdown.py b/test-markdown.py
index 219db50..3007d72 100644
--- a/test-markdown.py
+++ b/test-markdown.py
@@ -353,14 +353,14 @@ print MARKDOWN_FILE
markdown = __import__(MARKDOWN_FILE)
-"""
+
#testDirectory("tests/basic")
testDirectory("tests/markdown-test", measure_time=True)
testDirectory("tests/misc", measure_time=True)
#testDirectory("tests/extensions-x-footnotes")
# testDirectory("tests/extensions-x-ext1-ext2")
-testDirectory("tests/safe_mode", measure_time=True, safe_mode="escape") """
+testDirectory("tests/safe_mode", measure_time=True, safe_mode="escape")
-testDirectory("tests2/php-markdown-cases-new", measure_time=True)
-testDirectory("tests2/tm-cases-new", measure_time=True)
+#testDirectory("tests2/php-markdown-cases-new", measure_time=True)
+#testDirectory("tests2/tm-cases-new", measure_time=True)
diff --git a/tests2/formater.py b/tests2/formater.py
deleted file mode 100644
index 26d850f..0000000
--- a/tests2/formater.py
+++ /dev/null
@@ -1,55 +0,0 @@
-from __future__ import with_statement
-import os
-
-excl_tm_cases = """basic_safe_mode
-basic_safe_mode_escape
-auto_link_safe_mode
-code_safe_emphasis
-emacs_head_vars
-emacs_tail_vars
-footnotes
-footnotes_letters
-footnotes_markup
-footnotes_safe_mode_escape
-nested_list_safe_mode
-issue2_safe_mode_borks_markup
-issue3_bad_code_color_hack
-link_defn_spaces_in_url
-link_patterns
-link_patterns_double_hit
-link_patterns_edge_cases
-mismatched_footnotes
-nested_lists_safe_mode
-pyshell
-syntax_color"""
-
-
-def reformat(path, dest, ex=""):
- excl = ex.split("\n")
- for fname in os.listdir(path):
- if fname.endswith(".html"):
- if fname[:-5] in excl:
- continue
- res = processFile(path + fname)
- with open(dest + fname, "w") as rfile:
- rfile.write(res)
-
-def processFile(filePath):
- with open(filePath) as f:
- result = f.read()
- result = result.replace("\n\n", "
")
- result = result.replace("\n<", "<")
- result = result.replace("", "\n")
- result = result.replace("
", "\n")
- result = result.replace(">\n", ">
")
- result = result.replace("\" />", "\"/>")
- result = result.replace("
\n", "\n
\n")
-
- return result
-
-if __name__ == "__main__":
- reformat("php-markdown-cases/", "php-markdown-cases-new/")
- reformat("tm-cases/", "tm-cases-new/", excl_tm_cases)
-
-
-
\ No newline at end of file
diff --git a/tests2/php-markdown-cases-new/Code block in a list item.tags b/tests2/php-markdown-cases-new/Code block in a list item.tags
deleted file mode 100644
index a3d2682..0000000
--- a/tests2/php-markdown-cases-new/Code block in a list item.tags
+++ /dev/null
@@ -1 +0,0 @@
-knownfailure eol dontcare # fails just because of trailing EOL
diff --git a/tests2/php-markdown-cases-new/Emphasis.tags b/tests2/php-markdown-cases-new/Emphasis.tags
deleted file mode 100644
index 06cb210..0000000
--- a/tests2/php-markdown-cases-new/Emphasis.tags
+++ /dev/null
@@ -1 +0,0 @@
-knownfailure dontcare # need a re-write based on html5lib (or something) to fix
diff --git a/tests2/php-markdown-cases-new/Inline HTML (Simple).tags b/tests2/php-markdown-cases-new/Inline HTML (Simple).tags
deleted file mode 100644
index a3d2682..0000000
--- a/tests2/php-markdown-cases-new/Inline HTML (Simple).tags
+++ /dev/null
@@ -1 +0,0 @@
-knownfailure eol dontcare # fails just because of trailing EOL
diff --git a/tests2/php-markdown-cases-new/Inline HTML (Span).tags b/tests2/php-markdown-cases-new/Inline HTML (Span).tags
deleted file mode 100644
index 06cb210..0000000
--- a/tests2/php-markdown-cases-new/Inline HTML (Span).tags
+++ /dev/null
@@ -1 +0,0 @@
-knownfailure dontcare # need a re-write based on html5lib (or something) to fix
diff --git a/tests2/php-markdown-cases-new/Ins & del.html~ b/tests2/php-markdown-cases-new/Ins & del.html~
deleted file mode 100644
index 82bfb2b..0000000
--- a/tests2/php-markdown-cases-new/Ins & del.html~
+++ /dev/null
@@ -1,24 +0,0 @@
-Here is a block tag ins:
-
-
-
-Some text
-
-
-
-And here it is inside a paragraph.
-
-
-And here it is in the middle of a paragraph.
-
-
-
-Some text
-
-
-
-And here is ins as a paragraph.
-
-
-And here it is in the middle of a paragraph.
-
diff --git a/tests2/php-markdown-cases-new/Parens in URL.tags b/tests2/php-markdown-cases-new/Parens in URL.tags
deleted file mode 100644
index 918b7e8..0000000
--- a/tests2/php-markdown-cases-new/Parens in URL.tags
+++ /dev/null
@@ -1 +0,0 @@
-dontcare knownfailure
diff --git a/tests2/tm-cases-new/code_block_with_tabs.tags b/tests2/tm-cases-new/code_block_with_tabs.tags
deleted file mode 100644
index 981bc5a..0000000
--- a/tests2/tm-cases-new/code_block_with_tabs.tags
+++ /dev/null
@@ -1 +0,0 @@
-fromphpmarkdown # from PHP Markdown test "Parens in URL.text"
diff --git a/tests2/tm-cases-new/codespans_safe_mode.opts b/tests2/tm-cases-new/codespans_safe_mode.opts
deleted file mode 100644
index ccb6a09..0000000
--- a/tests2/tm-cases-new/codespans_safe_mode.opts
+++ /dev/null
@@ -1 +0,0 @@
-{'safe_mode': True}
diff --git a/tests2/tm-cases-new/codespans_safe_mode.tags b/tests2/tm-cases-new/codespans_safe_mode.tags
deleted file mode 100644
index dcc8b6c..0000000
--- a/tests2/tm-cases-new/codespans_safe_mode.tags
+++ /dev/null
@@ -1 +0,0 @@
-issue9 safe_mode
diff --git a/tests2/tm-cases-new/escapes.tags b/tests2/tm-cases-new/escapes.tags
deleted file mode 100644
index 0184518..0000000
--- a/tests2/tm-cases-new/escapes.tags
+++ /dev/null
@@ -1 +0,0 @@
-issue15
diff --git a/tests2/tm-cases-new/parens_in_url_4.tags b/tests2/tm-cases-new/parens_in_url_4.tags
deleted file mode 100644
index 981bc5a..0000000
--- a/tests2/tm-cases-new/parens_in_url_4.tags
+++ /dev/null
@@ -1 +0,0 @@
-fromphpmarkdown # from PHP Markdown test "Parens in URL.text"
diff --git a/tests2/tm-cases-new/sublist-para.tags b/tests2/tm-cases-new/sublist-para.tags
deleted file mode 100644
index 81b35c7..0000000
--- a/tests2/tm-cases-new/sublist-para.tags
+++ /dev/null
@@ -1 +0,0 @@
-questionable # isn't really correct, but Markdown.pl does the same
--
cgit v1.2.3