diff options
-rw-r--r-- | .gitignore | 1 | ||||
-rw-r--r-- | markdown.py | 119 | ||||
-rw-r--r-- | test-markdown.py | 8 | ||||
-rw-r--r-- | tests2/formater.py | 55 | ||||
-rw-r--r-- | tests2/php-markdown-cases-new/Code block in a list item.tags | 1 | ||||
-rw-r--r-- | tests2/php-markdown-cases-new/Emphasis.tags | 1 | ||||
-rw-r--r-- | tests2/php-markdown-cases-new/Inline HTML (Simple).tags | 1 | ||||
-rw-r--r-- | tests2/php-markdown-cases-new/Inline HTML (Span).tags | 1 | ||||
-rw-r--r-- | tests2/php-markdown-cases-new/Ins & del.html~ | 24 | ||||
-rw-r--r-- | tests2/php-markdown-cases-new/Parens in URL.tags | 1 | ||||
-rw-r--r-- | tests2/tm-cases-new/code_block_with_tabs.tags | 1 | ||||
-rw-r--r-- | tests2/tm-cases-new/codespans_safe_mode.opts | 1 | ||||
-rw-r--r-- | tests2/tm-cases-new/codespans_safe_mode.tags | 1 | ||||
-rw-r--r-- | tests2/tm-cases-new/escapes.tags | 1 | ||||
-rw-r--r-- | tests2/tm-cases-new/parens_in_url_4.tags | 1 | ||||
-rw-r--r-- | tests2/tm-cases-new/sublist-para.tags | 1 |
16 files changed, 103 insertions, 115 deletions
@@ -3,3 +3,4 @@ *.tmp tmp/* __init__.py +markdown_old.py diff --git a/markdown.py b/markdown.py index 131e318..4eba97a 100644 --- a/markdown.py +++ b/markdown.py @@ -179,6 +179,7 @@ class Document: def __init__ (self): """ Create a NanoDom document. """ self.bidi = "ltr" + self.stripTopLevelTags = True def appendChild(self, child): """ Add a dom element as a child of the document root. """ @@ -221,7 +222,10 @@ class Document: def toxml (self): """ Convert document to xml and return a string. """ - return self.documentElement.toxml() + xml = self.documentElement.toxml() + if self.stripTopLevelTags: + xml = xml.strip()[23:-7] + "\n" + return xml def normalizeEntities(self, text, avoidDoubleNormalizing=False): """ Return the given text as an html entity (i.e.: `<` => `>`). """ @@ -340,6 +344,8 @@ class Element: if child.type == "element": matched_nodes += child.find(test, depth+1) return matched_nodes + + def toxml(self): """ Return the Element and all children as a string. """ @@ -423,6 +429,7 @@ class TextNode: def toxml(self): """ Return the TextNode as a string. """ text = self.value + self.parentNode.setBidi(getBidiType(text)) @@ -435,6 +442,7 @@ class TextNode: and self.parentNode.childNodes[0]==self): text = "\n " + text.replace("\n", "\n ") + text = self.doc.normalizeEntities(text) return text @@ -1357,7 +1365,6 @@ class Markdown: self.safeMode = safe_mode self.blockGuru = BlockGuru() self.registeredExtensions = [] - self.stripTopLevelTags = 1 self.docType = "" self.textPreprocessors = [HTML_BLOCK_PREPROCESSOR] @@ -1492,10 +1499,12 @@ class Markdown: buffer = [] for line in self.lines: if line.startswith("#"): + self._processSection(self.top_element, buffer) buffer = [line] else: buffer.append(line) + self._processSection(self.top_element, buffer) #self._processSection(self.top_element, self.lines) @@ -1511,7 +1520,7 @@ class Markdown: def _processSection(self, parent_elem, lines, - inList = 0, looseList = 0): + inList=0, looseList=0): """ Process a section of a source document, looking for high level structural elements like lists, block quotes, code @@ -1594,14 +1603,16 @@ class Markdown: level = len(m.group(1)) h = self.doc.createElement("h%d" % level) parent_elem.appendChild(h) - for item in self._handleInline(m.group(2).strip()): - h.appendChild(item) + h.appendChild(self.doc.createTextNode(m.group(2).strip())) else: message(CRITICAL, "We've got a problem header!") def _processParagraph(self, parent_elem, paragraph, inList, looseList): - list = self._handleInline("\n".join(paragraph)) + #list = self._handleInline("\n".join(paragraph)) + + + if ( parent_elem.nodeName == 'li' and not (looseList or parent_elem.childNodes)): @@ -1615,8 +1626,10 @@ class Markdown: el = self.doc.createElement("p") parent_elem.appendChild(el) - for item in list: - el.appendChild(item) + el.appendChild(self.doc.createTextNode("\n".join(paragraph))) + + #for item in list: + #el.appendChild(item) def _processUList(self, parent_elem, lines, inList): @@ -1804,6 +1817,7 @@ class Markdown: parent_elem.appendChild(pre) pre.appendChild(code) text = "\n".join(detabbed).rstrip()+"\n" + #text = text.replace("&", "&") code.appendChild(self.doc.createTextNode(text)) self._processSection(parent_elem, theRest, inList) @@ -1931,31 +1945,80 @@ class Markdown: else: return None + + + def _processTree(self, el): + + stack = [el] + while stack: + currElement = stack.pop() + insertQueue = [] + for child in currElement.childNodes: + + if child.type == "text": - def convert (self, source=None): + lst = self._handleInline(child.value) + + pos = currElement.childNodes.index(child) + + insertQueue.append((pos, lst)) + + else: + stack.append(child) + for pos, lst in insertQueue: + del currElement.childNodes[pos] + for newChild in lst: + currElement.insertChild(pos, newChild) + pos += 1 + + def applyInlinePatterns(self, markdownTree): """ - Return the document in XHTML format. + Retrun NanoDOM markdown tree, with applied + inline paterns + + Keyword arguments: + + * markdownTree: NanoDOM Document object, reppresenting Markdown tree. + Returns: NanoDOM Document object. + """ + + self.doc = markdownTree + + el = markdownTree.documentElement + + self._processTree(el) + + return self.doc + + + + + + + def markdownToTree(self, source=None): + """ + Retrun NanoDOM markdown tree, without applying + inline paterns + Keyword arguments: * source: An ascii or unicode string of Markdown formated text. - Returns: A serialized XHTML body. - + Returns: NanoDOM document. """ - if source is not None: #Allow blank string self.source = source - + if not self.source: return u"" - + try: self.source = unicode(self.source) except UnicodeDecodeError: message(CRITICAL, 'UnicodeDecodeError: Markdown only accepts unicode or ascii input.') return u"" - + # Fixup the source text self.source = self.source.replace("\r\n", "\n").replace("\r", "\n") self.source += "\n\n" @@ -1963,15 +2026,29 @@ class Markdown: for pp in self.textPreprocessors: self.source = pp.run(self.source) + + markdownTree = self._transform() + + + return markdownTree + + + + def convert (self, source=None): + """ + Return the document in XHTML format. - doc = self._transform() - xml = doc.toxml() + Keyword arguments: + + * source: An ascii or unicode string of Markdown formated text. + Returns: A serialized XHTML body. - # Return everything but the top level tag + """ - if self.stripTopLevelTags: - xml = xml.strip()[23:-7] + "\n" + tree = self.markdownToTree(source) + + xml = self.applyInlinePatterns(tree).toxml() for pp in self.textPostprocessors: xml = pp.run(xml) diff --git a/test-markdown.py b/test-markdown.py index 219db50..3007d72 100644 --- a/test-markdown.py +++ b/test-markdown.py @@ -353,14 +353,14 @@ print MARKDOWN_FILE markdown = __import__(MARKDOWN_FILE) -""" + #testDirectory("tests/basic") testDirectory("tests/markdown-test", measure_time=True) testDirectory("tests/misc", measure_time=True) #testDirectory("tests/extensions-x-footnotes") # testDirectory("tests/extensions-x-ext1-ext2") -testDirectory("tests/safe_mode", measure_time=True, safe_mode="escape") """ +testDirectory("tests/safe_mode", measure_time=True, safe_mode="escape") -testDirectory("tests2/php-markdown-cases-new", measure_time=True) -testDirectory("tests2/tm-cases-new", measure_time=True) +#testDirectory("tests2/php-markdown-cases-new", measure_time=True) +#testDirectory("tests2/tm-cases-new", measure_time=True) diff --git a/tests2/formater.py b/tests2/formater.py deleted file mode 100644 index 26d850f..0000000 --- a/tests2/formater.py +++ /dev/null @@ -1,55 +0,0 @@ -from __future__ import with_statement -import os - -excl_tm_cases = """basic_safe_mode -basic_safe_mode_escape -auto_link_safe_mode -code_safe_emphasis -emacs_head_vars -emacs_tail_vars -footnotes -footnotes_letters -footnotes_markup -footnotes_safe_mode_escape -nested_list_safe_mode -issue2_safe_mode_borks_markup -issue3_bad_code_color_hack -link_defn_spaces_in_url -link_patterns -link_patterns_double_hit -link_patterns_edge_cases -mismatched_footnotes -nested_lists_safe_mode -pyshell -syntax_color""" - - -def reformat(path, dest, ex=""): - excl = ex.split("\n") - for fname in os.listdir(path): - if fname.endswith(".html"): - if fname[:-5] in excl: - continue - res = processFile(path + fname) - with open(dest + fname, "w") as rfile: - rfile.write(res) - -def processFile(filePath): - with open(filePath) as f: - result = f.read() - result = result.replace("</pre>\n\n<p>", "</pre><p>") - result = result.replace("</pre>\n<", "</pre><") - result = result.replace("</li>", "\n</li>") - result = result.replace("<li>", "<li>\n") - result = result.replace(">\n<p>", "><p>") - result = result.replace("\" />", "\"/>") - result = result.replace("</p>\n", "\n</p>\n") - - return result - -if __name__ == "__main__": - reformat("php-markdown-cases/", "php-markdown-cases-new/") - reformat("tm-cases/", "tm-cases-new/", excl_tm_cases) - - -
\ No newline at end of file diff --git a/tests2/php-markdown-cases-new/Code block in a list item.tags b/tests2/php-markdown-cases-new/Code block in a list item.tags deleted file mode 100644 index a3d2682..0000000 --- a/tests2/php-markdown-cases-new/Code block in a list item.tags +++ /dev/null @@ -1 +0,0 @@ -knownfailure eol dontcare # fails just because of trailing EOL diff --git a/tests2/php-markdown-cases-new/Emphasis.tags b/tests2/php-markdown-cases-new/Emphasis.tags deleted file mode 100644 index 06cb210..0000000 --- a/tests2/php-markdown-cases-new/Emphasis.tags +++ /dev/null @@ -1 +0,0 @@ -knownfailure dontcare # need a re-write based on html5lib (or something) to fix diff --git a/tests2/php-markdown-cases-new/Inline HTML (Simple).tags b/tests2/php-markdown-cases-new/Inline HTML (Simple).tags deleted file mode 100644 index a3d2682..0000000 --- a/tests2/php-markdown-cases-new/Inline HTML (Simple).tags +++ /dev/null @@ -1 +0,0 @@ -knownfailure eol dontcare # fails just because of trailing EOL diff --git a/tests2/php-markdown-cases-new/Inline HTML (Span).tags b/tests2/php-markdown-cases-new/Inline HTML (Span).tags deleted file mode 100644 index 06cb210..0000000 --- a/tests2/php-markdown-cases-new/Inline HTML (Span).tags +++ /dev/null @@ -1 +0,0 @@ -knownfailure dontcare # need a re-write based on html5lib (or something) to fix diff --git a/tests2/php-markdown-cases-new/Ins & del.html~ b/tests2/php-markdown-cases-new/Ins & del.html~ deleted file mode 100644 index 82bfb2b..0000000 --- a/tests2/php-markdown-cases-new/Ins & del.html~ +++ /dev/null @@ -1,24 +0,0 @@ -<p>Here is a block tag ins: -</p> - -<ins> -<p>Some text -</p> -</ins> - -<ins>And here it is inside a paragraph.</ins> - - -<p>And here it is <ins>in the middle of</ins> a paragraph. -</p> - -<del> -<p>Some text -</p> -</del> - -<del>And here is ins as a paragraph.</del> - - -<p>And here it is <del>in the middle of</del> a paragraph. -</p> diff --git a/tests2/php-markdown-cases-new/Parens in URL.tags b/tests2/php-markdown-cases-new/Parens in URL.tags deleted file mode 100644 index 918b7e8..0000000 --- a/tests2/php-markdown-cases-new/Parens in URL.tags +++ /dev/null @@ -1 +0,0 @@ -dontcare knownfailure diff --git a/tests2/tm-cases-new/code_block_with_tabs.tags b/tests2/tm-cases-new/code_block_with_tabs.tags deleted file mode 100644 index 981bc5a..0000000 --- a/tests2/tm-cases-new/code_block_with_tabs.tags +++ /dev/null @@ -1 +0,0 @@ -fromphpmarkdown # from PHP Markdown test "Parens in URL.text" diff --git a/tests2/tm-cases-new/codespans_safe_mode.opts b/tests2/tm-cases-new/codespans_safe_mode.opts deleted file mode 100644 index ccb6a09..0000000 --- a/tests2/tm-cases-new/codespans_safe_mode.opts +++ /dev/null @@ -1 +0,0 @@ -{'safe_mode': True} diff --git a/tests2/tm-cases-new/codespans_safe_mode.tags b/tests2/tm-cases-new/codespans_safe_mode.tags deleted file mode 100644 index dcc8b6c..0000000 --- a/tests2/tm-cases-new/codespans_safe_mode.tags +++ /dev/null @@ -1 +0,0 @@ -issue9 safe_mode diff --git a/tests2/tm-cases-new/escapes.tags b/tests2/tm-cases-new/escapes.tags deleted file mode 100644 index 0184518..0000000 --- a/tests2/tm-cases-new/escapes.tags +++ /dev/null @@ -1 +0,0 @@ -issue15 diff --git a/tests2/tm-cases-new/parens_in_url_4.tags b/tests2/tm-cases-new/parens_in_url_4.tags deleted file mode 100644 index 981bc5a..0000000 --- a/tests2/tm-cases-new/parens_in_url_4.tags +++ /dev/null @@ -1 +0,0 @@ -fromphpmarkdown # from PHP Markdown test "Parens in URL.text" diff --git a/tests2/tm-cases-new/sublist-para.tags b/tests2/tm-cases-new/sublist-para.tags deleted file mode 100644 index 81b35c7..0000000 --- a/tests2/tm-cases-new/sublist-para.tags +++ /dev/null @@ -1 +0,0 @@ -questionable # <p><p> isn't really correct, but Markdown.pl does the same |