Attempting a refactoring, breaking markdown into multiple files.

author: Yuri Takhteyev <yuri@freewisdom.org> 2008-11-17 00:17:15 -0800
committer: Yuri Takhteyev <yuri@freewisdom.org> 2008-11-17 00:17:15 -0800
commit: 159a274a977c496434dbc484a1b253663cde4eed (patch)
tree: 53c9a6d3c69cbb8be3e6b47ea0d35cab075f03c9 /markdown/linepreprocessors.py
parent: 3dfcbc8d7900aa0f07124c9d7598cb7ecc2ff41b (diff)
download: markdown-159a274a977c496434dbc484a1b253663cde4eed.tar.gz
markdown-159a274a977c496434dbc484a1b253663cde4eed.tar.bz2
markdown-159a274a977c496434dbc484a1b253663cde4eed.zip
1 files changed, 177 insertions, 0 deletions
diff --git a/markdown/linepreprocessors.py b/markdown/linepreprocessors.py
new file mode 100644
index 0000000..998bdf8
--- /dev/null
+++ b/markdown/linepreprocessors.py
@@ -0,0 +1,177 @@
+
+"""
+PRE-PROCESSORS
+=============================================================================
+
+Preprocessors work on the source text, once it has been split into lines,
+before any of the more complicated parsing starts.
+"""
+
+import re
+import markdown
+
+class Processor:  # base class: remembers the markdown instance it is given
+    def __init__(self, markdown_instance=None):
+        if markdown_instance:  # omitted or falsy -> `self.markdown` is never set
+            self.markdown = markdown_instance
+
+class Preprocessor (Processor):
+    """
+    Base class for preprocessors, which are run after the text has been
+    broken into lines.
+
+    Each preprocessor implements a `run` method that receives the list of
+    document lines, may modify it, and returns that list or a new one.
+
+    Preprocessors must extend this class (named here markdown.Preprocessor;
+    TODO confirm that name is still exported after the module split).
+    """
+    def run(self, lines):
+        """
+        Take the document as a list of strings split by newlines and return
+        the (possibly modified) list of lines.
+
+        Subclasses should override this; the base version returns None.
+        """
+        pass
+
+
+class HtmlBlockPreprocessor(Preprocessor):
+    """Remove html blocks from the text and store them for later retrieval."""
+
+    right_tag_patterns = ["</%s>", "%s>"]  # closers tried, in order, by _get_right_tag
+
+    def _get_left_tag(self, block):  # lower-cased first word after the leading char
+        return block[1:].replace(">", " ", 1).split()[0].lower()  # NOTE(review): IndexError for "<" or "<>"
+
+    def _get_right_tag(self, left_tag, block):  # -> (closing tag text, index just past it)
+        for p in self.right_tag_patterns:
+            tag = p % left_tag
+            i = block.rfind(tag)  # last occurrence in the block, -1 if absent
+            if i > 2:  # matches at index 0-2 are ignored (presumably the opening tag itself)
+                return tag.lstrip("<").rstrip(">"), i + len(p)-2 + len(left_tag)  # == i + len(tag)
+        return block.rstrip()[-len(left_tag)-2:-1].lower(), len(block)  # no closer: tail of block minus last char
+
+    def _equal_tags(self, left_tag, right_tag):  # True if right_tag is accepted as closing left_tag
+        if left_tag == 'div' or left_tag[0] in ['?', '@', '%']: # div, PHP etc.: always accepted
+            return True
+        if ("/" + left_tag) == right_tag:  # ordinary closing tag
+            return True
+        if (right_tag == "--" and left_tag == "--"):  # comment markers on both ends
+            return True
+        elif left_tag == right_tag[1:] \
+            and right_tag[0] != "<":  # left_tag preceded by one char other than "<"
+            return True
+        else:
+            return False
+
+    def _is_oneliner(self, tag):  # tags handled without looking for a closer
+        return (tag in ['hr', 'hr/'])
+
+    def run(self, lines):  # replace raw HTML blocks with what htmlStash.store() returns
+        text = "\n".join(lines)
+        new_blocks = []  # output blocks; joined with blank lines at the end
+        text = text.split("\n\n")  # from here on `text` is a list of blocks
+        items = []  # pieces of an HTML block spread over several text blocks
+        left_tag = ''
+        right_tag = ''
+        in_tag = False # True while collecting an HTML block that is not closed yet
+
+        while text:
+            block = text[0]
+            if block.startswith("\n"):
+                block = block[1:]
+            text = text[1:]  # pop the block just taken
+
+            if block.startswith("\n"):  # same check as above, applied a second time
+                block = block[1:]
+
+            if not in_tag:
+                if block.startswith("<"):
+                    left_tag = self._get_left_tag(block)
+                    right_tag, data_index = self._get_right_tag(left_tag, block)
+
+                    if data_index < len(block):  # text after the closer: requeue it as the next block
+                        text.insert(0, block[data_index:])
+                        block = block[:data_index]
+
+                    if not (markdown.isBlockLevel(left_tag) \
+                        or block[1] in ["!", "?", "@", "%"]):  # neither block-level nor <! <? <@ <%
+                        new_blocks.append(block)  # passed through unchanged
+                        continue
+
+                    if self._is_oneliner(left_tag):
+                        new_blocks.append(block.strip())  # kept in the text (stripped), not stashed
+                        continue
+
+                    if block[1] == "!":
+                        # is a comment block
+                        left_tag = "--"
+                        right_tag, data_index = self._get_right_tag(left_tag, block)
+                        # keep checking conditions below and maybe just append
+
+                    if block.rstrip().endswith(">") \
+                        and self._equal_tags(left_tag, right_tag):
+                        new_blocks.append(
+                            self.markdown.htmlStash.store(block.strip()))  # complete block: stash it
+                        continue
+                    else: #if not block[1] == "!":
+                        # block level tag (or comment) that is not complete yet
+
+                        if markdown.isBlockLevel(left_tag) or left_tag == "--" \
+                        and not block.rstrip().endswith(">"):  # NOTE(review): parses as A or (B and C); confirm intended
+                            items.append(block.strip())  # start collecting until a closer shows up
+                            in_tag = True
+                        else:
+                            new_blocks.append(
+                            self.markdown.htmlStash.store(block.strip()))
+
+                        continue
+
+                new_blocks.append(block)  # block does not start with "<"
+
+            else:
+                items.append(block.strip())
+
+                right_tag, data_index = self._get_right_tag(left_tag, block)  # left_tag: from the opening block
+
+                if self._equal_tags(left_tag, right_tag):
+                    # closing tag found: stash everything collected as one block
+                    in_tag = False
+                    new_blocks.append(
+                        self.markdown.htmlStash.store('\n\n'.join(items)))
+                    items = []
+
+        if items:  # input ended inside an unclosed block: stash what was collected
+            new_blocks.append(self.markdown.htmlStash.store('\n\n'.join(items)))
+            new_blocks.append('\n')
+
+        new_text = "\n\n".join(new_blocks)
+        return new_text.split("\n")  # back to a list of lines
+
+
+class ReferencePreprocessor(Preprocessor):
+    """ Remove reference definitions from text and store for later use. """
+
+    RE = re.compile(r'^(\ ?\ ?\ ?)\[([^\]]*)\]:\s*([^ ]*)(.*)$', re.DOTALL)
+
+    def run(self, lines):
+        """Strip reference definitions from `lines` and record them.
+
+        A line `[id]: url`, optionally followed by a title wrapped in "...",
+        '...' or (...), is saved in `self.markdown.references` under the
+        lower-cased id as `(url, title)`.  All other lines - including a
+        definition with undelimited trailing text - are returned as they are.
+        """
+        kept = []
+        for line in lines:
+            m = self.RE.match(line)
+            if m:
+                title = m.group(4).strip()
+                quoted = len(title) >= 2 and title[0] + title[-1] in ('""', "''", "()")
+                if quoted or not title:
+                    ref_id = m.group(2).strip().lower()
+                    self.markdown.references[ref_id] = (m.group(3), title[1:-1])  # ''[1:-1] == ''
+                    continue
+            kept.append(line)
+        return kept
author	Yuri Takhteyev <yuri@freewisdom.org>	2008-11-17 00:17:15 -0800
committer	Yuri Takhteyev <yuri@freewisdom.org>	2008-11-17 00:17:15 -0800
commit	159a274a977c496434dbc484a1b253663cde4eed (patch)
tree	53c9a6d3c69cbb8be3e6b47ea0d35cab075f03c9 /markdown/linepreprocessors.py
parent	3dfcbc8d7900aa0f07124c9d7598cb7ecc2ff41b (diff)
download	markdown-159a274a977c496434dbc484a1b253663cde4eed.tar.gz markdown-159a274a977c496434dbc484a1b253663cde4eed.tar.bz2 markdown-159a274a977c496434dbc484a1b253663cde4eed.zip