From 517d38e552e91ebbe527a0286d43dd1daa585bcc Mon Sep 17 00:00:00 2001
From: Waylan Limberg <waylan@gmail.com>
Date: Thu, 20 Nov 2008 19:38:09 -0500
Subject: Cleaned up recent refactor into a package from a single file.

---
 markdown/__init__.py          | 163 ++++++++++++++++----------------
 markdown/linepreprocessors.py | 214 ------------------------------------------
 markdown/postprocessors.py    |  18 +++-
 markdown/preprocessors.py     | 214 ++++++++++++++++++++++++++++++++++++++++++
 markdown/treeprocessors.py    |  24 +++--
 5 files changed, 327 insertions(+), 306 deletions(-)
 mode change 100755 => 100644 markdown/__init__.py
 delete mode 100644 markdown/linepreprocessors.py
 create mode 100644 markdown/preprocessors.py
diff --git a/markdown/__init__.py b/markdown/__init__.py
old mode 100755
new mode 100644
index 27cb9ff..8eacd45
--- a/markdown/__init__.py
+++ b/markdown/__init__.py
@@ -64,8 +64,8 @@ ENABLE_ATTRIBUTES = True     # @id = xyz -> <... id="xyz">
 SMART_EMPHASIS = True        # this_or_that does not become this<i>or</i>that
 HTML_REMOVED_TEXT = "[HTML_REMOVED]" # text used instead of HTML in safe mode
 BLOCK_LEVEL_ELEMENTS = re.compile("p|div|h[1-6]|blockquote|pre|table|dl|ol|ul"
-                                  +"|script|noscript|form|fieldset|iframe|math"
-                                  +"|ins|del|hr|hr/|style|li|dt|dd|tr")
+                                  "|script|noscript|form|fieldset|iframe|math"
+                                  "|ins|del|hr|hr/|style|li|dt|dd|tr")
 
 # Placeholders
 STX = u'\u0002'  # Use STX ("Start of text") for start-of-placeholder
@@ -74,12 +74,18 @@ INLINE_PLACEHOLDER_PREFIX = STX+"klzzwxh:"
 INLINE_PLACEHOLDER = INLINE_PLACEHOLDER_PREFIX + "%s" + ETX
 AMP_SUBSTITUTE = STX+"amp"+ETX
 
-import linepreprocessors, blockprocessors, treeprocessors, inlinepatterns
+import preprocessors, blockprocessors, treeprocessors, inlinepatterns
 import postprocessors
 import blockparser
 import etree_loader
 import odict
 
+# Extensions should use "markdown.etree" instead of "etree" (or do `from
+# markdown import etree`).  Do not import ElementTree yourself.
+
+etree = etree_loader.importETree()
+
+
 """
 Constants you probably do not need to change
 -----------------------------------------------------------------------------
@@ -108,6 +114,15 @@ def isBlockLevel(tag):
     """Check if the tag is a block level HTML tag."""
     return BLOCK_LEVEL_ELEMENTS.match(tag)
 
+"""
+MISC AUXILIARY CLASSES
+=============================================================================
+"""
+
+class AtomicString(unicode):
+    """A string which should not be further processed."""
+    pass
+
 
 """
 OVERALL DESIGN
@@ -127,46 +142,9 @@ Markdown processing takes place in four steps:
 
 Those steps are put together by the Markdown() class.
 
-The code below is organized as follows:
-
-1. BlockParser and it's BlockProcessors - does core block parsing.
-2. All the preprocessors, patterns, treeprocessors, and postprocessors.
-3. Markdown class - does the high-level wrapping.
-"""
-
-
-
-
-
-"""
-POST-PROCESSORS
-=============================================================================
-
-Markdown also allows post-processors, which are similar to preprocessors in
-that they need to implement a "run" method. However, they are run after core
-processing.
-
-There are two types of post-processors: Treeprocessor and Postprocessor
 """
 
 
-
-
-"""
-MISC AUXILIARY CLASSES
-=============================================================================
-"""
-
-class AtomicString(unicode):
-    """A string which should not be further processed."""
-    pass
-
-
-"""
-Markdown
-=============================================================================
-"""
-
 class Markdown:
     """Convert Markdown to HTML."""
 
@@ -195,66 +173,93 @@ class Markdown:
 
         # Preprocessors
         self.preprocessors = odict.OrderedDict()
-        self.preprocessors["html_block"] =  linepreprocessors.HtmlBlockPreprocessor(self)
-        self.preprocessors["reference"] = linepreprocessors.ReferencePreprocessor(self)
+        self.preprocessors["html_block"] = \
+                preprocessors.HtmlBlockPreprocessor(self)
+        self.preprocessors["reference"] = \
+                preprocessors.ReferencePreprocessor(self)
         # footnote preprocessor will be inserted with "<reference"
 
         # Block processors - ran by the parser
         self.parser = blockparser.BlockParser()
-        self.parser.blockprocessors['empty'] = blockprocessors.EmptyBlockProcessor(self.parser)
-        self.parser.blockprocessors['indent'] = blockprocessors.ListIndentProcessor(self.parser)
-        self.parser.blockprocessors['code'] = blockprocessors.CodeBlockProcessor(self.parser)
-        self.parser.blockprocessors['hashheader'] = blockprocessors.HashHeaderProcessor(self.parser)
-        self.parser.blockprocessors['setextheader'] = blockprocessors.SetextHeaderProcessor(self.parser)
-        self.parser.blockprocessors['hr'] = blockprocessors.HRProcessor(self.parser)
-        self.parser.blockprocessors['olist'] = blockprocessors.OListProcessor(self.parser)
-        self.parser.blockprocessors['ulist'] = blockprocessors.UListProcessor(self.parser)
-        self.parser.blockprocessors['quote'] = blockprocessors.BlockQuoteProcessor(self.parser)
-        self.parser.blockprocessors['paragraph'] = blockprocessors.ParagraphProcessor(self.parser)
-
-
-        self.prePatterns = []
+        self.parser.blockprocessors['empty'] = \
+                blockprocessors.EmptyBlockProcessor(self.parser)
+        self.parser.blockprocessors['indent'] = \
+                blockprocessors.ListIndentProcessor(self.parser)
+        self.parser.blockprocessors['code'] = \
+                blockprocessors.CodeBlockProcessor(self.parser)
+        self.parser.blockprocessors['hashheader'] = \
+                blockprocessors.HashHeaderProcessor(self.parser)
+        self.parser.blockprocessors['setextheader'] = \
+                blockprocessors.SetextHeaderProcessor(self.parser)
+        self.parser.blockprocessors['hr'] = \
+                blockprocessors.HRProcessor(self.parser)
+        self.parser.blockprocessors['olist'] = \
+                blockprocessors.OListProcessor(self.parser)
+        self.parser.blockprocessors['ulist'] = \
+                blockprocessors.UListProcessor(self.parser)
+        self.parser.blockprocessors['quote'] = \
+                blockprocessors.BlockQuoteProcessor(self.parser)
+        self.parser.blockprocessors['paragraph'] = \
+                blockprocessors.ParagraphProcessor(self.parser)
+
+
+        #self.prePatterns = []
 
         # Inline patterns - Run on the tree
         self.inlinePatterns = odict.OrderedDict()
-        self.inlinePatterns["backtick"] = inlinepatterns.BacktickPattern(inlinepatterns.BACKTICK_RE)
-        self.inlinePatterns["escape"] = inlinepatterns.SimpleTextPattern(inlinepatterns.ESCAPE_RE)
-        self.inlinePatterns["reference"] = inlinepatterns.ReferencePattern(inlinepatterns.REFERENCE_RE, self)
-        self.inlinePatterns["link"] = inlinepatterns.LinkPattern(inlinepatterns.LINK_RE, self)
-        self.inlinePatterns["image_link"] = inlinepatterns.ImagePattern(inlinepatterns.IMAGE_LINK_RE, self)
+        self.inlinePatterns["backtick"] = \
+                inlinepatterns.BacktickPattern(inlinepatterns.BACKTICK_RE)
+        self.inlinePatterns["escape"] = \
+                inlinepatterns.SimpleTextPattern(inlinepatterns.ESCAPE_RE)
+        self.inlinePatterns["reference"] = \
+            inlinepatterns.ReferencePattern(inlinepatterns.REFERENCE_RE, self)
+        self.inlinePatterns["link"] = \
+                inlinepatterns.LinkPattern(inlinepatterns.LINK_RE, self)
+        self.inlinePatterns["image_link"] = \
+                inlinepatterns.ImagePattern(inlinepatterns.IMAGE_LINK_RE, self)
         self.inlinePatterns["image_reference"] = \
-                            inlinepatterns.ImageReferencePattern(inlinepatterns.IMAGE_REFERENCE_RE, self)
-        self.inlinePatterns["autolink"] = inlinepatterns.AutolinkPattern(inlinepatterns.AUTOLINK_RE, self)
-        self.inlinePatterns["automail"] = inlinepatterns.AutomailPattern(inlinepatterns.AUTOMAIL_RE, self)
+            inlinepatterns.ImageReferencePattern(inlinepatterns.IMAGE_REFERENCE_RE, self)
+        self.inlinePatterns["autolink"] = \
+            inlinepatterns.AutolinkPattern(inlinepatterns.AUTOLINK_RE, self)
+        self.inlinePatterns["automail"] = \
+            inlinepatterns.AutomailPattern(inlinepatterns.AUTOMAIL_RE, self)
         self.inlinePatterns["linebreak2"] = \
-                            inlinepatterns.SubstituteTagPattern(inlinepatterns.LINE_BREAK_2_RE, 'br')
+            inlinepatterns.SubstituteTagPattern(inlinepatterns.LINE_BREAK_2_RE, 'br')
         self.inlinePatterns["linebreak"] = \
-                            inlinepatterns.SubstituteTagPattern(inlinepatterns.LINE_BREAK_RE, 'br')
-        self.inlinePatterns["html"] = inlinepatterns.HtmlPattern(inlinepatterns.HTML_RE, self)
-        self.inlinePatterns["entity"] = inlinepatterns.HtmlPattern(inlinepatterns.ENTITY_RE, self)
-        self.inlinePatterns["not_strong"] = inlinepatterns.SimpleTextPattern(inlinepatterns.NOT_STRONG_RE)
+            inlinepatterns.SubstituteTagPattern(inlinepatterns.LINE_BREAK_RE, 'br')
+        self.inlinePatterns["html"] = \
+                inlinepatterns.HtmlPattern(inlinepatterns.HTML_RE, self)
+        self.inlinePatterns["entity"] = \
+                inlinepatterns.HtmlPattern(inlinepatterns.ENTITY_RE, self)
+        self.inlinePatterns["not_strong"] = \
+                inlinepatterns.SimpleTextPattern(inlinepatterns.NOT_STRONG_RE)
         self.inlinePatterns["strong_em"] = \
-                            inlinepatterns.DoubleTagPattern(inlinepatterns.STRONG_EM_RE, 'strong,em')
-        self.inlinePatterns["strong"] = inlinepatterns.SimpleTagPattern(inlinepatterns.STRONG_RE, 'strong')
-        self.inlinePatterns["emphasis"] = inlinepatterns.SimpleTagPattern(inlinepatterns.EMPHASIS_RE, 'em')
+            inlinepatterns.DoubleTagPattern(inlinepatterns.STRONG_EM_RE, 'strong,em')
+        self.inlinePatterns["strong"] = \
+            inlinepatterns.SimpleTagPattern(inlinepatterns.STRONG_RE, 'strong')
+        self.inlinePatterns["emphasis"] = \
+            inlinepatterns.SimpleTagPattern(inlinepatterns.EMPHASIS_RE, 'em')
         self.inlinePatterns["emphasis2"] = \
-                            inlinepatterns.SimpleTagPattern(inlinepatterns.EMPHASIS_2_RE, 'em')
+            inlinepatterns.SimpleTagPattern(inlinepatterns.EMPHASIS_2_RE, 'em')
         # The order of the handlers matters!!!
 
 
         # Tree processors - run once we have a basic parse.
         self.treeprocessors = odict.OrderedDict()
         self.treeprocessors["inline"] = treeprocessors.InlineProcessor(self)
-        self.treeprocessors["prettify"] = treeprocessors.PrettifyTreeprocessor(self)
+        self.treeprocessors["prettify"] = \
+                treeprocessors.PrettifyTreeprocessor(self)
 
         # Postprocessors - finishing touches.
         self.postprocessors = odict.OrderedDict()
-        self.postprocessors["raw_html"] = postprocessors.RawHtmlPostprocessor(self)
-        self.postprocessors["amp_substitute"] = postprocessors.AndSubstitutePostprocessor()
+        self.postprocessors["raw_html"] = \
+                postprocessors.RawHtmlPostprocessor(self)
+        self.postprocessors["amp_substitute"] = \
+                postprocessors.AndSubstitutePostprocessor()
         # footnote postprocessor will be inserted with ">amp_substitute"
 
         self.references = {}
-        self.htmlStash = linepreprocessors.HtmlStash()
+        self.htmlStash = preprocessors.HtmlStash()
         self.registerExtensions(extensions = extensions,
                                 configs = extension_configs)
         self.reset()
@@ -427,6 +432,7 @@ class Extension:
         """
         pass
 
+
 def load_extension(ext_name, configs = []):
     """Load extension by name, then return the module.
 
@@ -466,6 +472,7 @@ def load_extension(ext_name, configs = []):
     except:
         message(CRITICAL, "Failed to instantiate extension '%s'" % ext_name)
 
+
 def load_extensions(ext_names):
     """Loads multiple extensions"""
     extensions = []
@@ -475,10 +482,6 @@ def load_extensions(ext_names):
             extensions.append(extension)
     return extensions
 
-# Extensions should use "markdown.etree" instead of "etree" (or do `from
-# markdown import etree`).  Do not import it by yourself.
-
-etree = etree_loader.importETree()
 
 """
 EXPORTED FUNCTIONS
diff --git a/markdown/linepreprocessors.py b/markdown/linepreprocessors.py
deleted file mode 100644
index 712a1e8..0000000
--- a/markdown/linepreprocessors.py
+++ /dev/null
@@ -1,214 +0,0 @@
-
-"""
-PRE-PROCESSORS
-=============================================================================
-
-Preprocessors work on source text before we start doing anything too
-complicated. 
-"""
-
-import re
-import markdown
-
-HTML_PLACEHOLDER_PREFIX = markdown.STX+"wzxhzdk:"
-HTML_PLACEHOLDER = HTML_PLACEHOLDER_PREFIX + "%d" + markdown.ETX
-
-class Processor:
-    def __init__(self, markdown_instance=None):
-        if markdown_instance:
-            self.markdown = markdown_instance
-
-class Preprocessor (Processor):
-    """
-    Preprocessors are run after the text is broken into lines.
-
-    Each preprocessor implements a "run" method that takes a pointer to a
-    list of lines of the document, modifies it as necessary and returns
-    either the same pointer or a pointer to a new list.
-
-    Preprocessors must extend markdown.Preprocessor.
-
-    """
-    def run(self, lines):
-        """
-        Each subclass of Preprocessor should override the `run` method, which
-        takes the document as a list of strings split by newlines and returns
-        the (possibly modified) list of lines.
-
-        """
-        pass
-
-class HtmlStash:
-    """
-    This class is used for stashing HTML objects that we extract
-    in the beginning and replace with place-holders.
-    """
-
-    def __init__ (self):
-        """ Create a HtmlStash. """
-        self.html_counter = 0 # for counting inline html segments
-        self.rawHtmlBlocks=[]
-
-    def store(self, html, safe=False):
-        """
-        Saves an HTML segment for later reinsertion.  Returns a
-        placeholder string that needs to be inserted into the
-        document.
-
-        Keyword arguments:
-
-        * html: an html segment
-        * safe: label an html segment as safe for safemode
-
-        Returns : a placeholder string
-
-        """
-        self.rawHtmlBlocks.append((html, safe))
-        placeholder = HTML_PLACEHOLDER % self.html_counter
-        self.html_counter += 1
-        return placeholder
-
-    def reset(self):
-        self.html_counter = 0
-        self.rawHtmlBlocks = []
-
-
-class HtmlBlockPreprocessor(Preprocessor):
-    """Remove html blocks from the text and store them for later retrieval."""
-
-    right_tag_patterns = ["</%s>", "%s>"]
-
-    def _get_left_tag(self, block):
-        return block[1:].replace(">", " ", 1).split()[0].lower()
-
-    def _get_right_tag(self, left_tag, block):
-        for p in self.right_tag_patterns:
-            tag = p % left_tag
-            i = block.rfind(tag)
-            if i > 2:
-                return tag.lstrip("<").rstrip(">"), i + len(p)-2 + len(left_tag)
-        return block.rstrip()[-len(left_tag)-2:-1].lower(), len(block)
-
-    def _equal_tags(self, left_tag, right_tag):
-        if left_tag == 'div' or left_tag[0] in ['?', '@', '%']: # handle PHP, etc.
-            return True
-        if ("/" + left_tag) == right_tag:
-            return True
-        if (right_tag == "--" and left_tag == "--"):
-            return True
-        elif left_tag == right_tag[1:] \
-            and right_tag[0] != "<":
-            return True
-        else:
-            return False
-
-    def _is_oneliner(self, tag):
-        return (tag in ['hr', 'hr/'])
-
-    def run(self, lines):
-        text = "\n".join(lines)
-        new_blocks = []
-        text = text.split("\n\n")
-        items = []
-        left_tag = ''
-        right_tag = ''
-        in_tag = False # flag
-
-        while text:
-            block = text[0]
-            if block.startswith("\n"):
-                block = block[1:]
-            text = text[1:]
-
-            if block.startswith("\n"):
-                block = block[1:]
-
-            if not in_tag:
-                if block.startswith("<"):
-                    left_tag = self._get_left_tag(block)
-                    right_tag, data_index = self._get_right_tag(left_tag, block)
-
-                    if data_index < len(block):
-                        text.insert(0, block[data_index:])
-                        block = block[:data_index]
-
-                    if not (markdown.isBlockLevel(left_tag) \
-                        or block[1] in ["!", "?", "@", "%"]):
-                        new_blocks.append(block)
-                        continue
-
-                    if self._is_oneliner(left_tag):
-                        new_blocks.append(block.strip())
-                        continue
-
-                    if block[1] == "!":
-                        # is a comment block
-                        left_tag = "--"
-                        right_tag, data_index = self._get_right_tag(left_tag, block)
-                        # keep checking conditions below and maybe just append
-
-                    if block.rstrip().endswith(">") \
-                        and self._equal_tags(left_tag, right_tag):
-                        new_blocks.append(
-                            self.markdown.htmlStash.store(block.strip()))
-                        continue
-                    else: #if not block[1] == "!":
-                        # if is block level tag and is not complete
-
-                        if markdown.isBlockLevel(left_tag) or left_tag == "--" \
-                        and not block.rstrip().endswith(">"):
-                            items.append(block.strip())
-                            in_tag = True
-                        else:
-                            new_blocks.append(
-                            self.markdown.htmlStash.store(block.strip()))
-
-                        continue
-
-                new_blocks.append(block)
-
-            else:
-                items.append(block.strip())
-
-                right_tag, data_index = self._get_right_tag(left_tag, block)
-
-                if self._equal_tags(left_tag, right_tag):
-                    # if find closing tag
-                    in_tag = False
-                    new_blocks.append(
-                        self.markdown.htmlStash.store('\n\n'.join(items)))
-                    items = []
-
-        if items:
-            new_blocks.append(self.markdown.htmlStash.store('\n\n'.join(items)))
-            new_blocks.append('\n')
-
-        new_text = "\n\n".join(new_blocks)
-        return new_text.split("\n")
-
-
-class ReferencePreprocessor(Preprocessor):
-    """ Remove reference definitions from text and store for later use. """
-
-    RE = re.compile(r'^(\ ?\ ?\ ?)\[([^\]]*)\]:\s*([^ ]*)(.*)$', re.DOTALL)
-
-    def run (self, lines):
-        new_text = [];
-        for line in lines:
-            m = self.RE.match(line)
-            if m:
-                id = m.group(2).strip().lower()
-                t = m.group(4).strip()  # potential title
-                if not t:
-                    self.markdown.references[id] = (m.group(3), t)
-                elif (len(t) >= 2
-                      and (t[0] == t[-1] == "\""
-                           or t[0] == t[-1] == "\'"
-                           or (t[0] == "(" and t[-1] == ")") ) ):
-                    self.markdown.references[id] = (m.group(3), t[1:-1])
-                else:
-                    new_text.append(line)
-            else:
-                new_text.append(line)
-
-        return new_text #+ "\n"
diff --git a/markdown/postprocessors.py b/markdown/postprocessors.py
index cd872cf..80227bb 100644
--- a/markdown/postprocessors.py
+++ b/markdown/postprocessors.py
@@ -1,3 +1,13 @@
+"""
+POST-PROCESSORS
+=============================================================================
+
+Markdown also allows post-processors, which are similar to preprocessors in
+that they need to implement a "run" method. However, they are run after core
+processing.
+
+"""
+
 
 import markdown
 
@@ -42,9 +52,11 @@ class RawHtmlPostprocessor(Postprocessor):
                 else:
                     html = markdown.HTML_REMOVED_TEXT
             if safe or not self.markdown.safeMode:
-                text = text.replace("<p>%s</p>" % (markdown.linepreprocessors.HTML_PLACEHOLDER % i),
-                                    html + "\n")
-            text =  text.replace(markdown.linepreprocessors.HTML_PLACEHOLDER % i, html)
+                text = text.replace("<p>%s</p>" % 
+                            (markdown.preprocessors.HTML_PLACEHOLDER % i),
+                            html + "\n")
+            text =  text.replace(markdown.preprocessors.HTML_PLACEHOLDER % i, 
+                                 html)
         return text
 
     def escape(self, html):
diff --git a/markdown/preprocessors.py b/markdown/preprocessors.py
new file mode 100644
index 0000000..712a1e8
--- /dev/null
+++ b/markdown/preprocessors.py
@@ -0,0 +1,214 @@
+
+"""
+PRE-PROCESSORS
+=============================================================================
+
+Preprocessors work on source text before we start doing anything too
+complicated. 
+"""
+
+import re
+import markdown
+
+HTML_PLACEHOLDER_PREFIX = markdown.STX+"wzxhzdk:"
+HTML_PLACEHOLDER = HTML_PLACEHOLDER_PREFIX + "%d" + markdown.ETX
+
+class Processor:
+    def __init__(self, markdown_instance=None):
+        if markdown_instance:
+            self.markdown = markdown_instance
+
+class Preprocessor (Processor):
+    """
+    Preprocessors are run after the text is broken into lines.
+
+    Each preprocessor implements a "run" method that takes the document as
+    a list of lines, modifies it as necessary and returns either the same
+    list or a new list.
+
+    Preprocessors must extend markdown.Preprocessor.
+
+    """
+    def run(self, lines):
+        """
+        Each subclass of Preprocessor should override the `run` method, which
+        takes the document as a list of strings split by newlines and returns
+        the (possibly modified) list of lines.
+
+        """
+        pass
+
+class HtmlStash:
+    """
+    This class is used for stashing HTML objects that we extract
+    in the beginning and replace with place-holders.
+    """
+
+    def __init__ (self):
+        """ Create a HtmlStash. """
+        self.html_counter = 0 # for counting inline html segments
+        self.rawHtmlBlocks=[]
+
+    def store(self, html, safe=False):
+        """
+        Saves an HTML segment for later reinsertion.  Returns a
+        placeholder string that needs to be inserted into the
+        document.
+
+        Keyword arguments:
+
+        * html: an html segment
+        * safe: label an html segment as safe for safemode
+
+        Returns : a placeholder string
+
+        """
+        self.rawHtmlBlocks.append((html, safe))
+        placeholder = HTML_PLACEHOLDER % self.html_counter
+        self.html_counter += 1
+        return placeholder
+
+    def reset(self):
+        self.html_counter = 0
+        self.rawHtmlBlocks = []
+
+
+class HtmlBlockPreprocessor(Preprocessor):
+    """Remove html blocks from the text and store them for later retrieval."""
+
+    right_tag_patterns = ["</%s>", "%s>"]
+
+    def _get_left_tag(self, block):
+        return block[1:].replace(">", " ", 1).split()[0].lower()
+
+    def _get_right_tag(self, left_tag, block):
+        for p in self.right_tag_patterns:
+            tag = p % left_tag
+            i = block.rfind(tag)
+            if i > 2:
+                return tag.lstrip("<").rstrip(">"), i + len(p)-2 + len(left_tag)
+        return block.rstrip()[-len(left_tag)-2:-1].lower(), len(block)
+
+    def _equal_tags(self, left_tag, right_tag):
+        if left_tag == 'div' or left_tag[0] in ['?', '@', '%']: # handle PHP, etc.
+            return True
+        if ("/" + left_tag) == right_tag:
+            return True
+        if (right_tag == "--" and left_tag == "--"):
+            return True
+        elif left_tag == right_tag[1:] \
+            and right_tag[0] != "<":
+            return True
+        else:
+            return False
+
+    def _is_oneliner(self, tag):
+        return (tag in ['hr', 'hr/'])
+
+    def run(self, lines):
+        text = "\n".join(lines)
+        new_blocks = []
+        text = text.split("\n\n")
+        items = []
+        left_tag = ''
+        right_tag = ''
+        in_tag = False # flag
+
+        while text:
+            block = text[0]
+            if block.startswith("\n"):
+                block = block[1:]
+            text = text[1:]
+
+            if block.startswith("\n"):
+                block = block[1:]
+
+            if not in_tag:
+                if block.startswith("<"):
+                    left_tag = self._get_left_tag(block)
+                    right_tag, data_index = self._get_right_tag(left_tag, block)
+
+                    if data_index < len(block):
+                        text.insert(0, block[data_index:])
+                        block = block[:data_index]
+
+                    if not (markdown.isBlockLevel(left_tag) \
+                        or block[1] in ["!", "?", "@", "%"]):
+                        new_blocks.append(block)
+                        continue
+
+                    if self._is_oneliner(left_tag):
+                        new_blocks.append(block.strip())
+                        continue
+
+                    if block[1] == "!":
+                        # is a comment block
+                        left_tag = "--"
+                        right_tag, data_index = self._get_right_tag(left_tag, block)
+                        # keep checking conditions below and maybe just append
+
+                    if block.rstrip().endswith(">") \
+                        and self._equal_tags(left_tag, right_tag):
+                        new_blocks.append(
+                            self.markdown.htmlStash.store(block.strip()))
+                        continue
+                    else: #if not block[1] == "!":
+                        # if is block level tag and is not complete
+
+                        if markdown.isBlockLevel(left_tag) or left_tag == "--" \
+                        and not block.rstrip().endswith(">"):
+                            items.append(block.strip())
+                            in_tag = True
+                        else:
+                            new_blocks.append(
+                            self.markdown.htmlStash.store(block.strip()))
+
+                        continue
+
+                new_blocks.append(block)
+
+            else:
+                items.append(block.strip())
+
+                right_tag, data_index = self._get_right_tag(left_tag, block)
+
+                if self._equal_tags(left_tag, right_tag):
+                    # if find closing tag
+                    in_tag = False
+                    new_blocks.append(
+                        self.markdown.htmlStash.store('\n\n'.join(items)))
+                    items = []
+
+        if items:
+            new_blocks.append(self.markdown.htmlStash.store('\n\n'.join(items)))
+            new_blocks.append('\n')
+
+        new_text = "\n\n".join(new_blocks)
+        return new_text.split("\n")
+
+
+class ReferencePreprocessor(Preprocessor):
+    """ Remove reference definitions from text and store for later use. """
+
+    RE = re.compile(r'^(\ ?\ ?\ ?)\[([^\]]*)\]:\s*([^ ]*)(.*)$', re.DOTALL)
+
+    def run (self, lines):
+        new_text = [];
+        for line in lines:
+            m = self.RE.match(line)
+            if m:
+                id = m.group(2).strip().lower()
+                t = m.group(4).strip()  # potential title
+                if not t:
+                    self.markdown.references[id] = (m.group(3), t)
+                elif (len(t) >= 2
+                      and (t[0] == t[-1] == "\""
+                           or t[0] == t[-1] == "\'"
+                           or (t[0] == "(" and t[-1] == ")") ) ):
+                    self.markdown.references[id] = (m.group(3), t[1:-1])
+                else:
+                    new_text.append(line)
+            else:
+                new_text.append(line)
+
+        return new_text #+ "\n"
diff --git a/markdown/treeprocessors.py b/markdown/treeprocessors.py
index e8d7cd0..0ea0de2 100644
--- a/markdown/treeprocessors.py
+++ b/markdown/treeprocessors.py
@@ -277,26 +277,31 @@ class InlineProcessor(Treeprocessor):
 
             for element, lst in insertQueue:
                 if element.text:
-                    element.text = markdown.inlinepatterns.handleAttributes(element.text, element)
+                    element.text = \
+                        markdown.inlinepatterns.handleAttributes(element.text, 
+                                                                 element)
                 i = 0
                 for newChild in lst:
                     # Processing attributes
                     if newChild.tail:
-                        newChild.tail = markdown.inlinepatterns.handleAttributes(newChild.tail,
-                                                         element)
+                        newChild.tail = \
+                            markdown.inlinepatterns.handleAttributes(newChild.tail,
+                                                                     element)
                     if newChild.text:
-                        newChild.text = markdown.inlinepatterns.handleAttributes(newChild.text,
-                                                         newChild)
+                        newChild.text = \
+                            markdown.inlinepatterns.handleAttributes(newChild.text,
+                                                                     newChild)
                     element.insert(i, newChild)
                     i += 1
-
         return tree
 
 
 class PrettifyTreeprocessor(Treeprocessor):
-    """Add linebreaks to the html document."""
+    """ Add linebreaks to the html document. """
+
     def _prettifyETree(self, elem):
-        """Recursively add linebreaks to ElementTree children."""
+        """ Recursively add linebreaks to ElementTree children. """
+
         i = "\n"
         if markdown.isBlockLevel(elem.tag) and elem.tag not in ['code', 'pre']:
             if (not elem.text or not elem.text.strip()) \
@@ -311,7 +316,8 @@ class PrettifyTreeprocessor(Treeprocessor):
             elem.tail = i
 
     def run(self, root):
-        """.Add linebreaks to ElementTree root object."""
+        """ Add linebreaks to ElementTree root object. """
+
         self._prettifyETree(root)
         # Do <br />'s seperately as they are often in the middle of
         # inline content and missed by _prettifyETree.
-- 
cgit v1.2.3