From 8e7e2a254859f61ec2632a11725d99c5ea0c9f09 Mon Sep 17 00:00:00 2001
From: Yuri Takhteyev <yuri@freewisdom.org>
Date: Sun, 12 Oct 2008 18:02:07 -0700
Subject: More cleanup.  Refactored all the core parsing logic into a separate
 class: MarkdownParser.

---
 markdown.py | 2648 ++++++++++++++++++++++++++++++-----------------------------
 1 file changed, 1335 insertions(+), 1313 deletions(-)

(limited to 'markdown.py')

diff --git a/markdown.py b/markdown.py
index 562380a..0aa530d 100755
--- a/markdown.py
+++ b/markdown.py
@@ -97,6 +97,24 @@ INLINE_PLACEHOLDER_PREFIX = STX+"klzzwxh:"
 INLINE_PLACEHOLDER = INLINE_PLACEHOLDER_PREFIX + "%s" + ETX
 AMP_SUBSTITUTE = STX+"amp"+ETX 
 
+def wrapRe(raw_re) : return re.compile("^%s$" % raw_re, re.DOTALL)
+CORE_RE = {
+    'header':          wrapRe(r'(#{1,6})[ \t]*(.*?)[ \t]*(#*)'), # # A title
+    'reference-def':   wrapRe(r'(\ ?\ ?\ ?)\[([^\]]*)\]:\s*([^ ]*)(.*)'),
+                               # [Google]: http://www.google.com/
+    'containsline':    wrapRe(r'([-]*)$|^([=]*)'), # -----, =====, etc.
+    'ol':              wrapRe(r'[ ]{0,3}[\d]*\.\s+(.*)'), # 1. text
+    'ul':              wrapRe(r'[ ]{0,3}[*+-]\s+(.*)'), # "* text"
+    'isline1':         wrapRe(r'(\**)'), # ***
+    'isline2':         wrapRe(r'(\-*)'), # ---
+    'isline3':         wrapRe(r'(\_*)'), # ___
+    'tabbed':          wrapRe(r'((\t)|(    ))(.*)'), # an indented line
+    'quoted':          wrapRe(r'[ ]{0,2}> ?(.*)'), # a quoted block ("> ...")
+    'containsline':    re.compile(r'^([-]*)$|^([=]*)$', re.M),
+    'attr':            re.compile("\{@([^\}]*)=([^\}]*)}") # {@id=123}
+}
+"""Basic and reusable regular expressions."""
+
 
 """
 AUXILIARY GLOBAL FUNCTIONS
@@ -160,1385 +178,1419 @@ def dequote(string):
 
 
 """
-PRE-PROCESSORS
+OVERALL DESIGN
 =============================================================================
 
-Preprocessors work on source text before we start doing anything too
-complicated.  There are two types of preprocessors: TextPreprocessor and
-Preprocessor.
-"""
+Markdown processing takes place in five steps:
 
-class TextPreprocessor:
-    """
-    TextPreprocessors are run before the text is broken into lines.
-    
-    Each TextPreprocessor implements a "run" method that takes a pointer to a
-    text string of the document, modifies it as necessary and returns
-    either the same pointer or a pointer to a new string.  
-    
-    TextPreprocessors must extend markdown.TextPreprocessor.
+1. A bunch of "preprocessors" munge the input text.
+2. MarkdownParser() parses the high-level structural elements of the
+   pre-processed text into an ElementTree.
+3. A bunch of Patterns are run against the ElementTree, detecting inline
+   markup.
+4. Some extra user-defined post-processors are run.
+5. The output is written to a string.
 
-    """
+Those steps are put together by the Markdown() class.
 
-    def run(self, text):
-        """ 
-        Each subclass of TextPreprocessor should override the `run` method, 
-        which takes the document text as a single string and returns the 
-        (possibly modified) document as a single string.
-        
-        """
-        pass
+The code below is organized as follows:
 
+1. MarkdownParser class - does basic parsing.
+2. All the post-processors, patterns, etc.
+3. Markdown class - does the high-level wrapping.
+"""
 
-class Preprocessor:
-    """
-    Preprocessors are run after the text is broken into lines.
 
-    Each preprocessor implements a "run" method that takes a pointer to a
-    list of lines of the document, modifies it as necessary and returns
-    either the same pointer or a pointer to a new list.  
-    
-    Preprocessors must extend markdown.Preprocessor.
-    
-    """
+"""
+CORE MARKDOWN PARSER
+=============================================================================
 
-    def run(self, lines):
-        """
-        Each subclass of Preprocessor should override the `run` method, which
-        takes the document as a list of strings split by newlines and returns
-        the (possibly modified) list of lines.
+This class handles basic Markdown parsing.  It doesn't concern itself with
+inline elements such as **bold** or *italics*, but rather just catches blocks,
+lists, quotes, etc.
+"""
 
-        """
+class MarkdownParser:
+    """Parse Markdown into an ElementTree."""
+
+    def __init__(self):
         pass
- 
 
-class HtmlBlockPreprocessor(TextPreprocessor):
-    """Remove html blocks from the text and store them for later retrieval."""
+    def parseDocument(self, lines):
+        """Parse a list of markdown lines into an ElementTree."""
+        # Create an ElementTree from the lines
+        root = etree.Element("div")
+        buffer = []
+        for line in lines:
+            if line.startswith("#"):
+                self.parseChunk(root, buffer)
+                buffer = [line]
+            else:
+                buffer.append(line)
 
-    right_tag_patterns = ["</%s>", "%s>"]
+        self.parseChunk(root, buffer)
     
-    def _get_left_tag(self, block):
-        return block[1:].replace(">", " ", 1).split()[0].lower()
+        return etree.ElementTree(root)
 
-    def _get_right_tag(self, left_tag, block):        
-        for p in self.right_tag_patterns:
-            tag = p % left_tag
-            i = block.rfind(tag)
-            if i > 2:
-                return tag.lstrip("<").rstrip(">"), i + len(p)-2 + len(left_tag)
-        return block.rstrip()[-len(left_tag)-2:-1].lower(), len(block)
-
-    def _equal_tags(self, left_tag, right_tag):
-        if left_tag == 'div' or left_tag[0] in ['?', '@', '%']: # handle PHP, etc.
-            return True
-        if ("/" + left_tag) == right_tag:
-            return True
-        if (right_tag == "--" and left_tag == "--"):
-            return True
-        elif left_tag == right_tag[1:] \
-            and right_tag[0] != "<":
-            return True
-        else:
-            return False
 
-    def _is_oneliner(self, tag):
-        return (tag in ['hr', 'hr/'])
+    def parseChunk(self, parent_elem, lines, inList=0, looseList=0):
+        """Process a chunk of markdown-formatted text and attach the parse to
+        an ElementTree node.
 
-    def run(self, text):
-        new_blocks = []
-        text = text.split("\n\n")        
-        items = []
-        left_tag = ''
-        right_tag = ''
-        in_tag = False # flag
+        Process a section of a source document, looking for high
+        level structural elements like lists, block quotes, code
+        segments, html blocks, etc.  Some of those then get stripped
+        of their high level markup (e.g. get unindented) and the
+        lower-level markup is processed recursively.
 
-        while text:
-            block = text[0]
-            if block.startswith("\n"):
-                block = block[1:]
-            text = text[1:]
+        Keyword arguments:
+        
+        * parent_elem: An ElementTree element to which the content will be added.
+        * lines: a list of lines
+        * inList: a level
+        
+        Returns: None
+        
+        """
+        # Loop through lines until none left.
+        while lines:
             
-            if block.startswith("\n"):
-                block = block[1:]
-
-            if not in_tag:
-                if block.startswith("<"):
-                    left_tag = self._get_left_tag(block)
-                    right_tag, data_index = self._get_right_tag(left_tag, block)
-                    
-                    if data_index < len(block):
-                        text.insert(0, block[data_index:])
-                        block = block[:data_index]
+            # Skipping empty line
+            if not lines[0]:
+                lines = lines[1:]
+                continue
+            
+            # Check if this section starts with a list, a blockquote or
+            # a code block
 
-                    if not (isBlockLevel(left_tag) \
-                        or block[1] in ["!", "?", "@", "%"]):
-                        new_blocks.append(block)
-                        continue
+            processFn = { 'ul':     self._processUList,
+                          'ol':     self._processOList,
+                          'quoted': self._processQuote,
+                          'tabbed': self._processCodeBlock}
 
-                    if self._is_oneliner(left_tag):
-                        new_blocks.append(block.strip())
-                        continue
-                        
-                    if block[1] == "!":
-                        # is a comment block
-                        left_tag = "--"
-                        right_tag, data_index = self._get_right_tag(left_tag, block)
-                        # keep checking conditions below and maybe just append
-                        
-                    if block.rstrip().endswith(">") \
-                        and self._equal_tags(left_tag, right_tag):
-                        new_blocks.append(
-                            self.stash.store(block.strip()))
-                        continue
-                    else: #if not block[1] == "!":
-                        # if is block level tag and is not complete
-                        
-                        if isBlockLevel(left_tag) or left_tag == "--" \
-                        and not block.rstrip().endswith(">"):
-                            items.append(block.strip())
-                            in_tag = True
-                        else:
-                            new_blocks.append(
-                            self.stash.store(block.strip()))
-                            
-                        continue
+            for regexp in ['ul', 'ol', 'quoted', 'tabbed']:
+                m = CORE_RE[regexp].match(lines[0])
+                if m:
+                    processFn[regexp](parent_elem, lines, inList)
+                    return
 
-                new_blocks.append(block)
+            # We are NOT looking at one of the high-level structures like
+            # lists or blockquotes.  So, it's just a regular paragraph
+            # (though perhaps nested inside a list or something else).  If
+            # we are NOT inside a list, we just need to look for a blank
+            # line to find the end of the block.  If we ARE inside a
+            # list, however, we need to consider that a sublist does not
+            # need to be separated by a blank line.  Rather, the following
+            # markup is legal:
+            #
+            # * The top level list item
+            #
+            #     Another paragraph of the list.  This is where we are now.
+            #     * Underneath we might have a sublist.
+            #
 
-            else:
-                items.append(block.strip())
-                
-                right_tag, data_index = self._get_right_tag(left_tag, block)
-                
-                if self._equal_tags(left_tag, right_tag):
-                    # if find closing tag
-                    in_tag = False
-                    new_blocks.append(
-                        self.stash.store('\n\n'.join(items)))
-                    items = []
+            if inList:
 
-        if items:
-            new_blocks.append(self.stash.store('\n\n'.join(items)))
-            new_blocks.append('\n')
-            
-        return "\n\n".join(new_blocks)
+                start, lines  = self._linesUntil(lines, (lambda line:
+                                 CORE_RE['ul'].match(line)
+                                 or CORE_RE['ol'].match(line)
+                                                  or not line.strip()))
 
-HTML_BLOCK_PREPROCESSOR = HtmlBlockPreprocessor()
+                self.parseChunk(parent_elem, start, inList-1, looseList=looseList)
+                inList = inList-1
 
+            else: # Ok, so it's just a simple block
 
-class HeaderPreprocessor(Preprocessor):
+                paragraph, lines = self._linesUntil(lines, lambda line:
+                                                     not line.strip() or line[0] == '>')
 
-    """Replace underlined headers with hashed headers.
+                if len(paragraph) and paragraph[0].startswith('#'):
+                    self._processHeader(parent_elem, paragraph)
+                    
+                elif len(paragraph) and \
+                CORE_RE["isline3"].match(paragraph[0]):
 
-    (To avoid the need for lookahead later.)
+                    self._processHR(parent_elem)
+                    lines = paragraph[1:] + lines
+                    
+                elif paragraph:
+                    self._processParagraph(parent_elem, paragraph,
+                                          inList, looseList)
 
-    """
+            if lines and not lines[0].strip():
+                lines = lines[1:]  # skip the first (blank) line
 
-    def run (self, lines):
-        i = -1
-        while i+1 < len(lines):
-            i = i+1
-            if not lines[i].strip():
-                continue
+    def _processHR(self, parentElem):
+        hr = etree.SubElement(parentElem, "hr")
+    
+    def _processHeader(self, parentElem, paragraph):
+        m = CORE_RE['header'].match(paragraph[0])
+        if m:
+            level = len(m.group(1))
+            h = etree.SubElement(parentElem, "h%d" % level)
+            h.text = m.group(2).strip()
+        else:
+            message(CRITICAL, "We've got a problem header!")
 
-            if lines[i].startswith("#"):
-                lines.insert(i+1, "\n")
 
-            if (i+1 <= len(lines)
-                  and lines[i+1]
-                  and lines[i+1][0] in ['-', '=']):
+    def _processParagraph(self, parentElem, paragraph, inList, looseList):
 
-                underline = lines[i+1].strip()
+        if ( parentElem.tag == 'li'
+                and not (looseList or parentElem.getchildren())):
 
-                if underline == "="*len(underline):
-                    lines[i] = "# " + lines[i].strip()
-                    lines[i+1] = ""
-                elif underline == "-"*len(underline):
-                    lines[i] = "## " + lines[i].strip()
-                    lines[i+1] = ""
-
-        return lines
+            # If this is the first paragraph inside "li", don't
+            # put <p> around it - append the paragraph bits directly
+            # onto parentElem
+            el = parentElem
+        else:
+            # Otherwise make a "p" element
+            el = etree.SubElement(parentElem, "p")
 
-HEADER_PREPROCESSOR = HeaderPreprocessor()
+        dump = []
+        
+        # Searching for hr or header
+        for line in paragraph:
+            # it's hr
+            if CORE_RE["isline3"].match(line):
+                el.text = "\n".join(dump)
+                self._processHR(el)
+                dump = []
+            # it's header
+            elif line.startswith("#"):
+                el.text = "\n".join(dump)   
+                self._processHeader(parentElem, [line])
+                dump = [] 
+            else:
+                dump.append(line)
+        if dump:
+            text = "\n".join(dump)    
+            el.text = text
 
+    def _processUList(self, parentElem, lines, inList):
+        self._processList(parentElem, lines, inList,
+                         listexpr='ul', tag = 'ul')
 
-class LinePreprocessor(Preprocessor):
-    """Convert HR lines to "___" format."""
-    blockquote_re = re.compile(r'^(> )+')
+    def _processOList(self, parentElem, lines, inList):
+        self._processList(parentElem, lines, inList,
+                         listexpr='ol', tag = 'ol')
 
-    def run (self, lines):
-        for i in range(len(lines)):
-            prefix = ''
-            m = self.blockquote_re.search(lines[i])
-            if m: 
-                prefix = m.group(0)
-            if self._isLine(lines[i][len(prefix):]):
-                lines[i] = prefix + "___"
-        return lines
 
-    def _isLine(self, block):
-        """Determine if a block should be replaced with an <HR>"""
-        if block.startswith("    "): 
-            return False  # a code block
-        text = "".join([x for x in block if not x.isspace()])
-        if len(text) <= 2:
-            return False
-        for pattern in ['isline1', 'isline2', 'isline3']:
-            m = CORE_RE[pattern].match(text)
-            if (m and m.group(1)):
-                return True
-        else:
-            return False
+    def _processList(self, parentElem, lines, inList, listexpr, tag):
+        """
+        Given a list of document lines starting with a list item,
+        finds the end of the list, breaks it up, and recursively
+        processes each list item and the remainder of the text file.
 
-LINE_PREPROCESSOR = LinePreprocessor()
+        Keyword arguments:
+        
+        * parentElem: An ElementTree element to which the content will be added
+        * lines: a list of lines
+        * inList: a level
+        
+        Returns: None
+        
+        """
+        ul = etree.SubElement(parentElem, tag) # ul might actually be '<ol>'
 
+        looseList = 0
 
-class ReferencePreprocessor(Preprocessor):
-    """Remove reference definitions from the text and store them for later use."""    
-    def run (self, lines):
-        new_text = [];
-        for line in lines:
-            m = CORE_RE['reference-def'].match(line)
-            if m:
-                id = m.group(2).strip().lower()
-                t = m.group(4).strip()  # potential title
-                if not t:
-                    self.references[id] = (m.group(3), t)
-                elif (len(t) >= 2
-                      and (t[0] == t[-1] == "\""
-                           or t[0] == t[-1] == "\'"
-                           or (t[0] == "(" and t[-1] == ")") ) ):
-                    self.references[id] = (m.group(3), t[1:-1])
-                else:
-                    new_text.append(line)
-            else:
-                new_text.append(line)
+        # Make a list of list items
+        items = []
+        item = -1
 
-        return new_text #+ "\n"
+        i = 0  # a counter to keep track of where we are
 
-REFERENCE_PREPROCESSOR = ReferencePreprocessor()
+        for line in lines: 
 
+            loose = 0
+            if not line.strip():
+                # If we see a blank line, this _might_ be the end of the list
+                i += 1
+                loose = 1
 
+                # Find the next non-blank line
+                for j in range(i, len(lines)):
+                    if lines[j].strip():
+                        next = lines[j]
+                        break
+                else:
+                    # There is no more text => end of the list
+                    break
 
+                # Check if the next non-blank line is still a part of the list
 
-"""
-INLINE PATTERNS
-=============================================================================
+                if ( CORE_RE[listexpr].match(next) or
+                     CORE_RE['tabbed'].match(next) ):
+                    # get rid of any white space in the line
+                    items[item].append(line.strip())
+                    looseList = loose or looseList
+                    continue
+                else:
+                    break # found end of the list
 
-Inline patterns such as *emphasis* are handled by means of auxiliary
-objects, one per pattern.  Pattern objects must be instances of classes
-that extend markdown.Pattern.  Each pattern object uses a single regular
-expression and needs support the following methods:
+            # Now we need to detect list items (at the current level)
+            # while also detabing child elements if necessary
 
-    pattern.getCompiledRegExp() # returns a regular expression
+            for expr in ['ul', 'ol', 'tabbed']:
 
-    pattern.handleMatch(m) # takes a match object and returns
-                           # an ElementTree element or just plain text
+                m = CORE_RE[expr].match(line)
+                if m:
+                    if expr in ['ul', 'ol']:  # We are looking at a new item
+                        #if m.group(1) :
+                        # Removed the check to allow for a blank line
+                        # at the beginning of the list item
+                        items.append([m.group(1)])
+                        item += 1
+                    elif expr == 'tabbed':  # This line needs to be detabbed
+                        items[item].append(m.group(4)) #after the 'tab'
 
-All of python markdown's built-in patterns subclass from Pattern,
-but you can add additional patterns that don't.
+                    i += 1
+                    break
+            else:
+                items[item].append(line)  # Just regular continuation
+                i += 1 # added on 2006.02.25
+        else:
+            i += 1
 
-Also note that all the regular expressions used by inline must
-capture the whole block.  For this reason, they all start with
-'^(.*)' and end with '(.*)!'.  In case with built-in expression
-Pattern takes care of adding the "^(.*)" and "(.*)!".
+        # Add the ElementTree elements
+        for item in items:
+            li = etree.SubElement(ul, "li")
 
-Finally, the order in which regular expressions are applied is very
-important - e.g. if we first replace http://.../ links with <a> tags
-and _then_ try to replace inline html, we would end up with a mess.
-So, we apply the expressions in the following order:
+            self.parseChunk(li, item, inList + 1, looseList = looseList)
 
-* escape and backticks have to go before everything else, so
-  that we can preempt any markdown patterns by escaping them.
+        # Process the remaining part of the section
 
-* then we handle auto-links (must be done before inline html)
+        self.parseChunk(parentElem, lines[i:], inList)
 
-* then we handle inline HTML.  At this point we will simply
-  replace all inline HTML strings with a placeholder and add
-  the actual HTML to a hash.
 
-* then inline images (must be done before links)
+    def _linesUntil(self, lines, condition):
+        """ 
+        A utility function to break a list of lines upon the
+        first line that satisfies a condition.  The condition
+        argument should be a predicate function.
+        
+        """
+        i = -1
+        for line in lines:
+            i += 1
+            if condition(line): 
+                break
+        else:
+            i += 1
+        return lines[:i], lines[i:]
 
-* then bracketed links, first regular then reference-style
+    def _processQuote(self, parentElem, lines, inList):
+        """
+        Given a list of document lines starting with a quote finds
+        the end of the quote, unindents it and recursively
+        processes the body of the quote and the remainder of the
+        text file.
 
-* finally we apply strong and emphasis
-"""
+        Keyword arguments:
+        
+        * parentElem: ElementTree element to which the content will be added
+        * lines: a list of lines
+        * inList: a level
+        
+        Returns: None 
+        
+        """
+        dequoted = []
+        i = 0
+        blank_line = False # allow one blank line between paragraphs
+        for line in lines:
+            m = CORE_RE['quoted'].match(line)
+            if m:
+                dequoted.append(m.group(1))
+                i += 1
+                blank_line = False
+            elif not blank_line and line.strip() != '':
+                dequoted.append(line)
+                i += 1
+            elif not blank_line and line.strip() == '':
+                dequoted.append(line)
+                i += 1
+                blank_line = True
+            else:
+                break
 
+        blockquote = etree.SubElement(parentElem, "blockquote")
 
-"""
-The actual regular expressions for patterns
------------------------------------------------------------------------------
-"""
+        self.parseChunk(blockquote, dequoted, inList)
+        self.parseChunk(parentElem, lines[i:], inList)
 
-NOBRACKET = r'[^\]\[]*'
-BRK = ( r'\[('
-        + (NOBRACKET + r'(\[')*6
-        + (NOBRACKET+ r'\])*')*6
-        + NOBRACKET + r')\]' )
-NOIMG = r'(?<!\!)'
 
-BACKTICK_RE = r'(?<!\\)(`+)(.+?)(?<!`)\2(?!`)' # `e=f()` or ``e=f("`")``
-ESCAPE_RE = r'\\(.)'                             # \<
-EMPHASIS_RE = r'(\*)([^\*]*)\2'                    # *emphasis*
-STRONG_RE = r'(\*{2}|_{2})(.*?)\2'                      # **strong**
-STRONG_EM_RE = r'(\*{3}|_{3})(.*?)\2'            # ***strong***
 
-if SMART_EMPHASIS:
-    EMPHASIS_2_RE = r'(?<!\S)(_)(\S.*?)\2'        # _emphasis_
-else:
-    EMPHASIS_2_RE = r'(_)(.*?)\2'                 # _emphasis_
 
-LINK_RE = NOIMG + BRK + \
-r'''\(\s*(<.*?>|((?:(?:\(.*?\))|[^\(\)]))*?)\s*((['"])(.*)\12)?\)''' # [text](url) or [text](<url>)
+    def _processCodeBlock(self, parentElem, lines, inList):
+        """
+        Given a list of document lines starting with a code block
+        finds the end of the block, puts it into the ElementTree verbatim
+        wrapped in ("<pre><code>") and recursively processes the
+        remainder of the text file.
 
-IMAGE_LINK_RE = r'\!' + BRK + r'\s*\((<.*?>|([^\)]*))\)' # ![alttxt](http://x.com/) or ![alttxt](<http://x.com/>)
-REFERENCE_RE = NOIMG + BRK+ r'\s*\[([^\]]*)\]'           # [Google][3]
-IMAGE_REFERENCE_RE = r'\!' + BRK + '\s*\[([^\]]*)\]' # ![alt text][2]
-NOT_STRONG_RE = r'( \* )'                        # stand-alone * or _
-AUTOLINK_RE = r'<((?:f|ht)tps?://[^>]*)>'        # <http://www.123.com>
-AUTOMAIL_RE = r'<([^> \!]*@[^> ]*)>'               # <me@example.com>
+        Keyword arguments:
+        
+        * parentElem: ElementTree element to which the content will be added
+        * lines: a list of lines
+        * inList: a level
+        
+        Returns: None
+        
+        """
+        detabbed, theRest = self.detectTabbed(lines)
 
-HTML_RE = r'(\<([a-zA-Z/][^\>]*?|\!--.*?--)\>)'               # <...>
-ENTITY_RE = r'(&[\#a-zA-Z0-9]*;)'               # &amp;
-LINE_BREAK_RE = r'  \n'                     # two spaces at end of line
-LINE_BREAK_2_RE = r'  $'                    # two spaces at end of text
+        pre = etree.SubElement(parentElem, "pre")
+        code = etree.SubElement(pre, "code")
+        
+        text = "\n".join(detabbed).rstrip()+"\n"
+        code.text = AtomicString(text)
+        self.parseChunk(parentElem, theRest, inList)        
 
+    def detectTabbed(self, lines):
+        """ Find indented text and remove indent before further processing.
 
-"""
-The pattern classes
------------------------------------------------------------------------------
-"""
+        Keyword arguments:
+        
+        * lines: an array of strings
+        * fn: a function that returns a substring of a string
+           if the string matches the necessary criteria
+        
+        Returns: a list of post-processed items and the unused
+        remainder of the original list
+        
+        """
+        items = []
+        item = -1
+        i = 0 # to keep track of where we are
 
-class Pattern:
-    """Base class that inline patterns subclass. """
+        def detab(line):
+            match = CORE_RE['tabbed'].match(line)
+            if match:
+               return match.group(4)
 
-    def __init__ (self, pattern):
-        """
-        Create an instant of an inline pattern.
+        for line in lines:
+            if line.strip(): # Non-blank line
+                line = detab(line)
+                if line:
+                    items.append(line)
+                    i += 1
+                    continue
+                else:
+                    return items, lines[i:]
 
-        Keyword arguments:
+            else: # Blank line: _maybe_ we are done.
+                i += 1 # advance
 
-        * pattern: A regular expression that matches a pattern
+                # Find the next non-blank line
+                for j in range(i, len(lines)):  
+                    if lines[j].strip():
+                        next_line = lines[j]; break
+                else:
+                    break # There is no more text; we are done.
 
-        """
-        self.pattern = pattern
-        self.compiled_re = re.compile("^(.*?)%s(.*?)$" % pattern, re.DOTALL)
+                # Check if the next non-blank line is tabbed
+                if detab(next_line): # Yes, more work to do.
+                    items.append("")
+                    continue
+                else:
+                    break # No, we are done.
+        else:
+            i += 1
 
-        # Api for Markdown to pass safe_mode into instance
-        self.safe_mode = False
+        return items, lines[i:]
 
-    def getCompiledRegExp (self):
-        """ Return a compiled regular expression. """
-        return self.compiled_re
 
-    def handleMatch(self, m):
-        """Return a ElementTree element from the given match.
 
-        Subclasses should override this method.
 
-        Keyword arguments:
+"""
+PRE-PROCESSORS
+=============================================================================
 
-        * m: A re match object containing a match of the pattern.
+Preprocessors work on source text before we start doing anything too
+complicated.  There are two types of preprocessors: TextPreprocessor and
+Preprocessor.
+"""
+
+class TextPreprocessor:
+    """
+    TextPreprocessors are run before the text is broken into lines.
+    
+    Each TextPreprocessor implements a "run" method that takes a pointer to a
+    text string of the document, modifies it as necessary and returns
+    either the same pointer or a pointer to a new string.  
+    
+    TextPreprocessors must extend markdown.TextPreprocessor.
+
+    """
 
+    def run(self, text):
+        """ 
+        Each subclass of TextPreprocessor should override the `run` method, 
+        which takes the document text as a single string and returns the 
+        (possibly modified) document as a single string.
+        
         """
         pass
-    
-    def type(self):
-        """ Return class name, to define pattern type """
-        return self.__class__.__name__
 
-BasePattern = Pattern # for backward compatibility
 
-class SimpleTextPattern (Pattern):
-    """ Return a simple text of group(2) of a Pattern. """
-    def handleMatch(self, m):
-        text = m.group(2)
-        if text == INLINE_PLACEHOLDER_PREFIX:
-            return None
-        return text
+class Preprocessor:
+    """
+    Preprocessors are run after the text is broken into lines.
 
-class SimpleTagPattern (Pattern):
-    """ 
-    Return element of type `tag` with a text attribute of group(3) 
-    of a Pattern. 
+    Each preprocessor implements a "run" method that takes a pointer to a
+    list of lines of the document, modifies it as necessary and returns
+    either the same pointer or a pointer to a new list.  
+    
+    Preprocessors must extend markdown.Preprocessor.
     
     """
-    def __init__ (self, pattern, tag):
-        Pattern.__init__(self, pattern)
-        self.tag = tag
 
-    def handleMatch(self, m):
-        el = etree.Element(self.tag)
-        el.text = m.group(3)
-        return el
-
-class SubstituteTagPattern (SimpleTagPattern):
-    """ Return a eLement of type `tag` with no children. """
-    def handleMatch (self, m):
-        return etree.Element(self.tag)
+    def run(self, lines):
+        """
+        Each subclass of Preprocessor should override the `run` method, which
+        takes the document as a list of strings split by newlines and returns
+        the (possibly modified) list of lines.
 
-class BacktickPattern (Pattern):
-    """ Return a `<code>` element containing the matching text. """
-    def __init__ (self, pattern):
-        Pattern.__init__(self, pattern)
-        self.tag = "code"
+        """
+        pass
+ 
 
-    def handleMatch(self, m):
-        el = etree.Element(self.tag)
-        el.text = AtomicString(m.group(3).strip())
-        return el
+class HtmlBlockPreprocessor(TextPreprocessor):
+    """Remove html blocks from the text and store them for later retrieval."""
 
+    right_tag_patterns = ["</%s>", "%s>"]
+    
+    def _get_left_tag(self, block):
+        return block[1:].replace(">", " ", 1).split()[0].lower()
 
-class DoubleTagPattern (SimpleTagPattern): 
-    """Return a ElementTree element nested in tag2 nested in tag1.
+    def _get_right_tag(self, left_tag, block):        
+        for p in self.right_tag_patterns:
+            tag = p % left_tag
+            i = block.rfind(tag)
+            if i > 2:
+                return tag.lstrip("<").rstrip(">"), i + len(p)-2 + len(left_tag)
+        return block.rstrip()[-len(left_tag)-2:-1].lower(), len(block)
 
-    Useful for strong emphasis etc.
+    def _equal_tags(self, left_tag, right_tag):
+        if left_tag == 'div' or left_tag[0] in ['?', '@', '%']: # handle PHP, etc.
+            return True
+        if ("/" + left_tag) == right_tag:
+            return True
+        if (right_tag == "--" and left_tag == "--"):
+            return True
+        elif left_tag == right_tag[1:] \
+            and right_tag[0] != "<":
+            return True
+        else:
+            return False
 
-    """
-    def handleMatch(self, m):
-        tag1, tag2 = self.tag.split(",")
-        el1 = etree.Element(tag1)
-        el2 = etree.SubElement(el1, tag2)
-        el2.text = m.group(3)
-        return el1
+    def _is_oneliner(self, tag):
+        return (tag in ['hr', 'hr/'])
 
+    def run(self, text):
+        new_blocks = []
+        text = text.split("\n\n")        
+        items = []
+        left_tag = ''
+        right_tag = ''
+        in_tag = False # flag
 
-class HtmlPattern (Pattern):
-    """ Store raw inline html and return a placeholder. """
-    def handleMatch (self, m):
-        rawhtml = m.group(2)
-        inline = True
-        place_holder = self.stash.store(rawhtml)
-        return place_holder
+        while text:
+            block = text[0]
+            if block.startswith("\n"):
+                block = block[1:]
+            text = text[1:]
+            
+            if block.startswith("\n"):
+                block = block[1:]
 
+            if not in_tag:
+                if block.startswith("<"):
+                    left_tag = self._get_left_tag(block)
+                    right_tag, data_index = self._get_right_tag(left_tag, block)
+                    
+                    if data_index < len(block):
+                        text.insert(0, block[data_index:])
+                        block = block[:data_index]
 
+                    if not (isBlockLevel(left_tag) \
+                        or block[1] in ["!", "?", "@", "%"]):
+                        new_blocks.append(block)
+                        continue
 
-class LinkPattern (Pattern):
-    """ Return a link element from the given match. """
-    def handleMatch(self, m):
-        el = etree.Element("a")
-        el.text = m.group(2)
-        title = m.group(11)
-        href = m.group(9)
+                    if self._is_oneliner(left_tag):
+                        new_blocks.append(block.strip())
+                        continue
+                        
+                    if block[1] == "!":
+                        # is a comment block
+                        left_tag = "--"
+                        right_tag, data_index = self._get_right_tag(left_tag, block)
+                        # keep checking conditions below and maybe just append
+                        
+                    if block.rstrip().endswith(">") \
+                        and self._equal_tags(left_tag, right_tag):
+                        new_blocks.append(
+                            self.stash.store(block.strip()))
+                        continue
+                    else: #if not block[1] == "!":
+                        # if is block level tag and is not complete
+                        
+                        if isBlockLevel(left_tag) or left_tag == "--" \
+                        and not block.rstrip().endswith(">"):
+                            items.append(block.strip())
+                            in_tag = True
+                        else:
+                            new_blocks.append(
+                            self.stash.store(block.strip()))
+                            
+                        continue
 
-        if href:
-            if href[0] == "<":
-                href = href[1:-1]
-            el.set("href", self.sanitize_url(href.strip()))
-        else:
-            el.set("href", "")
-            
-        if title:
-            title = dequote(title) #.replace('"', "&quot;")
-            el.set("title", title)
-        return el
+                new_blocks.append(block)
 
-    def sanitize_url(self, url):
-        """ 
-        Sanitize a url against xss attacks in "safe_mode".
+            else:
+                items.append(block.strip())
+                
+                right_tag, data_index = self._get_right_tag(left_tag, block)
+                
+                if self._equal_tags(left_tag, right_tag):
+                    # if find closing tag
+                    in_tag = False
+                    new_blocks.append(
+                        self.stash.store('\n\n'.join(items)))
+                    items = []
 
-        Rather than specifically blacklisting `javascript:alert("XSS")` and all
-        its aliases (see <http://ha.ckers.org/xss.html>), we whitelist known
-        safe url formats. Most urls contain a network location, however some 
-        are known not to (i.e.: mailto links). Script urls do not contain a 
-        location. Additionally, for `javascript:...`, the scheme would be 
-        "javascript" but some aliases will appear to `urlparse()` to have no 
-        scheme. On top of that relative links (i.e.: "foo/bar.html") have no 
-        scheme. Therefore we must check "path", "parameters", "query" and 
-        "fragment" for any literal colons. We don't check "scheme" for colons 
-        because it *should* never have any and "netloc" must allow the form:
-        `username:password@host:port`.
-        
-        """
-        locless_schemes = ['', 'mailto', 'news']
-        scheme, netloc, path, params, query, fragment = url = urlparse(url)
-        safe_url = False
-        if netloc != '' or scheme in locless_schemes:
-            safe_url = True
+        if items:
+            new_blocks.append(self.stash.store('\n\n'.join(items)))
+            new_blocks.append('\n')
+            
+        return "\n\n".join(new_blocks)
 
-        for part in url[2:]:
-            if ":" in part:
-                safe_url = False
+HTML_BLOCK_PREPROCESSOR = HtmlBlockPreprocessor()
 
-        if self.safe_mode and not safe_url:
-            return ''
-        else:
-            return urlunparse(url)
 
-class ImagePattern(LinkPattern):
-    """ Return a img element from the given match. """
-    def handleMatch(self, m):
-        el = etree.Element("img")
-        src_parts = m.group(9).split()
-        if src_parts:
-            src = src_parts[0]
-            if src[0] == "<" and src[-1] == ">":
-                src = src[1:-1]
-            el.set('src', self.sanitize_url(src))
-        else:
-            el.set('src', "")
-        if len(src_parts) > 1:
-            el.set('title', dequote(" ".join(src_parts[1:])))
-  
-        if ENABLE_ATTRIBUTES:
-            truealt = handleAttributes(m.group(2), el)
-        else:
-            truealt = m.group(2)
-            
-        el.set('alt', truealt)
-        return el
+class HeaderPreprocessor(Preprocessor):
 
-class ReferencePattern(LinkPattern):
-    """ Match to a stored reference and return link element. """
-    def handleMatch(self, m):
+    """Replace underlined headers with hashed headers.
 
-        if m.group(9):
-            id = m.group(9).lower()
-        else:
-            # if we got something like "[Google][]"
-            # we'll use "google" as the id
-            id = m.group(2).lower()
+    (To avoid the need for lookahead later.)
 
-        if not self.references.has_key(id): # ignore undefined refs
-            return None
-        href, title = self.references[id]
+    """
 
-        text = m.group(2)
-        return self.makeTag(href, title, text)
+    def run (self, lines):
+        i = -1
+        while i+1 < len(lines):
+            i = i+1
+            if not lines[i].strip():
+                continue
 
-    def makeTag(self, href, title, text):
-        el = etree.Element('a')
-        
-        el.set('href', self.sanitize_url(href))
-        if title:
-            el.set('title', title)
+            if lines[i].startswith("#"):
+                lines.insert(i+1, "\n")
 
-        el.text = text
-        return el
+            if (i+1 <= len(lines)
+                  and lines[i+1]
+                  and lines[i+1][0] in ['-', '=']):
 
+                underline = lines[i+1].strip()
 
-class ImageReferencePattern (ReferencePattern):
-    """ Match to a stored reference and return img element. """
-    def makeTag(self, href, title, text):
-        el = etree.Element("img")
-        el.set("src", self.sanitize_url(href))
-        if title:
-            el.set("title", title)
-        el.set("alt", text)
-        return el
+                if underline == "="*len(underline):
+                    lines[i] = "# " + lines[i].strip()
+                    lines[i+1] = ""
+                elif underline == "-"*len(underline):
+                    lines[i] = "## " + lines[i].strip()
+                    lines[i+1] = ""
 
+        return lines
 
-class AutolinkPattern (Pattern):
-    """ Return a link Element given an autolink (`<http://example/com>`). """
-    def handleMatch(self, m):
-        el = etree.Element("a")
-        el.set('href', m.group(2))
-        el.text = AtomicString(m.group(2))
-        return el
+HEADER_PREPROCESSOR = HeaderPreprocessor()
 
-class AutomailPattern (Pattern):
-    """ 
-    Return a mailto link Element given an automail link (`<foo@example.com>`). 
-    """
-    def handleMatch(self, m):
-        el = etree.Element('a')
-        email = m.group(2)
-        if email.startswith("mailto:"):
-            email = email[len("mailto:"):]
 
-        def codepoint2name(code):
-            """Return entity definition by code, or the code if not defined."""
-            entity = htmlentitydefs.codepoint2name.get(code)
-            if entity:
-                return "%s%s;" % (AMP_SUBSTITUTE, entity)
-            else:
-                return "%s#%d;" % (AMP_SUBSTITUTE, code)
+class LinePreprocessor(Preprocessor):
+    """Convert HR lines to "___" format."""
+    blockquote_re = re.compile(r'^(> )+')
 
-        letters = [codepoint2name(ord(letter)) for letter in email]
-        el.text = AtomicString(''.join(letters))
+    def run (self, lines):
+        for i in range(len(lines)):
+            prefix = ''
+            m = self.blockquote_re.search(lines[i])
+            if m: 
+                prefix = m.group(0)
+            if self._isLine(lines[i][len(prefix):]):
+                lines[i] = prefix + "___"
+        return lines
 
-        mailto = "mailto:" + email
-        mailto = "".join([AMP_SUBSTITUTE + '#%d;' % 
-                          ord(letter) for letter in mailto])
-        el.set('href', mailto)
-        return el
+    def _isLine(self, block):
+        """Determine if a block should be replaced with an <HR>"""
+        if block.startswith("    "): 
+            return False  # a code block
+        text = "".join([x for x in block if not x.isspace()])
+        if len(text) <= 2:
+            return False
+        for pattern in ['isline1', 'isline2', 'isline3']:
+            m = CORE_RE[pattern].match(text)
+            if (m and m.group(1)):
+                return True
+        else:
+            return False
 
-ESCAPE_PATTERN          = SimpleTextPattern(ESCAPE_RE)
-NOT_STRONG_PATTERN      = SimpleTextPattern(NOT_STRONG_RE)
+LINE_PREPROCESSOR = LinePreprocessor()
 
-BACKTICK_PATTERN        = BacktickPattern(BACKTICK_RE)
-STRONG_PATTERN          = SimpleTagPattern(STRONG_RE, 'strong')
-EMPHASIS_PATTERN        = SimpleTagPattern(EMPHASIS_RE, 'em')
-EMPHASIS_PATTERN_2      = SimpleTagPattern(EMPHASIS_2_RE, 'em')
 
-STRONG_EM_PATTERN       = DoubleTagPattern(STRONG_EM_RE, 'strong,em')
+class ReferencePreprocessor(Preprocessor):
+    """Remove reference definitions from the text and store them for later use."""    
+    def run (self, lines):
+        new_text = [];
+        for line in lines:
+            m = CORE_RE['reference-def'].match(line)
+            if m:
+                id = m.group(2).strip().lower()
+                t = m.group(4).strip()  # potential title
+                if not t:
+                    self.references[id] = (m.group(3), t)
+                elif (len(t) >= 2
+                      and (t[0] == t[-1] == "\""
+                           or t[0] == t[-1] == "\'"
+                           or (t[0] == "(" and t[-1] == ")") ) ):
+                    self.references[id] = (m.group(3), t[1:-1])
+                else:
+                    new_text.append(line)
+            else:
+                new_text.append(line)
 
-LINE_BREAK_PATTERN      = SubstituteTagPattern(LINE_BREAK_RE, 'br')
-LINE_BREAK_PATTERN_2    = SubstituteTagPattern(LINE_BREAK_2_RE, 'br')
+        return new_text #+ "\n"
 
-LINK_PATTERN            = LinkPattern(LINK_RE)
-IMAGE_LINK_PATTERN      = ImagePattern(IMAGE_LINK_RE)
-IMAGE_REFERENCE_PATTERN = ImageReferencePattern(IMAGE_REFERENCE_RE)
-REFERENCE_PATTERN       = ReferencePattern(REFERENCE_RE)
+REFERENCE_PREPROCESSOR = ReferencePreprocessor()
 
-HTML_PATTERN            = HtmlPattern(HTML_RE)
-ENTITY_PATTERN          = HtmlPattern(ENTITY_RE)
 
-AUTOLINK_PATTERN        = AutolinkPattern(AUTOLINK_RE)
-AUTOMAIL_PATTERN        = AutomailPattern(AUTOMAIL_RE)
 
 
 """
-POST-PROCESSORS
+INLINE PATTERNS
 =============================================================================
 
-Markdown also allows post-processors, which are similar to preprocessors in
-that they need to implement a "run" method. However, they are run after core
-processing.
-
-There are two types of post-processors: Postprocessor and TextPostprocessor
-"""
-
-class Postprocessor:
-    """
-    Postprocessors are run before the ElementTree serialization.
-    
-    Each Postprocessor implements a "run" method that takes a pointer to a
-    ElementTree, modifies it as necessary and returns a ElementTree 
-    document.
-    
-    Postprocessors must extend markdown.Postprocessor.
+Inline patterns such as *emphasis* are handled by means of auxiliary
+objects, one per pattern.  Pattern objects must be instances of classes
+that extend markdown.Pattern.  Each pattern object uses a single regular
+expression and needs to support the following methods:
 
-    """
-    def run(self, root):
-        """
-        Subclasses of Postprocessor should implement a `run` method, which
-        takes a root Element. Method can return another Element, and global
-        root Element will be replaced, or just modify current and return None.
-        """
-        pass
+    pattern.getCompiledRegExp() # returns a regular expression
 
+    pattern.handleMatch(m) # takes a match object and returns
+                           # an ElementTree element or just plain text
 
-class TextPostprocessor:
-    """
-    TextPostprocessors are run after the ElementTree it converted back into text.
-    
-    Each TextPostprocessor implements a "run" method that takes a pointer to a
-    text string, modifies it as necessary and returns a text string.
-    
-    TextPostprocessors must extend markdown.TextPostprocessor.
-    
-    """
+All of python markdown's built-in patterns subclass from Pattern,
+but you can add additional patterns that don't.
 
-    def run(self, text):
-        """
-        Subclasses of TextPostprocessor should implement a `run` method, which
-        takes the html document as a single text string and returns a 
-        (possibly modified) string.
+Also note that all the regular expressions used by inline patterns must
+capture the whole block.  For this reason, they all start with
+'^(.*?)' and end with '(.*?)$'.  For the built-in expressions,
+Pattern takes care of adding the "^(.*?)" and "(.*?)$".
 
-        """
-        pass
+Finally, the order in which regular expressions are applied is very
+important - e.g. if we first replace http://.../ links with <a> tags
+and _then_ try to replace inline html, we would end up with a mess.
+So, we apply the expressions in the following order:
 
+* escape and backticks have to go before everything else, so
+  that we can preempt any markdown patterns by escaping them.
 
-class PrettifyPostprocessor(Postprocessor):
-    """Add linebreaks to the html document."""
-    def _prettifyETree(self, elem):
-        """Recursively add linebreaks to ElementTree children."""
-        i = "\n"
-        if isBlockLevel(elem.tag) and elem.tag not in ['code', 'pre']:
-            if (not elem.text or not elem.text.strip()) \
-                    and len(elem) and isBlockLevel(elem[0].tag):
-                elem.text = i
-            for e in elem:
-                if isBlockLevel(e.tag):
-                    self._prettifyETree(e)
-            if not elem.tail or not elem.tail.strip():
-                elem.tail = i
-        if not elem.tail or not elem.tail.strip():
-            elem.tail = i
+* then we handle auto-links (must be done before inline html)
 
-    def run(self, root):
-        """.Add linebreaks to ElementTree root object."""
-        self._prettifyETree(root)
-        # Do <br />'s seperately as they are often in the middle of
-        # inline content and missed by _prettifyETree.
-        brs = root.getiterator('br')
-        for br in brs:
-            if not br.tail or not br.tail.strip():
-                br.tail = '\n'
-            else:
-                br.tail = '\n%s' % br.tail
+* then we handle inline HTML.  At this point we will simply
+  replace all inline HTML strings with a placeholder and add
+  the actual HTML to a hash.
 
-PRETTIFYPOSTPROCESSOR = PrettifyPostprocessor()
+* then inline images (must be done before links)
 
+* then bracketed links, first regular then reference-style
 
-class RawHtmlTextPostprocessor(TextPostprocessor):
-    """ Restore raw html to the document. """
-    def __init__(self):
-        pass
+* finally we apply strong and emphasis
+"""
 
-    def run(self, text):
-        """ Iterate over html stash and restore "safe" html. """
-        for i in range(self.stash.html_counter):
-            html, safe  = self.stash.rawHtmlBlocks[i]
-            if self.safeMode and not safe:
-                if str(self.safeMode).lower() == 'escape':
-                    html = self.escape(html)
-                elif str(self.safeMode).lower() == 'remove':
-                    html = ''
-                else:
-                    html = HTML_REMOVED_TEXT
-            if safe or not self.safeMode:
-                text = text.replace("<p>%s</p>" % (HTML_PLACEHOLDER % i),
-                                    html + "\n")
-            text =  text.replace(HTML_PLACEHOLDER % i, html)
-        return text
 
-    def escape(self, html):
-        """ Basic html escaping """
-        html = html.replace('&', '&amp;')
-        html = html.replace('<', '&lt;')
-        html = html.replace('>', '&gt;')
-        return html.replace('"', '&quot;')
+"""
+The actual regular expressions for patterns
+-----------------------------------------------------------------------------
+"""
 
-RAWHTMLTEXTPOSTPROCESSOR = RawHtmlTextPostprocessor()
+NOBRACKET = r'[^\]\[]*'
+BRK = ( r'\[('
+        + (NOBRACKET + r'(\[')*6
+        + (NOBRACKET+ r'\])*')*6
+        + NOBRACKET + r')\]' )
+NOIMG = r'(?<!\!)'
 
+BACKTICK_RE = r'(?<!\\)(`+)(.+?)(?<!`)\2(?!`)' # `e=f()` or ``e=f("`")``
+ESCAPE_RE = r'\\(.)'                             # \<
+EMPHASIS_RE = r'(\*)([^\*]*)\2'                    # *emphasis*
+STRONG_RE = r'(\*{2}|_{2})(.*?)\2'                      # **strong**
+STRONG_EM_RE = r'(\*{3}|_{3})(.*?)\2'            # ***strong***
 
-class AndSubstitutePostprocessor(TextPostprocessor):
-    """ Restore valid entities """
-    def __init__(self):
-        pass
+if SMART_EMPHASIS:
+    EMPHASIS_2_RE = r'(?<!\S)(_)(\S.*?)\2'        # _emphasis_
+else:
+    EMPHASIS_2_RE = r'(_)(.*?)\2'                 # _emphasis_
 
-    def run(self, text):
+LINK_RE = NOIMG + BRK + \
+r'''\(\s*(<.*?>|((?:(?:\(.*?\))|[^\(\)]))*?)\s*((['"])(.*)\12)?\)''' # [text](url) or [text](<url>)
 
-        text =  text.replace(AMP_SUBSTITUTE, "&")
-        return text
+IMAGE_LINK_RE = r'\!' + BRK + r'\s*\((<.*?>|([^\)]*))\)' # ![alttxt](http://x.com/) or ![alttxt](<http://x.com/>)
+REFERENCE_RE = NOIMG + BRK+ r'\s*\[([^\]]*)\]'           # [Google][3]
+IMAGE_REFERENCE_RE = r'\!' + BRK + '\s*\[([^\]]*)\]' # ![alt text][2]
+NOT_STRONG_RE = r'( \* )'                        # stand-alone * or _
+AUTOLINK_RE = r'<((?:f|ht)tps?://[^>]*)>'        # <http://www.123.com>
+AUTOMAIL_RE = r'<([^> \!]*@[^> ]*)>'               # <me@example.com>
 
-AMPSUBSTITUTETEXTPOSTPROCESSOR = AndSubstitutePostprocessor()
+HTML_RE = r'(\<([a-zA-Z/][^\>]*?|\!--.*?--)\>)'               # <...>
+ENTITY_RE = r'(&[\#a-zA-Z0-9]*;)'               # &amp;
+LINE_BREAK_RE = r'  \n'                     # two spaces at end of line
+LINE_BREAK_2_RE = r'  $'                    # two spaces at end of text
 
 
 """
-MISC AUXILIARY CLASSES
-=============================================================================
+The pattern classes
+-----------------------------------------------------------------------------
 """
 
-class AtomicString(unicode):
-    """A string which should not be further processed."""
-    pass
+class Pattern:
+    """Base class that inline patterns subclass. """
 
+    def __init__ (self, pattern):
+        """
+        Create an instance of an inline pattern.
 
-class HtmlStash:
-    """
-    This class is used for stashing HTML objects that we extract
-    in the beginning and replace with place-holders.
-    """
+        Keyword arguments:
 
-    def __init__ (self):
-        """ Create a HtmlStash. """
-        self.html_counter = 0 # for counting inline html segments
-        self.rawHtmlBlocks=[]
+        * pattern: A regular expression that matches a pattern
 
-    def store(self, html, safe=False):
         """
-        Saves an HTML segment for later reinsertion.  Returns a
-        placeholder string that needs to be inserted into the
-        document.
+        self.pattern = pattern
+        self.compiled_re = re.compile("^(.*?)%s(.*?)$" % pattern, re.DOTALL)
+
+        # API for Markdown to pass safe_mode into the instance
+        self.safe_mode = False
+
+    def getCompiledRegExp (self):
+        """ Return a compiled regular expression. """
+        return self.compiled_re
+
+    def handleMatch(self, m):
+        """Return an ElementTree element from the given match.
+
+        Subclasses should override this method.
 
         Keyword arguments:
-        
-        * html: an html segment
-        * safe: label an html segment as safe for safemode
-        
-        Returns : a placeholder string 
-        
+
+        * m: A re match object containing a match of the pattern.
+
         """
-        self.rawHtmlBlocks.append((html, safe))
-        placeholder = HTML_PLACEHOLDER % self.html_counter
-        self.html_counter += 1
-        return placeholder
+        pass
     
-    def rest(self):
-        self.html_counter = 0
-        self.rawHtmlBlocks = []
+    def type(self):
+        """ Return class name, to define pattern type """
+        return self.__class__.__name__
 
+BasePattern = Pattern # for backward compatibility
+
+class SimpleTextPattern (Pattern):
+    """ Return a simple text of group(2) of a Pattern. """
+    def handleMatch(self, m):
+        text = m.group(2)
+        if text == INLINE_PLACEHOLDER_PREFIX:
+            return None
+        return text
+
+class SimpleTagPattern (Pattern):
+    """ 
+    Return element of type `tag` with a text attribute of group(3) 
+    of a Pattern. 
     
-class InlineStash:
-    
-    def __init__(self):
-        """ Create a InlineStash. """
-        self.prefix = INLINE_PLACEHOLDER_PREFIX
-        self.suffix = ETX
-        self._nodes = {}
-        self.phLength = 4 + len(self.prefix) + len(self.suffix)
-        self._placeholder_re = re.compile(INLINE_PLACEHOLDER % r'([0-9]{4})')
-        
-    def _genPlaceholder(self, type):
-        """ Generate a placeholder """
-        id = "%04d" % len(self._nodes)
-        hash = INLINE_PLACEHOLDER % id 
-        return hash, id
-    
-    def extractId(self, data, index):
-        """ 
-        Extract id from data string, start from index
-        
-        Keyword arguments:
-        
-        * data: string
-        * index: index, from which we start search 
-        
-        Returns: placeholder id and  string index, after 
-        found placeholder
-        """
-        m = self._placeholder_re.search(data, index)
-        if m:
-            return m.group(1), m.end()
-        else:
-            return None, index + 1 
-    
-    def isin(self, id):
-        """ Check if node with given id exists in stash """
-        return self._nodes.has_key(id)
-    
-    def get(self, id):
-        """ Return node by id """
-        return self._nodes.get(id)
-    
-    def add(self, node, type):
-        """ Add node to stash """
-        pholder, id = self._genPlaceholder(type)
-        self._nodes[id] = node
-        return pholder
-    
-    def rest(self):
-        """ Reset instance """
-        self._nodes = {}
-    
-"""
-CORE MARKDOWN
-=============================================================================
-
-The core part is still quite messy, despite substantial refactoring.  If you
-are thinking of extending the syntax, see first if you can do it through
-pre-processors, post-processors, inline patterns or a combination of the three.
-"""
-
-def _wrapRe(raw_re) : return re.compile("^%s$" % raw_re, re.DOTALL)
-CORE_RE = {
-    'header':          _wrapRe(r'(#{1,6})[ \t]*(.*?)[ \t]*(#*)'), # # A title
-    'reference-def':   _wrapRe(r'(\ ?\ ?\ ?)\[([^\]]*)\]:\s*([^ ]*)(.*)'),
-                               # [Google]: http://www.google.com/
-    'containsline':    _wrapRe(r'([-]*)$|^([=]*)'), # -----, =====, etc.
-    'ol':              _wrapRe(r'[ ]{0,3}[\d]*\.\s+(.*)'), # 1. text
-    'ul':              _wrapRe(r'[ ]{0,3}[*+-]\s+(.*)'), # "* text"
-    'isline1':         _wrapRe(r'(\**)'), # ***
-    'isline2':         _wrapRe(r'(\-*)'), # ---
-    'isline3':         _wrapRe(r'(\_*)'), # ___
-    'tabbed':          _wrapRe(r'((\t)|(    ))(.*)'), # an indented line
-    'quoted':          _wrapRe(r'[ ]{0,2}> ?(.*)'), # a quoted block ("> ...")
-    'containsline':    re.compile(r'^([-]*)$|^([=]*)$', re.M),
-    'attr':            re.compile("\{@([^\}]*)=([^\}]*)}") # {@id=123}
-}
+    """
+    def __init__ (self, pattern, tag):
+        Pattern.__init__(self, pattern)
+        self.tag = tag
 
+    def handleMatch(self, m):
+        el = etree.Element(self.tag)
+        el.text = m.group(3)
+        return el
 
-class Markdown:
-    """Converts markdown to HTML."""
+class SubstituteTagPattern (SimpleTagPattern):
+    """ Return an element of type `tag` with no children. """
+    def handleMatch (self, m):
+        return etree.Element(self.tag)
 
-    def __init__(self, 
-                 extensions=[],
-                 extension_configs={},
-                 safe_mode = False):
-        """
-        Creates a new Markdown instance.
+class BacktickPattern (Pattern):
+    """ Return a `<code>` element containing the matching text. """
+    def __init__ (self, pattern):
+        Pattern.__init__(self, pattern)
+        self.tag = "code"
 
-        Keyword arguments:
-        
-        * extensions: A list of extensions.  
-           If they are of type string, the module mdx_name.py will be loaded.  
-           If they are a subclass of markdown.Extension, they will be used 
-           as-is.
-        * extension-configs: Configuration setting for extensions.
-        * safe_mode: Disallow raw html. One of "remove", "replace" or "escape".
-        
-        """
-        self.source = None
-        self.safeMode = safe_mode
-        self.registeredExtensions = []
-        self.docType = ""
-        self.stripTopLevelTags = True
+    def handleMatch(self, m):
+        el = etree.Element(self.tag)
+        el.text = AtomicString(m.group(3).strip())
+        return el
 
-        self.textPreprocessors = [HTML_BLOCK_PREPROCESSOR]
 
-        self.preprocessors = [HEADER_PREPROCESSOR,
-                              LINE_PREPROCESSOR,
-                              # A footnote preprocessor will
-                              # get inserted here
-                              REFERENCE_PREPROCESSOR]
+class DoubleTagPattern (SimpleTagPattern): 
+    """Return an ElementTree element of type tag1 with tag2 nested inside it.
 
+    Useful for strong emphasis etc.
 
-        self.postprocessors = [PRETTIFYPOSTPROCESSOR,
-                               # a footnote postprocessor will get
-                               # inserted later
-                               ]
+    """
+    def handleMatch(self, m):
+        tag1, tag2 = self.tag.split(",")
+        el1 = etree.Element(tag1)
+        el2 = etree.SubElement(el1, tag2)
+        el2.text = m.group(3)
+        return el1
 
-        self.textPostprocessors = [# a footnote postprocessor will get
-                                   # inserted here
-                                   RAWHTMLTEXTPOSTPROCESSOR,
-                                   AMPSUBSTITUTETEXTPOSTPROCESSOR]
 
-        self.prePatterns = []
-                               
-        self.inlinePatterns = [
-                               BACKTICK_PATTERN,
-                               ESCAPE_PATTERN,
-                               REFERENCE_PATTERN,
-                               LINK_PATTERN,
-                               IMAGE_LINK_PATTERN,
-                               IMAGE_REFERENCE_PATTERN,
-                               AUTOLINK_PATTERN,
-                               AUTOMAIL_PATTERN,
-                               LINE_BREAK_PATTERN_2,
-                               LINE_BREAK_PATTERN,
-                               HTML_PATTERN,
-                               ENTITY_PATTERN,
-                               NOT_STRONG_PATTERN,
-                               STRONG_EM_PATTERN,
-                               STRONG_PATTERN,
-                               EMPHASIS_PATTERN,
-                               EMPHASIS_PATTERN_2
-                               # The order of the handlers matters!!!
-                               ]
-        
-        self.inlineStash = InlineStash()
-        self.references = {}
-        self.htmlStash = HtmlStash()
+class HtmlPattern (Pattern):
+    """ Store raw inline html and return a placeholder. """
+    def handleMatch (self, m):
+        rawhtml = m.group(2)
+        inline = True
+        place_holder = self.stash.store(rawhtml)
+        return place_holder
 
 
-        self.registerExtensions(extensions = extensions,
-                                configs = extension_configs)
 
-        self.reset()
+class LinkPattern (Pattern):
+    """ Return a link element from the given match. """
+    def handleMatch(self, m):
+        el = etree.Element("a")
+        el.text = m.group(2)
+        title = m.group(11)
+        href = m.group(9)
 
+        if href:
+            if href[0] == "<":
+                href = href[1:-1]
+            el.set("href", self.sanitize_url(href.strip()))
+        else:
+            el.set("href", "")
+            
+        if title:
+            title = dequote(title) #.replace('"', "&quot;")
+            el.set("title", title)
+        return el
 
-    def registerExtensions(self, extensions, configs):
+    def sanitize_url(self, url):
         """ 
-        Register extensions with this instance of Markdown.
+        Sanitize a url against xss attacks in "safe_mode".
 
-        Keyword aurguments:
-        
-        * extensions: A list of extensions, which can either
-           be strings or objects.  See the docstring on Markdown.
-        * configs: A dictionary mapping module names to config options. 
+        Rather than specifically blacklisting `javascript:alert("XSS")` and all
+        its aliases (see <http://ha.ckers.org/xss.html>), we whitelist known
+        safe url formats. Most urls contain a network location, however some 
+        are known not to (i.e.: mailto links). Script urls do not contain a 
+        location. Additionally, for `javascript:...`, the scheme would be 
+        "javascript" but some aliases will appear to `urlparse()` to have no 
+        scheme. On top of that relative links (i.e.: "foo/bar.html") have no 
+        scheme. Therefore we must check "path", "parameters", "query" and 
+        "fragment" for any literal colons. We don't check "scheme" for colons 
+        because it *should* never have any and "netloc" must allow the form:
+        `username:password@host:port`.
         
         """
-        for ext in extensions:
-            if isinstance(ext, basestring):
-                ext = load_extension(ext, configs.get(ext, []))
-            elif hasattr(ext, 'extendMarkdown'):
-                # Looks like an Extension.
-                # Nothing to do here.
-                pass
-            else:
-                message(ERROR, "Incorrect type! Extension '%s' is "
-                               "neither a string or an Extension." %(repr(ext)))
-                continue
-            ext.extendMarkdown(self, globals())
+        locless_schemes = ['', 'mailto', 'news']
+        scheme, netloc, path, params, query, fragment = url = urlparse(url)
+        safe_url = False
+        if netloc != '' or scheme in locless_schemes:
+            safe_url = True
 
-    def registerExtension(self, extension):
-        """ This gets called by the extension """
-        self.registeredExtensions.append(extension)
+        for part in url[2:]:
+            if ":" in part:
+                safe_url = False
 
-    def reset(self):
-        """
-        Resets all state variables so that we can start with a new text.
-        """
-        self.inlineStash.rest()
-        self.htmlStash.rest()
-        self.references.clear()
+        if self.safe_mode and not safe_url:
+            return ''
+        else:
+            return urlunparse(url)
 
-        HTML_BLOCK_PREPROCESSOR.stash = self.htmlStash
-        LINE_PREPROCESSOR.stash = self.htmlStash
-        REFERENCE_PREPROCESSOR.references = self.references
-        HTML_PATTERN.stash = self.htmlStash
-        ENTITY_PATTERN.stash = self.htmlStash
-        REFERENCE_PATTERN.references = self.references
-        IMAGE_REFERENCE_PATTERN.references = self.references
-        RAWHTMLTEXTPOSTPROCESSOR.stash = self.htmlStash
-        RAWHTMLTEXTPOSTPROCESSOR.safeMode = self.safeMode
+class ImagePattern(LinkPattern):
+    """ Return an img element from the given match. """
+    def handleMatch(self, m):
+        el = etree.Element("img")
+        src_parts = m.group(9).split()
+        if src_parts:
+            src = src_parts[0]
+            if src[0] == "<" and src[-1] == ">":
+                src = src[1:-1]
+            el.set('src', self.sanitize_url(src))
+        else:
+            el.set('src', "")
+        if len(src_parts) > 1:
+            el.set('title', dequote(" ".join(src_parts[1:])))
+        # With attributes enabled the alt text comes from handleAttributes().
+        if ENABLE_ATTRIBUTES:
+            truealt = handleAttributes(m.group(2), el)
+        else:
+            truealt = m.group(2)
+        # truealt is group 2 itself or what handleAttributes() returned for it.
+        el.set('alt', truealt)
+        return el
 
-        for extension in self.registeredExtensions:
-            extension.reset()
+class ReferencePattern(LinkPattern):
+    """ Match to a stored reference and return link element. """
+    def handleMatch(self, m):
 
-        for pattern in self.inlinePatterns:
-            pattern.safe_mode = self.safeMode
+        if m.group(9):
+            id = m.group(9).lower()
+        else:
+            # if we got something like "[Google][]"
+            # we'll use "google" as the id
+            id = m.group(2).lower()
 
-    def _processSection(self, parent_elem, lines,
-                        inList=0, looseList=0):
-        """
-        Process a section of a source document, looking for high
-        level structural elements like lists, block quotes, code
-        segments, html blocks, etc.  Some those then get stripped
-        of their high level markup (e.g. get unindented) and the
-        lower-level markup is processed recursively.
+        if not self.references.has_key(id): # ignore undefined refs
+            return None
+        href, title = self.references[id]
 
-        Keyword arguments:
-        
-        * parent_elem: A ElementTree element to which the content will be added.
-        * lines: a list of lines
-        * inList: a level
-        
-        Returns: None
+        text = m.group(2)
+        return self.makeTag(href, title, text)
+
+    def makeTag(self, href, title, text):
+        el = etree.Element('a')
         
-        """
-        # Loop through lines until none left.
-        while lines:
-            
-            # Skipping empty line
-            if not lines[0]:
-                lines = lines[1:]
-                continue
-            
-            # Check if this section starts with a list, a blockquote or
-            # a code block
+        el.set('href', self.sanitize_url(href))
+        if title:
+            el.set('title', title)
 
-            processFn = { 'ul':     self._processUList,
-                          'ol':     self._processOList,
-                          'quoted': self._processQuote,
-                          'tabbed': self._processCodeBlock}
+        el.text = text
+        return el
 
-            for regexp in ['ul', 'ol', 'quoted', 'tabbed']:
-                m = CORE_RE[regexp].match(lines[0])
-                if m:
-                    processFn[regexp](parent_elem, lines, inList)
-                    return
 
-            # We are NOT looking at one of the high-level structures like
-            # lists or blockquotes.  So, it's just a regular paragraph
-            # (though perhaps nested inside a list or something else).  If
-            # we are NOT inside a list, we just need to look for a blank
-            # line to find the end of the block.  If we ARE inside a
-            # list, however, we need to consider that a sublist does not
-            # need to be separated by a blank line.  Rather, the following
-            # markup is legal:
-            #
-            # * The top level list item
-            #
-            #     Another paragraph of the list.  This is where we are now.
-            #     * Underneath we might have a sublist.
-            #
+class ImageReferencePattern (ReferencePattern):
+    """ Match to a stored reference and return img element. """
+    def makeTag(self, href, title, text):
+        el = etree.Element("img")
+        el.set("src", self.sanitize_url(href))
+        if title:
+            el.set("title", title)
+        el.set("alt", text)
+        return el
+
+
+class AutolinkPattern (Pattern):
+    """ Return a link Element given an autolink (`<http://example.com>`). """
+    def handleMatch(self, m):
+        el = etree.Element("a")
+        el.set('href', m.group(2))
+        el.text = AtomicString(m.group(2))
+        return el
+
+class AutomailPattern (Pattern):
+    """ 
+    Return a mailto link Element given an automail link (`<foo@example.com>`). 
+    """
+    def handleMatch(self, m):
+        el = etree.Element('a')
+        email = m.group(2)
+        if email.startswith("mailto:"):
+            email = email[len("mailto:"):]
 
-            if inList:
+        def codepoint2name(code):
+            """Return entity definition by code, or the code if not defined."""
+            entity = htmlentitydefs.codepoint2name.get(code)
+            if entity:
+                return "%s%s;" % (AMP_SUBSTITUTE, entity)
+            else:
+                return "%s#%d;" % (AMP_SUBSTITUTE, code)
 
-                start, lines  = self._linesUntil(lines, (lambda line:
-                                 CORE_RE['ul'].match(line)
-                                 or CORE_RE['ol'].match(line)
-                                                  or not line.strip()))
+        letters = [codepoint2name(ord(letter)) for letter in email]
+        el.text = AtomicString(''.join(letters))
 
-                self._processSection(parent_elem, start,
-                                     inList - 1, looseList = looseList)
-                inList = inList-1
+        mailto = "mailto:" + email
+        mailto = "".join([AMP_SUBSTITUTE + '#%d;' % 
+                          ord(letter) for letter in mailto])
+        el.set('href', mailto)
+        return el
 
-            else: # Ok, so it's just a simple block
+ESCAPE_PATTERN          = SimpleTextPattern(ESCAPE_RE)
+NOT_STRONG_PATTERN      = SimpleTextPattern(NOT_STRONG_RE)
 
-                paragraph, lines = self._linesUntil(lines, lambda line:
-                                                     not line.strip() or line[0] == '>')
+BACKTICK_PATTERN        = BacktickPattern(BACKTICK_RE)
+STRONG_PATTERN          = SimpleTagPattern(STRONG_RE, 'strong')
+EMPHASIS_PATTERN        = SimpleTagPattern(EMPHASIS_RE, 'em')
+EMPHASIS_PATTERN_2      = SimpleTagPattern(EMPHASIS_2_RE, 'em')
 
-                if len(paragraph) and paragraph[0].startswith('#'):
-                    self._processHeader(parent_elem, paragraph)
-                    
-                elif len(paragraph) and \
-                CORE_RE["isline3"].match(paragraph[0]):
+STRONG_EM_PATTERN       = DoubleTagPattern(STRONG_EM_RE, 'strong,em')
 
-                    self._processHR(parent_elem)
-                    lines = paragraph[1:] + lines
-                    
-                elif paragraph:
-                    self._processParagraph(parent_elem, paragraph,
-                                          inList, looseList)
+LINE_BREAK_PATTERN      = SubstituteTagPattern(LINE_BREAK_RE, 'br')
+LINE_BREAK_PATTERN_2    = SubstituteTagPattern(LINE_BREAK_2_RE, 'br')
 
-            if lines and not lines[0].strip():
-                lines = lines[1:]  # skip the first (blank) line
+LINK_PATTERN            = LinkPattern(LINK_RE)
+IMAGE_LINK_PATTERN      = ImagePattern(IMAGE_LINK_RE)
+IMAGE_REFERENCE_PATTERN = ImageReferencePattern(IMAGE_REFERENCE_RE)
+REFERENCE_PATTERN       = ReferencePattern(REFERENCE_RE)
 
-    def _processHR(self, parentElem):
-        hr = etree.SubElement(parentElem, "hr")
-    
-    def _processHeader(self, parentElem, paragraph):
-        m = CORE_RE['header'].match(paragraph[0])
-        if m:
-            level = len(m.group(1))
-            h = etree.SubElement(parentElem, "h%d" % level)
-            h.text = m.group(2).strip()
-        else:
-            message(CRITICAL, "We've got a problem header!")
+HTML_PATTERN            = HtmlPattern(HTML_RE)
+ENTITY_PATTERN          = HtmlPattern(ENTITY_RE)
 
+AUTOLINK_PATTERN        = AutolinkPattern(AUTOLINK_RE)
+AUTOMAIL_PATTERN        = AutomailPattern(AUTOMAIL_RE)
 
-    def _processParagraph(self, parentElem, paragraph, inList, looseList):
 
-        if ( parentElem.tag == 'li'
-                and not (looseList or parentElem.getchildren())):
+"""
+POST-PROCESSORS
+=============================================================================
 
-            # If this is the first paragraph inside "li", don't
-            # put <p> around it - append the paragraph bits directly
-            # onto parentElem
-            el = parentElem
-        else:
-            # Otherwise make a "p" element
-            el = etree.SubElement(parentElem, "p")
+Markdown also allows post-processors, which are similar to preprocessors in
+that they need to implement a "run" method. However, they are run after core
+processing.
 
-        dump = []
-        
-        # Searching for hr or header
-        for line in paragraph:
-            # it's hr
-            if CORE_RE["isline3"].match(line):
-                el.text = "\n".join(dump)
-                self._processHR(el)
-                dump = []
-            # it's header
-            elif line.startswith("#"):
-                el.text = "\n".join(dump)   
-                self._processHeader(parentElem, [line])
-                dump = [] 
-            else:
-                dump.append(line)
-        if dump:
-            text = "\n".join(dump)    
-            el.text = text
+There are two types of post-processors: Postprocessor and TextPostprocessor
+"""
 
-    def _processUList(self, parentElem, lines, inList):
-        self._processList(parentElem, lines, inList,
-                         listexpr='ul', tag = 'ul')
+class Postprocessor:
+    """
+    Postprocessors are run before the ElementTree serialization.
+    
+    Each Postprocessor implements a "run" method that takes a pointer to a
+    ElementTree, modifies it as necessary and returns a ElementTree 
+    document.
+    
+    Postprocessors must extend markdown.Postprocessor.
 
-    def _processOList(self, parentElem, lines, inList):
-        self._processList(parentElem, lines, inList,
-                         listexpr='ol', tag = 'ol')
+    """
+    def run(self, root):
+        """
+        Subclasses of Postprocessor should implement a `run` method, which
+        takes a root Element. Method can return another Element, and global
+        root Element will be replaced, or just modify current and return None.
+        """
+        pass
 
 
-    def _processList(self, parentElem, lines, inList, listexpr, tag):
+class TextPostprocessor:
+    """
+    TextPostprocessors are run after the ElementTree is converted back into text.
+    
+    Each TextPostprocessor implements a "run" method that takes a pointer to a
+    text string, modifies it as necessary and returns a text string.
+    
+    TextPostprocessors must extend markdown.TextPostprocessor.
+    
+    """
+
+    def run(self, text):
         """
-        Given a list of document lines starting with a list item,
-        finds the end of the list, breaks it up, and recursively
-        processes each list item and the remainder of the text file.
+        Subclasses of TextPostprocessor should implement a `run` method, which
+        takes the html document as a single text string and returns a 
+        (possibly modified) string.
 
-        Keyword arguments:
-        
-        * parentElem: A ElementTree element to which the content will be added
-        * lines: a list of lines
-        * inList: a level
-        
-        Returns: None
-        
         """
-        ul = etree.SubElement(parentElem, tag) # ul might actually be '<ol>'
+        pass
 
-        looseList = 0
 
-        # Make a list of list items
-        items = []
-        item = -1
+class PrettifyPostprocessor(Postprocessor):
+    """Add linebreaks to the html document."""
+    def _prettifyETree(self, elem):
+        """Recursively add linebreaks to ElementTree children."""
+        i = "\n"
+        if isBlockLevel(elem.tag) and elem.tag not in ['code', 'pre']:
+            if (not elem.text or not elem.text.strip()) \
+                    and len(elem) and isBlockLevel(elem[0].tag):
+                elem.text = i
+            for e in elem:
+                if isBlockLevel(e.tag):
+                    self._prettifyETree(e)
+        # An empty or whitespace-only tail becomes a linebreak.  This single
+        # check covers every element, block level or not.
+        if not elem.tail or not elem.tail.strip():
+            elem.tail = i
 
-        i = 0  # a counter to keep track of where we are
+    def run(self, root):
+        """Add linebreaks to ElementTree root object."""
+        self._prettifyETree(root)
+        # Do <br />'s separately as they are often in the middle of
+        # inline content and missed by _prettifyETree.
+        brs = root.getiterator('br')
+        for br in brs:
+            if not br.tail or not br.tail.strip():
+                br.tail = '\n'
+            else:
+                br.tail = '\n%s' % br.tail
 
-        for line in lines: 
+PRETTIFYPOSTPROCESSOR = PrettifyPostprocessor()
 
-            loose = 0
-            if not line.strip():
-                # If we see a blank line, this _might_ be the end of the list
-                i += 1
-                loose = 1
 
-                # Find the next non-blank line
-                for j in range(i, len(lines)):
-                    if lines[j].strip():
-                        next = lines[j]
-                        break
+class RawHtmlTextPostprocessor(TextPostprocessor):
+    """ Restore raw html to the document. """
+    def __init__(self):
+        pass
+
+    def run(self, text):
+        """ Iterate over html stash and restore "safe" html. """
+        for i in range(self.stash.html_counter):
+            html, safe  = self.stash.rawHtmlBlocks[i]
+            if self.safeMode and not safe:
+                if str(self.safeMode).lower() == 'escape':
+                    html = self.escape(html)
+                elif str(self.safeMode).lower() == 'remove':
+                    html = ''
                 else:
-                    # There is no more text => end of the list
-                    break
+                    html = HTML_REMOVED_TEXT
+            if safe or not self.safeMode:
+                text = text.replace("<p>%s</p>" % (HTML_PLACEHOLDER % i),
+                                    html + "\n")
+            text =  text.replace(HTML_PLACEHOLDER % i, html)
+        return text
 
-                # Check if the next non-blank line is still a part of the list
+    def escape(self, html):
+        """ Basic html escaping """
+        html = html.replace('&', '&amp;')
+        html = html.replace('<', '&lt;')
+        html = html.replace('>', '&gt;')
+        return html.replace('"', '&quot;')
 
-                if ( CORE_RE[listexpr].match(next) or
-                     CORE_RE['tabbed'].match(next) ):
-                    # get rid of any white space in the line
-                    items[item].append(line.strip())
-                    looseList = loose or looseList
-                    continue
-                else:
-                    break # found end of the list
+RAWHTMLTEXTPOSTPROCESSOR = RawHtmlTextPostprocessor()
 
-            # Now we need to detect list items (at the current level)
-            # while also detabing child elements if necessary
 
-            for expr in ['ul', 'ol', 'tabbed']:
+class AndSubstitutePostprocessor(TextPostprocessor):
+    """ Restore valid entities """
+    def __init__(self):
+        pass
 
-                m = CORE_RE[expr].match(line)
-                if m:
-                    if expr in ['ul', 'ol']:  # We are looking at a new item
-                        #if m.group(1) :
-                        # Removed the check to allow for a blank line
-                        # at the beginning of the list item
-                        items.append([m.group(1)])
-                        item += 1
-                    elif expr == 'tabbed':  # This line needs to be detabbed
-                        items[item].append(m.group(4)) #after the 'tab'
+    def run(self, text):
 
-                    i += 1
-                    break
-            else:
-                items[item].append(line)  # Just regular continuation
-                i += 1 # added on 2006.02.25
-        else:
-            i += 1
+        text =  text.replace(AMP_SUBSTITUTE, "&")
+        return text
 
-        # Add the ElementTree elements
-        for item in items:
-            li = etree.SubElement(ul, "li")
+AMPSUBSTITUTETEXTPOSTPROCESSOR = AndSubstitutePostprocessor()
 
-            self._processSection(li, item, inList + 1, looseList = looseList)
 
-        # Process the remaining part of the section
+"""
+MISC AUXILIARY CLASSES
+=============================================================================
+"""
 
-        self._processSection(parentElem, lines[i:], inList)
+class AtomicString(unicode):
+    """A string which should not be further processed."""
+    pass
 
 
-    def _linesUntil(self, lines, condition):
-        """ 
-        A utility function to break a list of lines upon the
-        first line that satisfied a condition.  The condition
-        argument should be a predicate function.
-        
-        """
-        i = -1
-        for line in lines:
-            i += 1
-            if condition(line): 
-                break
-        else:
-            i += 1
-        return lines[:i], lines[i:]
+class HtmlStash:
+    """
+    This class is used for stashing HTML objects that we extract
+    in the beginning and replace with place-holders.
+    """
 
-    def _processQuote(self, parentElem, lines, inList):
+    def __init__ (self):
+        """ Create a HtmlStash. """
+        self.html_counter = 0 # for counting inline html segments
+        self.rawHtmlBlocks=[]
+
+    def store(self, html, safe=False):
         """
-        Given a list of document lines starting with a quote finds
-        the end of the quote, unindents it and recursively
-        processes the body of the quote and the remainder of the
-        text file.
+        Saves an HTML segment for later reinsertion.  Returns a
+        placeholder string that needs to be inserted into the
+        document.
 
         Keyword arguments:
         
-        * parentElem: ElementTree element to which the content will be added
-        * lines: a list of lines
-        * inList: a level
+        * html: an html segment
+        * safe: label an html segment as safe for safemode
         
-        Returns: None 
+        Returns : a placeholder string 
         
         """
-        dequoted = []
-        i = 0
-        blank_line = False # allow one blank line between paragraphs
-        for line in lines:
-            m = CORE_RE['quoted'].match(line)
-            if m:
-                dequoted.append(m.group(1))
-                i += 1
-                blank_line = False
-            elif not blank_line and line.strip() != '':
-                dequoted.append(line)
-                i += 1
-            elif not blank_line and line.strip() == '':
-                dequoted.append(line)
-                i += 1
-                blank_line = True
-            else:
-                break
-
-        blockquote = etree.SubElement(parentElem, "blockquote")
-
-        self._processSection(blockquote, dequoted, inList)
-        self._processSection(parentElem, lines[i:], inList)
-
+        self.rawHtmlBlocks.append((html, safe))
+        placeholder = HTML_PLACEHOLDER % self.html_counter
+        self.html_counter += 1
+        return placeholder
+    
+    def rest(self):
+        self.html_counter = 0
+        self.rawHtmlBlocks = []
 
+    
+class InlineStash:
+    """ Keep inline nodes and hand out placeholders that refer to them. """
+    def __init__(self):
+        """ Create an InlineStash. """
+        self.prefix = INLINE_PLACEHOLDER_PREFIX
+        self.suffix = ETX
+        self._nodes = {}
+        self.phLength = 4 + len(self.prefix) + len(self.suffix)
+        self._placeholder_re = re.compile(INLINE_PLACEHOLDER % r'([0-9]{4})')
+
+    def _genPlaceholder(self, type):
+        """ Return (placeholder, id) for the next node; `type` is unused. """
+        node_id = "%04d" % len(self._nodes)
+        placeholder = INLINE_PLACEHOLDER % node_id
+        return placeholder, node_id
+
+    def extractId(self, data, index):
+        """
+        Extract id from data string, start from index
+
+        Keyword arguments:
+
+        * data: string
+        * index: index, from which we start search
+
+        Returns: placeholder id and string index just after the found
+        placeholder, or (None, index + 1) when no placeholder is found
+        """
+        m = self._placeholder_re.search(data, index)
+        if m:
+            return m.group(1), m.end()
+        else:
+            return None, index + 1
+
+    def isin(self, id):
+        """ Check if node with given id exists in stash """
+        return id in self._nodes
+
+    def get(self, id):
+        """ Return node by id """
+        return self._nodes.get(id)
+
+    def add(self, node, type):
+        """ Add node to stash and return its placeholder """
+        pholder, node_id = self._genPlaceholder(type)
+        self._nodes[node_id] = node
+        return pholder
+
+    def rest(self):
+        """ Reset instance """
+        self._nodes = {}
+    
+           
 
+class Markdown:
+    """Convert Markdown to HTML."""
 
-    def _processCodeBlock(self, parentElem, lines, inList):
+    def __init__(self, 
+                 extensions=[],
+                 extension_configs={},
+                 safe_mode = False):
         """
-        Given a list of document lines starting with a code block
-        finds the end of the block, puts it into the ElementTree verbatim
-        wrapped in ("<pre><code>") and recursively processes the
-        the remainder of the text file.
+        Creates a new Markdown instance.
 
         Keyword arguments:
         
-        * parentElem: ElementTree element to which the content will be added
-        * lines: a list of lines
-        * inList: a level
-        
-        Returns: None
+        * extensions: A list of extensions.  
+           If they are of type string, the module mdx_name.py will be loaded.  
+           If they are a subclass of markdown.Extension, they will be used 
+           as-is.
+        * extension_configs: Configuration settings for extensions.
+        * safe_mode: Disallow raw html. One of "remove", "replace" or "escape".
         
         """
-        detabbed, theRest = self.detectTabbed(lines)
+        self.parser = MarkdownParser()
+        self.safeMode = safe_mode
+        self.registeredExtensions = []
+        self.docType = ""
+        self.stripTopLevelTags = True
 
-        pre = etree.SubElement(parentElem, "pre")
-        code = etree.SubElement(pre, "code")
-        
-        text = "\n".join(detabbed).rstrip()+"\n"
-        code.text = AtomicString(text)
-        self._processSection(parentElem, theRest, inList)        
+        self.textPreprocessors = [HTML_BLOCK_PREPROCESSOR]
 
-    def detectTabbed(self, lines):
-        """ Find indented text and remove indent before further proccesing.
+        self.preprocessors = [HEADER_PREPROCESSOR,
+                              LINE_PREPROCESSOR,
+                              # A footnote preprocessor will
+                              # get inserted here
+                              REFERENCE_PREPROCESSOR]
 
-        Keyword arguments:
+
+        self.postprocessors = [PRETTIFYPOSTPROCESSOR,
+                               # a footnote postprocessor will get
+                               # inserted later
+                               ]
+
+        self.textPostprocessors = [# a footnote postprocessor will get
+                                   # inserted here
+                                   RAWHTMLTEXTPOSTPROCESSOR,
+                                   AMPSUBSTITUTETEXTPOSTPROCESSOR]
+
+        self.prePatterns = []
+                               
+        self.inlinePatterns = [
+                               BACKTICK_PATTERN,
+                               ESCAPE_PATTERN,
+                               REFERENCE_PATTERN,
+                               LINK_PATTERN,
+                               IMAGE_LINK_PATTERN,
+                               IMAGE_REFERENCE_PATTERN,
+                               AUTOLINK_PATTERN,
+                               AUTOMAIL_PATTERN,
+                               LINE_BREAK_PATTERN_2,
+                               LINE_BREAK_PATTERN,
+                               HTML_PATTERN,
+                               ENTITY_PATTERN,
+                               NOT_STRONG_PATTERN,
+                               STRONG_EM_PATTERN,
+                               STRONG_PATTERN,
+                               EMPHASIS_PATTERN,
+                               EMPHASIS_PATTERN_2
+                               # The order of the handlers matters!!!
+                               ]
         
-        * lines: an array of strings
-        * fn: a function that returns a substring of a string
-           if the string matches the necessary criteria
+        self.inlineStash = InlineStash()
+        self.references = {}
+        self.htmlStash = HtmlStash()
+
+
+        self.registerExtensions(extensions = extensions,
+                                configs = extension_configs)
+
+        self.reset()
+
+
+    def registerExtensions(self, extensions, configs):
+        """ 
+        Register extensions with this instance of Markdown.
+
+        Keyword arguments:
         
-        Returns: a list of post processes items and the unused
-        remainder of the original list
+        * extensions: A list of extensions, which can either
+           be strings or objects.  See the docstring on Markdown.
+        * configs: A dictionary mapping module names to config options. 
         
         """
-        items = []
-        item = -1
-        i = 0 # to keep track of where we are
+        for ext in extensions:
+            if isinstance(ext, basestring):
+                ext = load_extension(ext, configs.get(ext, []))
+            elif hasattr(ext, 'extendMarkdown'):
+                # Looks like an Extension.
+                # Nothing to do here.
+                pass
+            else:
+                message(ERROR, "Incorrect type! Extension '%s' is "
+                               "neither a string or an Extension." %(repr(ext)))
+                continue
+            ext.extendMarkdown(self, globals())
 
-        def detab(line):
-            match = CORE_RE['tabbed'].match(line)
-            if match:
-               return match.group(4)
+    def registerExtension(self, extension):
+        """ This gets called by the extension """
+        self.registeredExtensions.append(extension)
 
-        for line in lines:
-            if line.strip(): # Non-blank line
-                line = detab(line)
-                if line:
-                    items.append(line)
-                    i += 1
-                    continue
-                else:
-                    return items, lines[i:]
+    def reset(self):
+        """
+        Resets all state variables so that we can start with a new text.
+        """
+        self.inlineStash.rest()
+        self.htmlStash.rest()
+        self.references.clear()
 
-            else: # Blank line: _maybe_ we are done.
-                i += 1 # advance
+        HTML_BLOCK_PREPROCESSOR.stash = self.htmlStash
+        LINE_PREPROCESSOR.stash = self.htmlStash
+        REFERENCE_PREPROCESSOR.references = self.references
+        HTML_PATTERN.stash = self.htmlStash
+        ENTITY_PATTERN.stash = self.htmlStash
+        REFERENCE_PATTERN.references = self.references
+        IMAGE_REFERENCE_PATTERN.references = self.references
+        RAWHTMLTEXTPOSTPROCESSOR.stash = self.htmlStash
+        RAWHTMLTEXTPOSTPROCESSOR.safeMode = self.safeMode
 
-                # Find the next non-blank line
-                for j in range(i, len(lines)):  
-                    if lines[j].strip():
-                        next_line = lines[j]; break
-                else:
-                    break # There is no more text; we are done.
+        for extension in self.registeredExtensions:
+            extension.reset()
 
-                # Check if the next non-blank line is tabbed
-                if detab(next_line): # Yes, more work to do.
-                    items.append("")
-                    continue
-                else:
-                    break # No, we are done.
-        else:
-            i += 1
+        for pattern in self.inlinePatterns:
+            pattern.safe_mode = self.safeMode
 
-        return items, lines[i:]
-        
     def _handleInline(self, data, patternIndex=0):
         """
         Process string with inline patterns and replace it
@@ -1563,50 +1615,8 @@ class Markdown:
             if not matched:
                 patternIndex += 1
         return data
-    
-    def _applyInline(self, pattern, data, patternIndex, startIndex=0):
-        """ 
-        Check if the line fits the pattern, create the necessary 
-        elements, add it to InlineStash
-        
-        Keyword arguments:
-        
-        * data: the text to be processed
-        * pattern: the pattern to be checked
-        * patternIndex: index of current pattern
-        * startIndex: string index, from which we starting search
-
-        Returns: String with placeholders instead of ElementTree elements.
-        """
-        match = pattern.getCompiledRegExp().match(data[startIndex:])
-        leftData = data[:startIndex]
- 
-        if not match:
-            return data, False, 0
 
-        node = pattern.handleMatch(match)
-     
-        if node is None:
-            return data, True, len(leftData) + match.span(len(match.groups()))[0]
-        
-        if not isString(node):         
-            if not isinstance(node.text, AtomicString):
-                # We need to process current node too
-                for child in [node] + node.getchildren():
-                    if not isString(node):
-                        if child.text:
-                            child.text = self._handleInline(child.text, 
-                                                            patternIndex + 1)
-                        if child.tail:
-                            child.tail = self._handleInline(child.tail, 
-                                                            patternIndex)
-   
-        pholder = self.inlineStash.add(node, pattern.type())
 
-        return "%s%s%s%s" % (leftData, 
-                             match.group(1), 
-                             pholder, match.groups()[-1]), True, 0
-   
     def _processElementText(self, node, subnode, isText=True):
         """
         Process placeholders in Element.text or Element.tail
@@ -1706,6 +1716,51 @@ class Markdown:
                 data = ""
 
         return result
+
+    
+    def _applyInline(self, pattern, data, patternIndex, startIndex=0):
+        """
+        Try one inline pattern against the data and stash what it produces.
+
+        Keyword arguments:
+
+        * pattern: the pattern to be checked
+        * data: the text to be processed
+        * patternIndex: index of the current pattern
+        * startIndex: string index from which the search starts
+
+        Returns: a (data, matched, startIndex) tuple.  When the pattern
+        yields a node, the match in `data` is replaced by its placeholder.
+        """
+        match = pattern.getCompiledRegExp().match(data[startIndex:])
+        leftData = data[:startIndex]
+
+        if not match:
+            return data, False, 0
+
+        node = pattern.handleMatch(match)
+        # No node: leave data as is and resume at the start of the last group
+        if node is None:
+            return data, True, len(leftData) + match.span(len(match.groups()))[0]
+        # Run the inline patterns inside the new element unless its text is atomic
+        if not isString(node):
+            if not isinstance(node.text, AtomicString):
+                # We need to process current node too
+                for child in [node] + node.getchildren():
+                    if not isString(node):  # NOTE(review): tests node, not child?
+                        if child.text:
+                            child.text = self._handleInline(child.text,
+                                                            patternIndex + 1)
+                        if child.tail:
+                            child.tail = self._handleInline(child.tail,
+                                                            patternIndex)
+        # Rebuild: skipped prefix + group 1 + placeholder + last group
+        pholder = self.inlineStash.add(node, pattern.type())
+
+        return "%s%s%s%s" % (leftData,
+                             match.group(1),
+                             pholder, match.groups()[-1]), True, 0
+
     
     def applyInlinePatterns(self, markdownTree):
         """
@@ -1756,66 +1811,36 @@ class Markdown:
                
         return markdownTree
 
-    def markdownToTree(self, source=None):
-        """Create ElementTree, without applying inline paterns.
-        
-        Keyword arguments:
-        
-        * source: An ascii or unicode string of Markdown formated text.
+    def convert (self, source):
+        """Convert markdown to serialized XHTML."""
 
-        Returns: ElementTree object.
-        """
+        # Fixup the source text
+        if not source:
+            return u""  # a blank unicode string
         try:
-            self.source = unicode(self.source)
+            source = unicode(source)
         except UnicodeDecodeError:
-            message(CRITICAL, 'UnicodeDecodeError: Markdown only accepts unicode or ascii  input.')
+            message(CRITICAL, 'UnicodeDecodeError: Markdown only accepts unicode or ascii input.')
             return u""
-        
-        # Fixup the source text
-        self.source = self.source.replace(STX, "")
-        self.source = self.source.replace(ETX, "")
-        self.source = self.source.replace("\r\n", "\n").replace("\r", "\n")
-        self.source += "\n\n"
-        self.source = self.source.expandtabs(TAB_LENGTH)
 
+        source = source.replace(STX, "")
+        source = source.replace(ETX, "")
+        source = source.replace("\r\n", "\n").replace("\r", "\n")
+        source += "\n\n"
+        source = source.expandtabs(TAB_LENGTH)
+
+        # Run the text preprocessors
         for pp in self.textPreprocessors:
-            self.source = pp.run(self.source)
+            source = pp.run(source)
 
-        # Split into lines and run the preprocessors that will work with 
-        # self.lines
-        self.lines = self.source.split("\n")
+        # Split into lines and run the line preprocessors.
+        self.lines = source.split("\n")
         for prep in self.preprocessors :
             self.lines = prep.run(self.lines)
 
-        # Create a ElementTree from the lines
-        self.root = etree.Element("div")
-        buffer = []
-        for line in self.lines:
-            if line.startswith("#"):
-                self._processSection(self.root, buffer)
-                buffer = [line]
-            else:
-                buffer.append(line)
-
-        self._processSection(self.root, buffer)
-    
-        return etree.ElementTree(self.root)
-
-
-    def convert (self, source):
-        """Convert markdown to serialized XHTML.
-
-        Keyword arguments:
-        
-        * source: An ascii or unicode string of Markdown formated text.
-
-        """
-        self.source = source
-        if not self.source:
-            return u""  # a blank unicode string
+        # Parse the high-level elements.
+        tree = self.parser.parseDocument(self.lines)
 
-        # Build a tree from the Markdown source and get its root.
-        tree = self.markdownToTree(source)
         root = self.applyInlinePatterns(tree).getroot()
 
         # Run the post-processors
@@ -1836,98 +1861,47 @@ class Markdown:
 
         return xml.strip()
 
-    def __str__(self):
-        """ Report info about instance. Markdown always returns unicode."""
-        if self.source is None:
-            status = 'in which no source text has been assinged.'
-        else:
-            status = 'which contains %d chars and %d line(s) of source.'%\
-                     (len(self.source), self.source.count('\n')+1)
-        return 'An instance of "%s" %s'% (self.__class__, status)
-
-    __unicode__ = convert # markdown should always return a unicode string
-
-
-"""
-EXPORTED FUNCTIONS
-=============================================================================
-
-Those are the two functions we really mean to export: markdown() and
-markdownFromFile().
-"""
-
-def markdownFromFile(input = None,
-                     output = None,
-                     extensions = [],
-                     encoding = None,
-                     safe = False):
-    """Converts a markdown file and returns the HTML as a unicode string.
-
-    Used from the command-line, although may be useful in other situations. 
-    Decodes the file using the provided encoding (defaults to utf-8), passes 
-    the file content to markdown, and outputs the html to either the provided
-    filename or stdout in the same encoding as the source file.
-
-    **Note:** This is the only place that decoding and encoding of unicode
-    takes place in Python-Markdown.  (All other code is unicode-in /
-    unicode-out.)
-
-    Keyword arguments:
-
-    * input: Name of source text file.
-    * output: Name of output file. Writes to stdout if `None`.
-    * extensions: A list of extension names (may contain config args).  
-    * encoding: Encoding of input and output files. Defaults to utf-8.
-    * safe_mode: Disallow raw html. One of "remove", "replace" or "escape".
-
-    """
-    
-    encoding = encoding or "utf-8"
-
-    # Read the source
-    input_file = codecs.open(input, mode="r", encoding=encoding)
-    text = input_file.read()
-    input_file.close()
-    text = text.lstrip(u'\ufeff') # remove the byte-order mark
-
-    # Convert
-    html = markdown(text, extensions, safe_mode = safe)
-
-    # Write to file or stdout
-    if output:
-        output_file = codecs.open(output, "w", encoding=encoding)
-        output_file.write(html)
-        output_file.close()
-    else:
-        sys.stdout.write(html.encode(encoding))
-
-def markdown(text,
-             extensions = [],
-             safe_mode = False):
-    """
-    Convenience wrapper function for `Markdown` class.
-
-    Useful in a typical use case. Initializes an instance of the `Markdown` 
-    class, loads any extensions and runs the parser on the given text. 
-
-    Keyword arguments:
-
-    * text: An ascii or Unicode string of Markdown formatted text.
-    * extensions: A list of extension names (may contain config args).  
-    * safe_mode: Disallow raw html. One of "remove", "replace" or "escape".
+    def convertFile(self, input = None, output = None, encoding = None):
+        """Convert a markdown file and write the HTML to a file or stream.
 
-    Returns: An HTML document as a string.
+        Decodes the file using the provided encoding (defaults to utf-8),
+        passes the file content to markdown, and outputs the html to either
+        the provided stream or the file with provided name, using the same
+        encoding as the source file.
 
-    """
-    message(DEBUG, "in markdown.markdown(), received text:\n%s" % text)
+        **Note:** This is the only place that decoding and encoding of unicode
+        takes place in Python-Markdown.  (All other code is unicode-in /
+        unicode-out.)
 
-    extensions = [load_extension(e) for e in extensions]
+        Keyword arguments:
 
-    md = Markdown(extensions=extensions,
-                  safe_mode = safe_mode)
+        * input: Name of source text file.
+        * output: Name of the output file, or a stream with a write()
+          method.  Writes to stdout if `None`.
+        * encoding: Encoding of input and output files. Defaults to utf-8.
+        Returns None; the HTML is written to `output`, not returned.
 
-    return md.convert(text)
+        """
         
+        encoding = encoding or "utf-8"
+        # Read and decode the source, closing the file even on error
+        input_file = codecs.open(input, mode="r", encoding=encoding)
+        try:
+            text = input_file.read().lstrip(u'\ufeff') # drop the BOM
+        finally:
+            input_file.close()
+        html = self.convert(text)
+
+        # Write to the named file, the given stream, or stdout
+        if output is None:
+            output = sys.stdout
+        if isinstance(output, basestring): # str or unicode file name
+            output_file = codecs.open(output, "w", encoding=encoding)
+            output_file.write(html)
+            output_file.close()
+        else:
+            output.write(html.encode(encoding))
+
 
 """
 Extensions
@@ -1966,65 +1940,113 @@ class Extension:
         
         This method must be overriden by every extension.
 
-        Ketword arguments:
+        Keyword arguments:
 
         * md: The Markdown instance.
 
-        * md_globals: All global variables availabel in the markdown module
-        namespace.
+        * md_globals: Global variables in the markdown module namespace.
 
         """
         pass
 
 
 def load_extension(ext_name, configs = []):
-    """ 
-    Load extension by name, then return the module.
+    """Load extension by name; return its makeExtension() result or None.
     
     The extension name may contain arguments as part of the string in the 
-    following format:
-
-        "extname(key1=value1,key2=value2)"
-    
-    Print an error message and exit on failure. 
+    following format: "extname(key1=value1,key2=value2)"
     
     """
 
-    # I am making the assumption that the order of config options
-    # does not matter.
+    # Parse extension config params (their order does not matter)
     configs = dict(configs)
-    pos = ext_name.find("(") 
+    pos = ext_name.find("(") # find the first "("
     if pos > 0:
         ext_args = ext_name[pos+1:-1]
         ext_name = ext_name[:pos]
         pairs = [x.split("=") for x in ext_args.split(",")]
         configs.update([(x.strip(), y.strip()) for (x, y) in pairs])
 
+    # Set up the module names
     ext_module = 'markdown_extensions'
-    module_name = '.'.join([ext_module, ext_name])
-    extension_module_name = '_'.join(['mdx', ext_name])
+    module_name_new_style = '.'.join([ext_module, ext_name])
+    module_name_old_style = '_'.join(['mdx', ext_name])
 
-    try:
-            module = __import__(module_name, {}, {}, [ext_module])
+    # Try loading the extension first from one place, then another
+    try: # New style (markdown_extensions.<extension>)
+        module = __import__(module_name_new_style, {}, {}, [ext_module])
     except ImportError:
+        try: # Old style (mdx_<extension>)
+            module = __import__(module_name_old_style)
+        except ImportError:
+            module = None # neither import worked; warn below
+
+    if module is not None:
+        # If the module is loaded successfully, we expect it to define a
+        # function called makeExtension()
         try:
-            module = __import__(extension_module_name)
+            return module.makeExtension(configs.items())
         except:
-            message(WARN,
-                "Failed loading extension '%s' from '%s' or '%s' "
-                "- continuing without."
-                % (ext_name, module_name, extension_module_name) )
-            # Return a dummy (do nothing) Extension as silent failure
-            return Extension(configs={})
-
-    return module.makeExtension(configs.items())    
+            message(WARN, "Failed to instantiate extension '%s'" % ext_name)
+    else:
+        message(WARN, "Failed loading extension '%s' from '%s' or '%s'"
+                % (ext_name, module_name_new_style, module_name_old_style))
 
+def load_extensions(ext_names):
+    """Load extensions by name and return those that loaded successfully.
+
+    A name may carry config args ("name(key=value)"); failures are skipped.
+    """
+    loaded = [load_extension(ext_name) for ext_name in ext_names]
+    return [extension for extension in loaded if extension]
 
 # Extensions should use "markdown.etree" instead of "etree" (or do `from
 # markdown import etree`).  Do not import it by yourself.
 
 etree = importETree() 
 
+"""
+EXPORTED FUNCTIONS
+=============================================================================
+
+These are the two functions we really mean to export: markdown() and
+markdownFromFile().
+"""
+
+def markdown(text,
+             extensions = [],
+             safe_mode = False):
+    """Render a markdown string as HTML in a single call.
+
+    Convenience wrapper around the `Markdown` class for the common case:
+    the named extensions are loaded, a converter is built with them and
+    with the requested safe mode, and `text` is run through it.
+
+    Keyword arguments:
+
+    * text: Markdown formatted text as Unicode or ASCII string.
+    * extensions: A list of extension names (may contain config args).
+    * safe_mode: Disallow raw html.  One of "remove", "replace" or "escape".
+
+    Returns: whatever `Markdown.convert()` returns for `text`.
+
+    """
+    loaded = load_extensions(extensions)
+    converter = Markdown(extensions=loaded, safe_mode=safe_mode)
+    return converter.convert(text)
+
+
+def markdownFromFile(input = None,
+                     output = None,
+                     extensions = [],
+                     encoding = None,
+                     safe = False):
+    """Convert the markdown file `input` to HTML and write it to `output`."""
+    # `safe` is this function's name for Markdown's `safe_mode` option.
+    md = Markdown(extensions=load_extensions(extensions),
+                  safe_mode = safe)
+    md.convertFile(input, output, encoding)
+
 
 """
 COMMAND-LINE SPECIFIC STUFF
-- 
cgit v1.2.3