From daa2d46b567e67aa4578a5c26a7d92e4cf5abc81 Mon Sep 17 00:00:00 2001
From: ryneeverett <ryneeverett@gmail.com>
Date: Thu, 3 Oct 2013 16:51:07 -0400
Subject: Issue #52

---
 docs/extensions/extra.txt            |  96 ++++++++++++++++++++++++++++
 markdown/extensions/extra.py         |  93 ++++++++++++++++++++++++---
 markdown/preprocessors.py            | 118 +++++++++++++++++++++++------------
 markdown/util.py                     |  17 ++++-
 tests/extensions/extra/raw-html.html |  29 +++++++--
 tests/extensions/extra/raw-html.txt  |  34 ++++++++++
 6 files changed, 330 insertions(+), 57 deletions(-)
diff --git a/docs/extensions/extra.txt b/docs/extensions/extra.txt
index d747496..6140647 100644
--- a/docs/extensions/extra.txt
+++ b/docs/extensions/extra.txt
@@ -41,3 +41,99 @@ therefore, not part of Python-Markdown Extra. If you really would
 like Extra to include additional extensions, we suggest creating 
 your own clone of Extra under a different name 
 (see the [Extension API](api.html)).  
+
+Markdown Inside HTML Blocks
+---------------------------
+
+Unlike the other Extra features, this feature is built into the markdown core and is turned on when `extra` is enabled.
+
+The content of any block-level element can be Markdown-formatted simply by adding a `markdown` attribute to the opening tag. The markdown attribute will be stripped from the output, but all other attributes will be preserved.
+
+If the markdown value is set to `1` (recommended) or any value other than `span` or `block`, the default behavior will be executed: `p`, `h[1-6]`, `li`, `dd`, `dt`, `td`, `th`, `legend`, and `address` elements skip block parsing while others do not. If the default is overridden by a value of `span`, *block parsing will be skipped* regardless of tag. If the default is overridden by a value of `block`, *block parsing will occur* regardless of tag.
+
+*An opening tag with the markdown attribute must start immediately on a line following a blank line.*
+
+#### Simple Example:
+```
+This is *true* markdown text.
+
+<div markdown="1">
+This is *true* markdown text.
+</div>
+```
+#### Result:
+```
+<p>This is <em>true</em> markdown text.</p>
+<div>
+<p>This is <em>true</em> markdown text.</p>
+</div>
+```
+
+### Nested Markdown Inside HTML Blocks
+Nested elements are more sensitive and must be used cautiously. Violating any of the following rules will lead to unexpected behavior or unhandled exceptions.
+	* Only block mode elements may have further elements nested within them.
+	* The closing tag of inner elements must be followed by a blank line.
+	* More than one level of nesting is not supported (i.e., elements nested within elements nested within elements). This feature is not an alternative to templating. 
+
+#### Complex Example:
+```
+<div markdown="1" name="Example">
+
+The text of the `Example` element.
+
+<div markdown="1" name="DefaultBlockMode">
+This text gets wrapped in `p` tags.
+</div>
+
+The tail of the `DefaultBlockMode` subelement.
+
+<p markdown="1" name="DefaultSpanMode">
+This text *is not* wrapped in additional `p` tags.
+</p>
+
+The tail of the `DefaultSpanMode` subelement.
+
+<div markdown="span" name="SpanModeOverride">
+This `div` block is not wrapped in paragraph tags.
+Note: Subelements are not required to have tail text.
+</div>
+
+<p markdown="block" name="BlockModeOverride">
+This `p` block *is* foolishly wrapped in further paragraph tags.
+</p>
+
+The tail of the `BlockModeOverride` subelement.
+
+<div name="RawHtml">
+Raw html blocks may also be nested.
+</div>
+
+</div>
+
+This text is after the markdown in html.
+```
+#### Result:
+```
+<div name="Example">
+<p>The text of the <code>Example</code> element.</p>
+<div name="DefaultBlockMode">
+<p>This text gets wrapped in <code>p</code> tags.</p>
+</div>
+<p>The tail of the <code>DefaultBlockMode</code> subelement.</p>
+<p name="DefaultSpanMode">
+This text <em>is not</em> wrapped in additional <code>p</code> tags.</p>
+<p>The tail of the <code>DefaultSpanMode</code> subelement.</p>
+<div name="SpanModeOverride">
+This <code>div</code> block is not wrapped in paragraph tags.
+Note: Subelements are not required to have tail text.</div>
+<p name="BlockModeOverride">
+<p>This <code>p</code> block <em>is</em> foolishly wrapped in further paragraph tags.</p>
+</p>
+<p>The tail of the <code>BlockModeOverride</code> subelement.</p>
+<div name="RawHtml">
+Raw html blocks may also be nested.
+</div>
+
+</div>
+<p>This text is after the markdown in html.</p>
+```
diff --git a/markdown/extensions/extra.py b/markdown/extensions/extra.py
index e6a1e82..dd70305 100644
--- a/markdown/extensions/extra.py
+++ b/markdown/extensions/extra.py
@@ -6,22 +6,22 @@ A compilation of various Python-Markdown extensions that imitates
 [PHP Markdown Extra](http://michelf.com/projects/php-markdown/extra/).
 
 Note that each of the individual extensions still need to be available
-on your PYTHONPATH. This extension simply wraps them all up as a 
+on your PYTHONPATH. This extension simply wraps them all up as a
 convenience so that only one extension needs to be listed when
 initiating Markdown. See the documentation for each individual
 extension for specifics about that extension.
 
-In the event that one or more of the supported extensions are not 
-available for import, Markdown will issue a warning and simply continue 
-without that extension. 
+In the event that one or more of the supported extensions are not
+available for import, Markdown will issue a warning and simply continue
+without that extension.
 
-There may be additional extensions that are distributed with 
+There may be additional extensions that are distributed with
 Python-Markdown that are not included here in Extra. Those extensions
 are not part of PHP Markdown Extra, and therefore, not part of
 Python-Markdown Extra. If you really would like Extra to include
 additional extensions, we suggest creating your own clone of Extra
-under a differant name. You could also edit the `extensions` global 
-variable defined below, but be aware that such changes may be lost 
+under a different name. You could also edit the `extensions` global
+variable defined below, but be aware that such changes may be lost
 when you upgrade to any future version of Python-Markdown.
 
 """
@@ -29,6 +29,9 @@ when you upgrade to any future version of Python-Markdown.
 from __future__ import absolute_import
 from __future__ import unicode_literals
 from . import Extension
+from ..blockprocessors import BlockProcessor
+from .. import util
+import re
 
 extensions = ['smart_strong',
               'fenced_code',
@@ -38,7 +41,7 @@ extensions = ['smart_strong',
               'tables',
               'abbr',
               ]
-              
+
 
 class ExtraExtension(Extension):
     """ Add various extensions to Markdown class."""
@@ -49,6 +52,80 @@ class ExtraExtension(Extension):
         if not md.safeMode:
             # Turn on processing of markdown text within raw html
             md.preprocessors['html_block'].markdown_in_raw = True
+            md.parser.blockprocessors.add('markdown_block',
+                                          MarkdownInHtmlProcessor(md.parser),
+                                          '_begin')
+            md.parser.blockprocessors.tag_counter = -1
+            md.parser.blockprocessors.contain_span_tags = re.compile(
+                r'^(p|h[1-6]|li|dd|dt|td|th|legend|address)$', re.IGNORECASE)
+
 
 def makeExtension(configs={}):
     return ExtraExtension(configs=dict(configs))
+
+
+class MarkdownInHtmlProcessor(BlockProcessor):
+    """Process Markdown Inside HTML Blocks."""
+    def test(self, parent, block):
+        return block == util.TAG_PLACEHOLDER % \
+            str(self.parser.blockprocessors.tag_counter + 1)
+
+    def _process_nests(self, element, block):
+        """Process the element's child elements in self.run."""
+        # Build list of indexes of each nest within the parent element.
+        nest_index = []  # a list of tuples: (left index, right index)
+        i = self.parser.blockprocessors.tag_counter + 1
+        while len(self.parser.markdown.htmlStash.tag_data) > i and self.\
+                parser.markdown.htmlStash.tag_data[i]['left_index']:
+            left_child_index = \
+                self.parser.markdown.htmlStash.tag_data[i]['left_index']
+            right_child_index = \
+                self.parser.markdown.htmlStash.tag_data[i]['right_index']
+            nest_index.append((left_child_index - 1, right_child_index))
+            i += 1
+
+        # Create each nest subelement.
+        i = 0
+        for n in nest_index[:-1]:
+            self.run(element, block[n[0]:n[1]],
+                     block[n[1]:nest_index[i + 1][0]], True)
+            i += 1
+        self.run(element, block[nest_index[-1][0]:nest_index[-1][1]],  # last
+                 block[nest_index[-1][1]:], True)                      # nest
+
+    def run(self, parent, blocks, tail=None, nest=False):
+        self.parser.blockprocessors.tag_counter += 1
+        tag_data = self.parser.markdown.htmlStash.tag_data[
+            self.parser.blockprocessors.tag_counter]
+
+        # Create Element
+        markdown_value = tag_data['attrs'].pop('markdown')
+        element = util.etree.SubElement(parent, tag_data['tag'],
+                                        tag_data['attrs'])
+
+        # Slice Off Block
+        if nest:
+            self.parser.parseBlocks(parent, tail)  # Process Tail
+            block = blocks[1:]
+        else:  # includes nests since a third level of nesting isn't supported
+            block = blocks[tag_data['left_index'] + 1:
+                           tag_data['right_index']]
+            del blocks[:tag_data['right_index']]
+
+        # Process Text
+        if (self.parser.blockprocessors.contain_span_tags.match(  # Span Mode
+                tag_data['tag']) and markdown_value != 'block') or \
+                markdown_value == 'span':
+            element.text = '\n'.join(block)
+        else:                                                     # Block Mode
+            i = self.parser.blockprocessors.tag_counter + 1
+            if len(self.parser.markdown.htmlStash.tag_data) > i and self.\
+                    parser.markdown.htmlStash.tag_data[i]['left_index']:
+                first_subelement_index = self.parser.markdown.htmlStash.\
+                    tag_data[i]['left_index'] - 1
+                self.parser.parseBlocks(
+                    element, block[:first_subelement_index])
+                if not nest:
+                    block = self._process_nests(element, block)
+            else:
+                self.parser.parseBlocks(element, block)
diff --git a/markdown/preprocessors.py b/markdown/preprocessors.py
index 72b2ed6..c532702 100644
--- a/markdown/preprocessors.py
+++ b/markdown/preprocessors.py
@@ -3,7 +3,7 @@ PRE-PROCESSORS
 =============================================================================
 
 Preprocessors work on source text before we start doing anything too
-complicated. 
+complicated.
 """
 
 from __future__ import absolute_import
@@ -62,7 +62,7 @@ class HtmlBlockPreprocessor(Preprocessor):
     right_tag_patterns = ["</%s>", "%s>"]
     attrs_pattern = r"""
         \s+(?P<attr>[^>"'/= ]+)=(?P<q>['"])(?P<value>.*?)(?P=q)   # attr="value"
-        |                                                         # OR 
+        |                                                         # OR
         \s+(?P<attr1>[^>"'/= ]+)=(?P<value1>[^> ]+)               # attr=value
         |                                                         # OR
         \s+(?P<attr2>[^>"'/= ]+)                                  # attr
@@ -102,7 +102,7 @@ class HtmlBlockPreprocessor(Preprocessor):
             i = block.find(rtag, start_index)
             if i == -1:
                 return -1
-            j = block.find(ltag, start_index) 
+            j = block.find(ltag, start_index)
             # if no ltag, or rtag found before another ltag, return index
             if (j > i or j == -1):
                 return i + len(rtag)
@@ -111,7 +111,7 @@ class HtmlBlockPreprocessor(Preprocessor):
             j = block.find('>', j)
             start_index = self._recursive_tagfind(ltag, rtag, j + 1, block)
             if start_index == -1:
-                # HTML potentially malformed- ltag has no corresponding 
+                # HTML potentially malformed- ltag has no corresponding
                 # rtag
                 return -1
 
@@ -122,7 +122,7 @@ class HtmlBlockPreprocessor(Preprocessor):
             if i > 2:
                 return tag.lstrip("<").rstrip(">"), i
         return block.rstrip()[-left_index:-1].lower(), len(block)
-    
+
     def _equal_tags(self, left_tag, right_tag):
         if left_tag[0] in ['?', '@', '%']: # handle PHP, etc.
             return True
@@ -139,6 +139,49 @@ class HtmlBlockPreprocessor(Preprocessor):
     def _is_oneliner(self, tag):
         return (tag in ['hr', 'hr/'])
 
+    def _stringindex_to_listindex(self, stringindex, items):
+        """
+        Same effect as concatenating the strings in items,
+        finding the character to which stringindex refers in that string,
+        and returning the item in which that character resides.
+        """
+        items.append('dummy')
+        i, count = 0, 0
+        while count <= stringindex:
+            count += len(items[i])
+            i += 1
+        return i - 1
+
+    def _nested_markdown_in_html(self, items):
+        """Find and process html child elements of the given element block."""
+        i = 0
+        while i < len(items):
+            if self.left_tag_re.match(items[i]):
+                left_tag, left_index, attrs = \
+                    self._get_left_tag(''.join(items[i:]))
+                right_tag, data_index = self._get_right_tag(
+                    left_tag, left_index, ''.join(items[i:]))
+                right_listindex = \
+                    self._stringindex_to_listindex(data_index, items[i:]) + i
+                if 'markdown' in attrs.keys():
+                    placeholder = self.markdown.htmlStash.store_tag(
+                        left_tag, attrs, i + 1, right_listindex + 1)
+                    items = items[:i] + [placeholder] + \
+                        [items[i][left_index:]] + items[i + 1:]
+                    if len(items) - right_listindex <= 1:  # last nest, no tail
+                        right_listindex -= 1
+                    items[right_listindex] = items[right_listindex][
+                        :-len(right_tag) - 2]  # remove closing tag
+                else:  # raw html
+                    if len(items) - right_listindex <= 1:  # last element
+                        right_listindex -= 1
+                    placeholder = self.markdown.htmlStash.store('\n\n'.join(
+                        items[i:right_listindex]))
+                    del items[i:right_listindex]
+                    items.insert(i, placeholder)
+            i += 1
+        return items
+
     def run(self, lines):
         text = "\n".join(lines)
         new_blocks = []
@@ -165,14 +208,14 @@ class HtmlBlockPreprocessor(Preprocessor):
                         left_tag, left_index, attrs  = "--", 2, {}
                     else:
                         left_tag, left_index, attrs = self._get_left_tag(block)
-                    right_tag, data_index = self._get_right_tag(left_tag, 
+                    right_tag, data_index = self._get_right_tag(left_tag,
                                                                 left_index,
                                                                 block)
                     # keep checking conditions below and maybe just append
-                    
+
                     if data_index < len(block) \
                         and (util.isBlockLevel(left_tag)
-                        or left_tag == '--'): 
+                        or left_tag == '--'):
                         text.insert(0, block[data_index:])
                         block = block[:data_index]
 
@@ -186,22 +229,17 @@ class HtmlBlockPreprocessor(Preprocessor):
                         continue
 
                     if block.rstrip().endswith(">") \
-                        and self._equal_tags(left_tag, right_tag):
+                            and self._equal_tags(left_tag, right_tag):
                         if self.markdown_in_raw and 'markdown' in attrs.keys():
-                            start = re.sub(r'\smarkdown(=[\'"]?[^> ]*[\'"]?)?', 
-                                           '', block[:left_index])
-                            end = block[-len(right_tag)-2:]
-                            block = block[left_index:-len(right_tag)-2]
-                            new_blocks.append(
-                                self.markdown.htmlStash.store(start))
-                            new_blocks.append(block)
-                            new_blocks.append(
-                                self.markdown.htmlStash.store(end))
+                            block = block[left_index:-len(right_tag) - 2]
+                            new_blocks.append(self.markdown.htmlStash.
+                                              store_tag(left_tag, attrs, 0, 2))
+                            new_blocks.extend([block])
                         else:
                             new_blocks.append(
                                 self.markdown.htmlStash.store(block.strip()))
                         continue
-                    else: 
+                    else:
                         # if is block level tag and is not complete
 
                         if util.isBlockLevel(left_tag) or left_tag == "--" \
@@ -214,7 +252,8 @@ class HtmlBlockPreprocessor(Preprocessor):
 
                         continue
 
-                new_blocks.append(block)
+                else:
+                    new_blocks.append(block)
 
             else:
                 items.append(block)
@@ -223,7 +262,7 @@ class HtmlBlockPreprocessor(Preprocessor):
 
                 if self._equal_tags(left_tag, right_tag):
                     # if find closing tag
-                    
+
                     if data_index < len(block):
                         # we have more text after right_tag
                         items[-1] = block[:data_index]
@@ -231,16 +270,17 @@ class HtmlBlockPreprocessor(Preprocessor):
 
                     in_tag = False
                     if self.markdown_in_raw and 'markdown' in attrs.keys():
-                        start = re.sub(r'\smarkdown(=[\'"]?[^> ]*[\'"]?)?', 
-                                       '', items[0][:left_index])
                         items[0] = items[0][left_index:]
-                        end = items[-1][-len(right_tag)-2:]
-                        items[-1] = items[-1][:-len(right_tag)-2]
-                        new_blocks.append(
-                            self.markdown.htmlStash.store(start))
-                        new_blocks.extend(items)
-                        new_blocks.append(
-                            self.markdown.htmlStash.store(end))
+                        items[-1] = items[-1][:-len(right_tag) - 2]
+                        new_blocks.append(self.markdown.htmlStash.store_tag(
+                            left_tag, attrs, 0, len(items) + 2))
+                        placeholderslen = len(self.markdown.htmlStash.tag_data)
+                        new_blocks.extend(
+                            self._nested_markdown_in_html(items))
+                        nests = len(self.markdown.htmlStash.tag_data) - \
+                            placeholderslen
+                        self.markdown.htmlStash.tag_data[-1 - nests][
+                            'right_index'] += nests - 2
                     else:
                         new_blocks.append(
                             self.markdown.htmlStash.store('\n\n'.join(items)))
@@ -248,21 +288,19 @@ class HtmlBlockPreprocessor(Preprocessor):
 
         if items:
             if self.markdown_in_raw and 'markdown' in attrs.keys():
-                start = re.sub(r'\smarkdown(=[\'"]?[^> ]*[\'"]?)?', 
-                               '', items[0][:left_index])
                 items[0] = items[0][left_index:]
-                end = items[-1][-len(right_tag)-2:]
-                items[-1] = items[-1][:-len(right_tag)-2]
+                items[-1] = items[-1][:-len(right_tag) - 2]
                 new_blocks.append(
-                    self.markdown.htmlStash.store(start))
-                new_blocks.extend(items)
-                if end.strip():
-                    new_blocks.append(
-                        self.markdown.htmlStash.store(end))
+                    self.markdown.htmlStash.store_tag(
+                        left_tag, attrs, 0, len(items) + 2))
+                placeholderslen = len(self.markdown.htmlStash.tag_data)
+                new_blocks.extend(self._nested_markdown_in_html(items))
+                nests = len(self.markdown.htmlStash.tag_data) - placeholderslen
+                self.markdown.htmlStash.tag_data[-1 - nests][
+                    'right_index'] += nests - 2
             else:
                 new_blocks.append(
                     self.markdown.htmlStash.store('\n\n'.join(items)))
-            #new_blocks.append(self.markdown.htmlStash.store('\n\n'.join(items)))
             new_blocks.append('\n')
 
         new_text = "\n\n".join(new_blocks)
diff --git a/markdown/util.py b/markdown/util.py
index d0ef8a3..8c2e6d7 100644
--- a/markdown/util.py
+++ b/markdown/util.py
@@ -40,6 +40,7 @@ INLINE_PLACEHOLDER_RE = re.compile(INLINE_PLACEHOLDER % r'([0-9]{4})')
 AMP_SUBSTITUTE = STX+"amp"+ETX
 HTML_PLACEHOLDER = STX + "wzxhzdk:%s" + ETX
 HTML_PLACEHOLDER_RE = re.compile(HTML_PLACEHOLDER % r'([0-9]+)')
+TAG_PLACEHOLDER = STX + "hzzhzkh:%s" + ETX
 
 
 """
@@ -119,10 +120,12 @@ class HtmlStash(object):
     in the beginning and replace with place-holders.
     """
 
-    def __init__ (self):
+    def __init__(self):
         """ Create a HtmlStash. """
-        self.html_counter = 0 # for counting inline html segments
-        self.rawHtmlBlocks=[]
+        self.html_counter = 0  # for counting inline html segments
+        self.rawHtmlBlocks = []
+        self.tag_counter = 0
+        self.tag_data = []  # list of dictionaries in the order tags appear
 
     def store(self, html, safe=False):
         """
@@ -150,3 +153,11 @@ class HtmlStash(object):
     def get_placeholder(self, key):
         return HTML_PLACEHOLDER % key
 
+    def store_tag(self, tag, attrs, left_index, right_index):
+        """Store tag data and return a placeholder."""
+        self.tag_data.append({'tag': tag, 'attrs': attrs,
+                              'left_index': left_index,
+                              'right_index': right_index})
+        placeholder = TAG_PLACEHOLDER % str(self.tag_counter)
+        self.tag_counter += 1  # equal to the tag's index in self.tag_data
+        return placeholder
diff --git a/tests/extensions/extra/raw-html.html b/tests/extensions/extra/raw-html.html
index b2a7c4d..897ad24 100644
--- a/tests/extensions/extra/raw-html.html
+++ b/tests/extensions/extra/raw-html.html
@@ -1,14 +1,31 @@
 <div>
-
 <p><em>foo</em></p>
 </div>
-
 <div class="baz">
-
 <p><em>bar</em></p>
 </div>
-
 <div>
-
 <p><em>blah</em></p>
-</div>
\ No newline at end of file
+</div>
+<div name="Example">
+<p>The text of the <code>Example</code> element.</p>
+<div name="DefaultBlockMode">
+<p>This text gets wrapped in <code>p</code> tags.</p>
+</div>
+<p>The tail of the <code>DefaultBlockMode</code> subelement.</p>
+<p name="DefaultSpanMode">
+This text <em>is not</em> wrapped in additional <code>p</code> tags.</p>
+<p>The tail of the <code>DefaultSpanMode</code> subelement.</p>
+<div name="SpanModeOverride">
+This <code>div</code> block is not wrapped in paragraph tags.
+Note: Subelements are not required to have tail text.</div>
+<p name="BlockModeOverride">
+<p>This <code>p</code> block <em>is</em> foolishly wrapped in further paragraph tags.</p>
+</p>
+<p>The tail of the <code>BlockModeOverride</code> subelement.</p>
+<div name="RawHtml">
+Raw html blocks may also be nested.
+</div>
+
+</div>
+<p>This text is after the markdown in html.</p>
\ No newline at end of file
diff --git a/tests/extensions/extra/raw-html.txt b/tests/extensions/extra/raw-html.txt
index 284fe0c..e906910 100644
--- a/tests/extensions/extra/raw-html.txt
+++ b/tests/extensions/extra/raw-html.txt
@@ -9,4 +9,38 @@ _bar_
 _blah_
 
 </div>
+ 
+<div markdown="1" name="Example">
 
+The text of the `Example` element.
+
+<div markdown="1" name="DefaultBlockMode">
+This text gets wrapped in `p` tags.
+</div>
+
+The tail of the `DefaultBlockMode` subelement.
+
+<p markdown="1" name="DefaultSpanMode">
+This text *is not* wrapped in additional `p` tags.
+</p>
+
+The tail of the `DefaultSpanMode` subelement.
+
+<div markdown="span" name="SpanModeOverride">
+This `div` block is not wrapped in paragraph tags.
+Note: Subelements are not required to have tail text.
+</div>
+
+<p markdown="block" name="BlockModeOverride">
+This `p` block *is* foolishly wrapped in further paragraph tags.
+</p>
+
+The tail of the `BlockModeOverride` subelement.
+
+<div name="RawHtml">
+Raw html blocks may also be nested.
+</div>
+
+</div>
+
+This text is after the markdown in html.
-- 
cgit v1.2.3