From daa2d46b567e67aa4578a5c26a7d92e4cf5abc81 Mon Sep 17 00:00:00 2001 From: ryneeverett Date: Thu, 3 Oct 2013 16:51:07 -0400 Subject: Issue #52 --- docs/extensions/extra.txt | 96 ++++++++++++++++++++++++++++ markdown/extensions/extra.py | 93 ++++++++++++++++++++++++--- markdown/preprocessors.py | 118 +++++++++++++++++++++++------------ markdown/util.py | 17 ++++- tests/extensions/extra/raw-html.html | 29 +++++++-- tests/extensions/extra/raw-html.txt | 34 ++++++++++ 6 files changed, 330 insertions(+), 57 deletions(-) diff --git a/docs/extensions/extra.txt b/docs/extensions/extra.txt index d747496..6140647 100644 --- a/docs/extensions/extra.txt +++ b/docs/extensions/extra.txt @@ -41,3 +41,99 @@ therefore, not part of Python-Markdown Extra. If you really would like Extra to include additional extensions, we suggest creating your own clone of Extra under a different name (see the [Extension API](api.html)). + +Markdown Inside HTML Blocks +--------------------------- + +Unlike the other Extra features, this feature is built into the markdown core and is turned on when `extra` is enabled. + +The content of any block-level element can be Markdown-formatted simply by adding a `markdown` attribute to the opening tag. The markdown attribute will be stripped from the output, but all other attributes will be preserved. + +If the markdown value is set to `1` (recommended) or any value other than `span` or `block`, the default behavior will be executed: `p`, `h[1-6]`, `li`, `dd`, `dt`, `td`, `th`, `legend`, and `address` elements skip block parsing while others do not. If the default is overridden by a value of `span`, *block parsing will be skipped* regardless of tag. If the default is overridden by a value of `block`, *block parsing will occur* regardless of tag. + +*An opening tag with the markdown attribute must start immediately on a line following a blank line.* + +#### Simple Example: +``` +This is *true* markdown text. + +
+This is *true* markdown text. +
+``` +#### Result: +``` +

This is true markdown text.

+
+

This is true markdown text.

+
+``` + +### Nested Markdown Inside HTML Blocks +Nested elements are more sensitive and must be used cautiously. Violation of the following will lead to unexpected behavior or unhandled exceptions. + * Only block mode elements may have further elements nested within them. + * The closing tag of inner elements must be followed by a blank line. + * More than one level of nesting is not supported (i.e., elements nested within elements nested within elements). This feature is not an alternative to templating. + +#### Complex Example: +``` +
+ +The text of the `Example` element. + +
+This text gets wrapped in `p` tags. +
+ +The tail of the `DefaultBlockMode` subelement. + +

+This text *is not* wrapped in additional `p` tags. +

+ +The tail of the `DefaultSpanMode` subelement. + +
+This `div` block is not wrapped in paragraph tags. +Note: Subelements are not required to have tail text. +
+ +

+This `p` block *is* foolishly wrapped in further paragraph tags. +

+ +The tail of the `BlockModeOverride` subelement. + +
+Raw html blocks may also be nested. +
+ +
+ +This text is after the markdown in html. +``` +#### Result: +``` +
+

The text of the Example element.

+
+

This text gets wrapped in p tags.

+
+

The tail of the DefaultBlockMode subelement.

+

+This text is not wrapped in additional p tags.

+

The tail of the DefaultSpanMode subelement.

+
+This div block is not wrapped in paragraph tags. +Note: Subelements are not required to have tail text.
+

+

This p block is foolishly wrapped in further paragraph tags.

+

+

The tail of the BlockModeOverride subelement.

+
+Raw html blocks may also be nested. +
+ +
+

This text is after the markdown in html.

+``` diff --git a/markdown/extensions/extra.py b/markdown/extensions/extra.py index e6a1e82..dd70305 100644 --- a/markdown/extensions/extra.py +++ b/markdown/extensions/extra.py @@ -6,22 +6,22 @@ A compilation of various Python-Markdown extensions that imitates [PHP Markdown Extra](http://michelf.com/projects/php-markdown/extra/). Note that each of the individual extensions still need to be available -on your PYTHONPATH. This extension simply wraps them all up as a +on your PYTHONPATH. This extension simply wraps them all up as a convenience so that only one extension needs to be listed when initiating Markdown. See the documentation for each individual extension for specifics about that extension. -In the event that one or more of the supported extensions are not -available for import, Markdown will issue a warning and simply continue -without that extension. +In the event that one or more of the supported extensions are not +available for import, Markdown will issue a warning and simply continue +without that extension. -There may be additional extensions that are distributed with +There may be additional extensions that are distributed with Python-Markdown that are not included here in Extra. Those extensions are not part of PHP Markdown Extra, and therefore, not part of Python-Markdown Extra. If you really would like Extra to include additional extensions, we suggest creating your own clone of Extra -under a differant name. You could also edit the `extensions` global -variable defined below, but be aware that such changes may be lost +under a differant name. You could also edit the `extensions` global +variable defined below, but be aware that such changes may be lost when you upgrade to any future version of Python-Markdown. """ @@ -29,6 +29,9 @@ when you upgrade to any future version of Python-Markdown. from __future__ import absolute_import from __future__ import unicode_literals from . import Extension +from ..blockprocessors import BlockProcessor +from .. 
import util +import re extensions = ['smart_strong', 'fenced_code', @@ -38,7 +41,7 @@ extensions = ['smart_strong', 'tables', 'abbr', ] - + class ExtraExtension(Extension): """ Add various extensions to Markdown class.""" @@ -49,6 +52,80 @@ class ExtraExtension(Extension): if not md.safeMode: # Turn on processing of markdown text within raw html md.preprocessors['html_block'].markdown_in_raw = True + md.parser.blockprocessors.add('markdown_block', + MarkdownInHtmlProcessor(md.parser), + '_begin') + md.parser.blockprocessors.tag_counter = -1 + md.parser.blockprocessors.contain_span_tags = re.compile( + r'^(p|h[1-6]|li|dd|dt|td|th|legend|address)$', re.IGNORECASE) + def makeExtension(configs={}): return ExtraExtension(configs=dict(configs)) + + +class MarkdownInHtmlProcessor(BlockProcessor): + """Process Markdown Inside HTML Blocks.""" + def test(self, parent, block): + return block == util.TAG_PLACEHOLDER % \ + str(self.parser.blockprocessors.tag_counter + 1) + + def _process_nests(self, element, block): + """Process the element's child elements in self.run.""" + # Build list of indexes of each nest within the parent element. + nest_index = [] # a list of tuples: (left index, right index) + i = self.parser.blockprocessors.tag_counter + 1 + while len(self.parser.markdown.htmlStash.tag_data) > i and self.\ + parser.markdown.htmlStash.tag_data[i]['left_index']: + left_child_index = \ + self.parser.markdown.htmlStash.tag_data[i]['left_index'] + right_child_index = \ + self.parser.markdown.htmlStash.tag_data[i]['right_index'] + nest_index.append((left_child_index - 1, right_child_index)) + i += 1 + + # Create each nest subelement. 
+ i = 0 + for n in nest_index[:-1]: + self.run(element, block[n[0]:n[1]], + block[n[1]:nest_index[i + 1][0]], True) + i += 1 + self.run(element, block[nest_index[-1][0]:nest_index[-1][1]], # last + block[nest_index[-1][1]:], True) # nest + + def run(self, parent, blocks, tail=None, nest=False): + self.parser.blockprocessors.tag_counter += 1 + tag_data = self.parser.markdown.htmlStash.tag_data[ + self.parser.blockprocessors.tag_counter] + + # Create Element + markdown_value = tag_data['attrs'].pop('markdown') + element = util.etree.SubElement(parent, tag_data['tag'], + tag_data['attrs']) + + # Slice Off Block + if nest: + self.parser.parseBlocks(parent, tail) # Process Tail + block = blocks[1:] + else: # includes nests since a third level of nesting isn't supported + block = blocks[tag_data['left_index'] + 1: + tag_data['right_index']] + del blocks[:tag_data['right_index']] + + # Process Text + if (self.parser.blockprocessors.contain_span_tags.match( # Span Mode + tag_data['tag']) and markdown_value != 'block') or \ + markdown_value == 'span': + element.text = '\n'.join(block) + else: # Block Mode + i = self.parser.blockprocessors.tag_counter + 1 + if len(self.parser.markdown.htmlStash.tag_data) > i and self.\ + parser.markdown.htmlStash.tag_data[i]['left_index']: + first_subelement_index = self.parser.markdown.htmlStash.\ + tag_data[i]['left_index'] - 1 + self.parser.parseBlocks( + element, block[:first_subelement_index]) + if not nest: + block = self._process_nests(element, block) + else: + self.parser.parseBlocks(element, block) diff --git a/markdown/preprocessors.py b/markdown/preprocessors.py index 72b2ed6..c532702 100644 --- a/markdown/preprocessors.py +++ b/markdown/preprocessors.py @@ -3,7 +3,7 @@ PRE-PROCESSORS ============================================================================= Preprocessors work on source text before we start doing anything too -complicated. +complicated. 
""" from __future__ import absolute_import @@ -62,7 +62,7 @@ class HtmlBlockPreprocessor(Preprocessor): right_tag_patterns = ["", "%s>"] attrs_pattern = r""" \s+(?P[^>"'/= ]+)=(?P['"])(?P.*?)(?P=q) # attr="value" - | # OR + | # OR \s+(?P[^>"'/= ]+)=(?P[^> ]+) # attr=value | # OR \s+(?P[^>"'/= ]+) # attr @@ -102,7 +102,7 @@ class HtmlBlockPreprocessor(Preprocessor): i = block.find(rtag, start_index) if i == -1: return -1 - j = block.find(ltag, start_index) + j = block.find(ltag, start_index) # if no ltag, or rtag found before another ltag, return index if (j > i or j == -1): return i + len(rtag) @@ -111,7 +111,7 @@ class HtmlBlockPreprocessor(Preprocessor): j = block.find('>', j) start_index = self._recursive_tagfind(ltag, rtag, j + 1, block) if start_index == -1: - # HTML potentially malformed- ltag has no corresponding + # HTML potentially malformed- ltag has no corresponding # rtag return -1 @@ -122,7 +122,7 @@ class HtmlBlockPreprocessor(Preprocessor): if i > 2: return tag.lstrip("<").rstrip(">"), i return block.rstrip()[-left_index:-1].lower(), len(block) - + def _equal_tags(self, left_tag, right_tag): if left_tag[0] in ['?', '@', '%']: # handle PHP, etc. return True @@ -139,6 +139,49 @@ class HtmlBlockPreprocessor(Preprocessor): def _is_oneliner(self, tag): return (tag in ['hr', 'hr/']) + def _stringindex_to_listindex(self, stringindex, items): + """ + Same effect as concatenating the strings in items, + finding the character to which stringindex refers in that string, + and returning the item in which that character resides. 
+ """ + items.append('dummy') + i, count = 0, 0 + while count <= stringindex: + count += len(items[i]) + i += 1 + return i - 1 + + def _nested_markdown_in_html(self, items): + """Find and process html child elements of the given element block.""" + i = 0 + while i < len(items): + if self.left_tag_re.match(items[i]): + left_tag, left_index, attrs = \ + self._get_left_tag(''.join(items[i:])) + right_tag, data_index = self._get_right_tag( + left_tag, left_index, ''.join(items[i:])) + right_listindex = \ + self._stringindex_to_listindex(data_index, items[i:]) + i + if 'markdown' in attrs.keys(): + placeholder = self.markdown.htmlStash.store_tag( + left_tag, attrs, i + 1, right_listindex + 1) + items = items[:i] + [placeholder] + \ + [items[i][left_index:]] + items[i + 1:] + if len(items) - right_listindex <= 1: # last nest, no tail + right_listindex -= 1 + items[right_listindex] = items[right_listindex][ + :-len(right_tag) - 2] # remove closing tag + else: # raw html + if len(items) - right_listindex <= 1: # last element + right_listindex -= 1 + placeholder = self.markdown.htmlStash.store('\n\n'.join( + items[i:right_listindex])) + del items[i:right_listindex] + items.insert(i, placeholder) + i += 1 + return items + def run(self, lines): text = "\n".join(lines) new_blocks = [] @@ -165,14 +208,14 @@ class HtmlBlockPreprocessor(Preprocessor): left_tag, left_index, attrs = "--", 2, {} else: left_tag, left_index, attrs = self._get_left_tag(block) - right_tag, data_index = self._get_right_tag(left_tag, + right_tag, data_index = self._get_right_tag(left_tag, left_index, block) # keep checking conditions below and maybe just append - + if data_index < len(block) \ and (util.isBlockLevel(left_tag) - or left_tag == '--'): + or left_tag == '--'): text.insert(0, block[data_index:]) block = block[:data_index] @@ -186,22 +229,17 @@ class HtmlBlockPreprocessor(Preprocessor): continue if block.rstrip().endswith(">") \ - and self._equal_tags(left_tag, right_tag): + and 
self._equal_tags(left_tag, right_tag): if self.markdown_in_raw and 'markdown' in attrs.keys(): - start = re.sub(r'\smarkdown(=[\'"]?[^> ]*[\'"]?)?', - '', block[:left_index]) - end = block[-len(right_tag)-2:] - block = block[left_index:-len(right_tag)-2] - new_blocks.append( - self.markdown.htmlStash.store(start)) - new_blocks.append(block) - new_blocks.append( - self.markdown.htmlStash.store(end)) + block = block[left_index:-len(right_tag) - 2] + new_blocks.append(self.markdown.htmlStash. + store_tag(left_tag, attrs, 0, 2)) + new_blocks.extend([block]) else: new_blocks.append( self.markdown.htmlStash.store(block.strip())) continue - else: + else: # if is block level tag and is not complete if util.isBlockLevel(left_tag) or left_tag == "--" \ @@ -214,7 +252,8 @@ class HtmlBlockPreprocessor(Preprocessor): continue - new_blocks.append(block) + else: + new_blocks.append(block) else: items.append(block) @@ -223,7 +262,7 @@ class HtmlBlockPreprocessor(Preprocessor): if self._equal_tags(left_tag, right_tag): # if find closing tag - + if data_index < len(block): # we have more text after right_tag items[-1] = block[:data_index] @@ -231,16 +270,17 @@ class HtmlBlockPreprocessor(Preprocessor): in_tag = False if self.markdown_in_raw and 'markdown' in attrs.keys(): - start = re.sub(r'\smarkdown(=[\'"]?[^> ]*[\'"]?)?', - '', items[0][:left_index]) items[0] = items[0][left_index:] - end = items[-1][-len(right_tag)-2:] - items[-1] = items[-1][:-len(right_tag)-2] - new_blocks.append( - self.markdown.htmlStash.store(start)) - new_blocks.extend(items) - new_blocks.append( - self.markdown.htmlStash.store(end)) + items[-1] = items[-1][:-len(right_tag) - 2] + new_blocks.append(self.markdown.htmlStash.store_tag( + left_tag, attrs, 0, len(items) + 2)) + placeholderslen = len(self.markdown.htmlStash.tag_data) + new_blocks.extend( + self._nested_markdown_in_html(items)) + nests = len(self.markdown.htmlStash.tag_data) - \ + placeholderslen + self.markdown.htmlStash.tag_data[-1 - nests][ + 
'right_index'] += nests - 2 else: new_blocks.append( self.markdown.htmlStash.store('\n\n'.join(items))) @@ -248,21 +288,19 @@ class HtmlBlockPreprocessor(Preprocessor): if items: if self.markdown_in_raw and 'markdown' in attrs.keys(): - start = re.sub(r'\smarkdown(=[\'"]?[^> ]*[\'"]?)?', - '', items[0][:left_index]) items[0] = items[0][left_index:] - end = items[-1][-len(right_tag)-2:] - items[-1] = items[-1][:-len(right_tag)-2] + items[-1] = items[-1][:-len(right_tag) - 2] new_blocks.append( - self.markdown.htmlStash.store(start)) - new_blocks.extend(items) - if end.strip(): - new_blocks.append( - self.markdown.htmlStash.store(end)) + self.markdown.htmlStash.store_tag( + left_tag, attrs, 0, len(items) + 2)) + placeholderslen = len(self.markdown.htmlStash.tag_data) + new_blocks.extend(self._nested_markdown_in_html(items)) + nests = len(self.markdown.htmlStash.tag_data) - placeholderslen + self.markdown.htmlStash.tag_data[-1 - nests][ + 'right_index'] += nests - 2 else: new_blocks.append( self.markdown.htmlStash.store('\n\n'.join(items))) - #new_blocks.append(self.markdown.htmlStash.store('\n\n'.join(items))) new_blocks.append('\n') new_text = "\n\n".join(new_blocks) diff --git a/markdown/util.py b/markdown/util.py index d0ef8a3..8c2e6d7 100644 --- a/markdown/util.py +++ b/markdown/util.py @@ -40,6 +40,7 @@ INLINE_PLACEHOLDER_RE = re.compile(INLINE_PLACEHOLDER % r'([0-9]{4})') AMP_SUBSTITUTE = STX+"amp"+ETX HTML_PLACEHOLDER = STX + "wzxhzdk:%s" + ETX HTML_PLACEHOLDER_RE = re.compile(HTML_PLACEHOLDER % r'([0-9]+)') +TAG_PLACEHOLDER = STX + "hzzhzkh:%s" + ETX """ @@ -119,10 +120,12 @@ class HtmlStash(object): in the beginning and replace with place-holders. """ - def __init__ (self): + def __init__(self): """ Create a HtmlStash. 
""" - self.html_counter = 0 # for counting inline html segments - self.rawHtmlBlocks=[] + self.html_counter = 0 # for counting inline html segments + self.rawHtmlBlocks = [] + self.tag_counter = 0 + self.tag_data = [] # list of dictionaries in the order tags appear def store(self, html, safe=False): """ @@ -150,3 +153,11 @@ class HtmlStash(object): def get_placeholder(self, key): return HTML_PLACEHOLDER % key + def store_tag(self, tag, attrs, left_index, right_index): + """Store tag data and return a placeholder.""" + self.tag_data.append({'tag': tag, 'attrs': attrs, + 'left_index': left_index, + 'right_index': right_index}) + placeholder = TAG_PLACEHOLDER % str(self.tag_counter) + self.tag_counter += 1 # equal to the tag's index in self.tag_data + return placeholder diff --git a/tests/extensions/extra/raw-html.html b/tests/extensions/extra/raw-html.html index b2a7c4d..897ad24 100644 --- a/tests/extensions/extra/raw-html.html +++ b/tests/extensions/extra/raw-html.html @@ -1,14 +1,31 @@
-

foo

-
-

bar

-
-

blah

-
\ No newline at end of file + +
+

The text of the Example element.

+
+

This text gets wrapped in p tags.

+
+

The tail of the DefaultBlockMode subelement.

+

+This text is not wrapped in additional p tags.

+

The tail of the DefaultSpanMode subelement.

+
+This div block is not wrapped in paragraph tags. +Note: Subelements are not required to have tail text.
+

+

This p block is foolishly wrapped in further paragraph tags.

+

+

The tail of the BlockModeOverride subelement.

+
+Raw html blocks may also be nested. +
+ +
+

This text is after the markdown in html.

\ No newline at end of file diff --git a/tests/extensions/extra/raw-html.txt b/tests/extensions/extra/raw-html.txt index 284fe0c..e906910 100644 --- a/tests/extensions/extra/raw-html.txt +++ b/tests/extensions/extra/raw-html.txt @@ -9,4 +9,38 @@ _bar_ _blah_ + +
+The text of the `Example` element. + +
+This text gets wrapped in `p` tags. +
+ +The tail of the `DefaultBlockMode` subelement. + +

+This text *is not* wrapped in additional `p` tags. +

+ +The tail of the `DefaultSpanMode` subelement. + +
+This `div` block is not wrapped in paragraph tags. +Note: Subelements are not required to have tail text. +
+ +

+This `p` block *is* foolishly wrapped in further paragraph tags. +

+ +The tail of the `BlockModeOverride` subelement. + +
+Raw html blocks may also be nested. +
+ +
+ +This text is after the markdown in html. -- cgit v1.2.3