From c2d50b46b536a440b6d73c6bf309bdaf03b90abb Mon Sep 17 00:00:00 2001 From: Waylan Limberg Date: Sun, 3 Jan 2010 23:00:22 -0500 Subject: Cleanup and additional work on previous commit. NOTE: removed special treatment of raw <div>
s with multiple line breaks - they no longer automagicly process their content as markdown. This matches other implementations. Finished rest of code for use by an extension - to be added later. --- markdown/preprocessors.py | 78 +++++++++++++++++++++++++++++--------- tests/misc/div.html | 3 +- tests/misc/multi-line-tags.html | 3 +- tests/misc/multiline-comments.html | 5 ++- 4 files changed, 68 insertions(+), 21 deletions(-) diff --git a/markdown/preprocessors.py b/markdown/preprocessors.py index 072784b..cbf40e3 100644 --- a/markdown/preprocessors.py +++ b/markdown/preprocessors.py @@ -78,13 +78,16 @@ class HtmlBlockPreprocessor(Preprocessor): right_tag_patterns = ["", "%s>"] attrs_pattern = r""" - \s+(?P [^>"'/ ]+)=(?P['"])(?P.*?)(?P=q) # attr="value" + \s+(?P[^>"'/= ]+)=(?P['"])(?P.*?)(?P=q) # attr="value" | # OR - \s+(?P [^>"'/ ]+) # attr + \s+(?P[^>"'/= ]+)=(?P[^> ]+) # attr=value + | # OR + \s+(?P[^>"'/= ]+) # attr """ left_tag_pattern = r'^\<(?P[^> ]+)(?P(%s)*)\s*\/?\>?' % attrs_pattern attrs_re = re.compile(attrs_pattern, re.VERBOSE) left_tag_re = re.compile(left_tag_pattern, re.VERBOSE) + markdown_in_raw = False def _get_left_tag(self, block): m = self.left_tag_re.match(block) @@ -94,17 +97,23 @@ class HtmlBlockPreprocessor(Preprocessor): attrs = {} if raw_attrs: for ma in self.attrs_re.finditer(raw_attrs): - if ma.group('value'): - attrs[ma.group('attr').strip()] = ma.group('value') - elif ma.group('attr'): - attrs[ma.group('attr').strip()] = "" + if ma.group('attr'): + if ma.group('value'): + attrs[ma.group('attr').strip()] = ma.group('value') + else: + attrs[ma.group('attr').strip()] = "" + elif ma.group('attr1'): + if ma.group('value1'): + attrs[ma.group('attr1').strip()] = ma.group('value1') + else: + attrs[ma.group('attr1').strip()] = "" + elif ma.group('attr2'): + attrs[ma.group('attr2').strip()] = "" return tag, len(m.group(0)), attrs else: tag = block[1:].replace(">", " ", 1).split()[0].lower() return tag, len(tag+2), {} - #return 
block[1:].replace(">", " ", 1).split()[0].lower() - def _get_right_tag(self, left_tag, left_index, block): for p in self.right_tag_patterns: tag = p % left_tag @@ -114,7 +123,7 @@ class HtmlBlockPreprocessor(Preprocessor): return block.rstrip()[-left_index:-1].lower(), len(block) def _equal_tags(self, left_tag, right_tag): - if left_tag == 'div' or left_tag[0] in ['?', '@', '%']: # handle PHP, etc. + if left_tag[0] in ['?', '@', '%']: # handle PHP, etc. return True if ("/" + left_tag) == right_tag: return True @@ -137,7 +146,7 @@ class HtmlBlockPreprocessor(Preprocessor): left_tag = '' right_tag = '' in_tag = False # flag - #import pdb; pdb.set_trace() + while text: block = text[0] if block.startswith("\n"): @@ -175,13 +184,24 @@ class HtmlBlockPreprocessor(Preprocessor): if self._is_oneliner(left_tag): new_blocks.append(block.strip()) continue - + if block.rstrip().endswith(">") \ and self._equal_tags(left_tag, right_tag): - new_blocks.append( - self.markdown.htmlStash.store(block.strip())) + if self.markdown_in_raw and 'markdown' in attrs.keys(): + start = re.sub(r'\smarkdown(=[\'"]?[^> ]*[\'"]?)?', + '', block[:left_index]) + end = block[-len(right_tag)-2:] + block = block[left_index:-len(right_tag)-2] + new_blocks.append( + self.markdown.htmlStash.store(start)) + new_blocks.append(block) + new_blocks.append( + self.markdown.htmlStash.store(end)) + else: + new_blocks.append( + self.markdown.htmlStash.store(block.strip())) continue - else: #if not block[1] == "!": + else: # if is block level tag and is not complete if markdown.isBlockLevel(left_tag) or left_tag == "--" \ @@ -206,12 +226,36 @@ class HtmlBlockPreprocessor(Preprocessor): if self._equal_tags(left_tag, right_tag): # if find closing tag in_tag = False - new_blocks.append( - self.markdown.htmlStash.store('\n\n'.join(items))) + if self.markdown_in_raw and 'markdown' in attrs.keys(): + start = items[0][:left_index] + items[0] = items[0][left_index:] + end = items[-1][-len(right_tag)-2:] + items[-1] = 
items[-1][:-len(right_tag)-2] + new_blocks.append( + self.markdown.htmlStash.store(start)) + new_blocks.extend(items) + new_blocks.append( + self.markdown.htmlStash.store(end)) + else: + new_blocks.append( + self.markdown.htmlStash.store('\n\n'.join(items))) items = [] if items: - new_blocks.append(self.markdown.htmlStash.store('\n\n'.join(items))) + if self.markdown_in_raw and 'markdown' in attrs.keys(): + start = items[0][:left_index] + items[0] = items[0][left_index:] + end = items[-1][-len(right_tag)-2:] + items[-1] = items[-1][:-len(right_tag)-2] + new_blocks.append( + self.markdown.htmlStash.store(start)) + new_blocks.extend(items) + new_blocks.append( + self.markdown.htmlStash.store(end)) + else: + new_blocks.append( + self.markdown.htmlStash.store('\n\n'.join(items))) + #new_blocks.append(self.markdown.htmlStash.store('\n\n'.join(items))) new_blocks.append('\n') new_text = "\n\n".join(new_blocks) diff --git a/tests/misc/div.html b/tests/misc/div.html index 7cd0d6d..7b68854 100644 --- a/tests/misc/div.html +++ b/tests/misc/div.html @@ -1,4 +1,5 @@ \ No newline at end of file diff --git a/tests/misc/multi-line-tags.html b/tests/misc/multi-line-tags.html index 763a050..784c1dd 100644 --- a/tests/misc/multi-line-tags.html +++ b/tests/misc/multi-line-tags.html @@ -1,4 +1,5 @@
-

asdf asdfasd

+asdf asdfasd +
\ No newline at end of file diff --git a/tests/misc/multiline-comments.html b/tests/misc/multiline-comments.html index 547ba0b..12f8cb5 100644 --- a/tests/misc/multiline-comments.html +++ b/tests/misc/multiline-comments.html @@ -2,7 +2,7 @@ foo ---> +-->

@@ -12,5 +12,6 @@ foo

-

foo

+foo +
\ No newline at end of file -- cgit v1.2.3