about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Isaac Muse <faceless.shop@gmail.com> 2017-01-24 08:36:37 -0700
committer: Waylan Limberg <waylan.limberg@icloud.com> 2017-01-24 10:36:37 -0500
commit: 94962cb644b3117e7dd46f16b1a4341609ad6bf2 (patch)
tree: ced3da4ddaf0dda4043e1a1d94bc3145330a1d62
parent: facfd66d5e41e03d69901fe2f4ae0e24106a21f2 (diff)
download: markdown-94962cb644b3117e7dd46f16b1a4341609ad6bf2.tar.gz
markdown-94962cb644b3117e7dd46f16b1a4341609ad6bf2.tar.bz2
markdown-94962cb644b3117e7dd46f16b1a4341609ad6bf2.zip
Fix HTML parse with empty lines (#537)
If the opening and closing tags were not both found in the first block, additional blocks were evaluated without the context of the previous blocks. The algorithm needs to evaluate a buffer in which the left bracket is present. So feed in all items joined together to find the right bracket, then adjust data_index so that it is relative to the last block rather than to the joined items. Fixes #452.
-rw-r--r-- markdown/preprocessors.py 8
-rw-r--r-- tests/misc/html.html 11
-rw-r--r-- tests/misc/html.txt 11
3 files changed, 29 insertions, 1 deletion
diff --git a/markdown/preprocessors.py b/markdown/preprocessors.py
index 7ea4fcf..94f9830 100644
--- a/markdown/preprocessors.py
+++ b/markdown/preprocessors.py
@@ -258,7 +258,13 @@ class HtmlBlockPreprocessor(Preprocessor):
else:
items.append(block)
- right_tag, data_index = self._get_right_tag(left_tag, 0, block)
+ # Need to evaluate all items so we can calculate relative to the left index.
+ right_tag, data_index = self._get_right_tag(left_tag, left_index, ''.join(items))
+ # Adjust data_index: relative to items -> relative to last block
+ prev_block_length = 0
+ for item in items[:-1]:
+ prev_block_length += len(item)
+ data_index -= prev_block_length
if self._equal_tags(left_tag, right_tag):
# if find closing tag
diff --git a/tests/misc/html.html b/tests/misc/html.html
index 1eb6a97..5380bbd 100644
--- a/tests/misc/html.html
+++ b/tests/misc/html.html
@@ -8,6 +8,17 @@
Html with various attributes.
</div>
+<div>
+ <div>
+ Div with a blank line
+
+ in the middle.
+ </div>
+ <div>
+ This gets treated as HTML.
+ </div>
+</div>
+
<p>And of course <script>blah</script>.</p>
<p><a href="script&gt;stuff&lt;/script">this <script>link</a></p>
<p>Some funky <x\]> inline stuff with markdown escaping syntax.</p>
diff --git a/tests/misc/html.txt b/tests/misc/html.txt
index dfee68d..8f18fa7 100644
--- a/tests/misc/html.txt
+++ b/tests/misc/html.txt
@@ -11,6 +11,17 @@ Now some <arbitrary>arbitrary tags</arbitrary>.
Html with various attributes.
</div>
+<div>
+ <div>
+ Div with a blank line
+
+ in the middle.
+ </div>
+ <div>
+ This gets treated as HTML.
+ </div>
+</div>
+
And of course <script>blah</script>.
[this <script>link](<script>stuff</script>)