diff options
author | Waylan Limberg <waylan@gmail.com> | 2014-01-12 18:17:37 -0500 |
---|---|---|
committer | Waylan Limberg <waylan@gmail.com> | 2014-01-12 18:17:37 -0500 |
commit | 28d61f5b6b1e0863857c9e1368b3f7698c2f67dd (patch) | |
tree | f85576ea19da02d9717ebf34e9b0edfdc3ec7d28 | |
parent | d0e088d535a478b9435ff49fd27583f5cb9c2641 (diff) | |
download | markdown-28d61f5b6b1e0863857c9e1368b3f7698c2f67dd.tar.gz markdown-28d61f5b6b1e0863857c9e1368b3f7698c2f67dd.tar.bz2 markdown-28d61f5b6b1e0863857c9e1368b3f7698c2f67dd.zip |
Improved multiline comment parsing.
Fixes #257 and slightly alters comment parsing behavior.
Unlike self-closing tags, a comment can contain angle brackets between the
opening and closing tags. The greater-than angle bracket at the end of the
first block should not be mistaken for closing the comment. Need to actually
check for a comment closing tag (`-->`). If one is not found, then the comment
keeps going (to the end of the document if necessary) just like in HTML.
That last bit is a slight change from previous behavior, but should be
unsurprising as that's how browsers parse HTML comments. And as far as
I can tell, more implementations follow this behavior than any other. The
ones that don't seem to be all over the place.
-rw-r--r-- | markdown/preprocessors.py | 7 | ||||
-rw-r--r-- | tests/misc/more_comments.html | 8 | ||||
-rw-r--r-- | tests/misc/more_comments.txt | 10 | ||||
-rw-r--r-- | tests/misc/multiline-comments.html | 22 | ||||
-rw-r--r-- | tests/misc/multiline-comments.txt | 20 |
5 files changed, 55 insertions, 12 deletions
diff --git a/markdown/preprocessors.py b/markdown/preprocessors.py index 1b5bc7e..1e10cfc 100644 --- a/markdown/preprocessors.py +++ b/markdown/preprocessors.py @@ -201,7 +201,7 @@ class HtmlBlockPreprocessor(Preprocessor): if not in_tag: if block.startswith("<") and len(block.strip()) > 1: - if block[1] == "!": + if block[1:4] == "!--": # is a comment block left_tag, left_index, attrs = "--", 2, {} else: @@ -239,9 +239,8 @@ class HtmlBlockPreprocessor(Preprocessor): continue else: # if is block level tag and is not complete - - if util.isBlockLevel(left_tag) or left_tag == "--" \ - and not block.rstrip().endswith(">"): + if (not self._equal_tags(left_tag, right_tag)) and \ + (util.isBlockLevel(left_tag) or left_tag == "--"): items.append(block.strip()) in_tag = True else: diff --git a/tests/misc/more_comments.html b/tests/misc/more_comments.html index 99f5781..5ca6731 100644 --- a/tests/misc/more_comments.html +++ b/tests/misc/more_comments.html @@ -1,6 +1,8 @@ -<!--asd@asdfd.com> - <!asd@asdfd.com> +<p>Foo</p> <p><asd!@asdfd.com></p> -<p>Test</p>
\ No newline at end of file +<p>Bar</p> +<!--asd@asdfd.com> + +Still in unclosed comment
\ No newline at end of file diff --git a/tests/misc/more_comments.txt b/tests/misc/more_comments.txt index 0397f9c..ddc5bd3 100644 --- a/tests/misc/more_comments.txt +++ b/tests/misc/more_comments.txt @@ -1,9 +1,11 @@ -<!--asd@asdfd.com> - - <!asd@asdfd.com> +Foo <asd!@asdfd.com> -Test +Bar + +<!--asd@asdfd.com> + +Still in unclosed comment diff --git a/tests/misc/multiline-comments.html b/tests/misc/multiline-comments.html index 29c17e9..4bdd5d0 100644 --- a/tests/misc/multiline-comments.html +++ b/tests/misc/multiline-comments.html @@ -14,4 +14,24 @@ foo foo -</div>
\ No newline at end of file +</div> + +<!-- foo + +--> + +<!-- <tag> + +--> + +<!-- + +foo --> + +<!-- + +<tag> --> + +<!-- unclosed comment + +__Still__ a comment (browsers see it that way)
\ No newline at end of file diff --git a/tests/misc/multiline-comments.txt b/tests/misc/multiline-comments.txt index 71bc418..eb567dd 100644 --- a/tests/misc/multiline-comments.txt +++ b/tests/misc/multiline-comments.txt @@ -16,3 +16,23 @@ foo foo </div> + +<!-- foo + +--> + +<!-- <tag> + +--> + +<!-- + +foo --> + +<!-- + +<tag> --> + +<!-- unclosed comment + +__Still__ a comment (browsers see it that way) |