From a4d4b61b5ce4a7dd96ddc13c66b8716d3ad8eb51 Mon Sep 17 00:00:00 2001 From: Waylan Limberg Date: Thu, 25 Jan 2018 15:38:05 -0500 Subject: Update block level elements. Also refactor from regex to a list and add comments to explain why the elements are in the list for future reference. Fixes #543. --- markdown/util.py | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/markdown/util.py b/markdown/util.py index 3a36c00..6591cf5 100644 --- a/markdown/util.py +++ b/markdown/util.py @@ -28,15 +28,18 @@ Constants you might want to modify """ -BLOCK_LEVEL_ELEMENTS = re.compile( - r"^(p|div|h[1-6]|blockquote|pre|table|dl|ol|ul" - r"|script|noscript|form|fieldset|iframe|math" - r"|hr|hr/|style|li|dt|dd|thead|tbody" - r"|tr|th|td|section|footer|header|group|figure" - r"|figcaption|aside|article|canvas|output" - r"|progress|video|nav|main)$", - re.IGNORECASE -) +BLOCK_LEVEL_ELEMENTS = [ + # Elements which are invalid to wrap in a `<p>` tag. + # See http://w3c.github.io/html/grouping-content.html#the-p-element + 'address', 'article', 'aside', 'blockquote', 'details', 'div', 'dl', + 'fieldset', 'figcaption', 'figure', 'footer', 'form', 'h1', 'h2', 'h3', + 'h4', 'h5', 'h6', 'header', 'hr', 'main', 'menu', 'nav', 'ol', 'p', 'pre', + 'section', 'table', 'ul', + # Other elements which Markdown should not be mucking up the contents of. + 'canvas', 'dd', 'dt', 'group', 'iframe', 'li', 'math', 'noscript', 'output', + 'progress', 'script', 'tbody', 'td', 'th', 'thead', 'tr', 'video' +] + # Placeholders STX = '\u0002' # Use STX ("Start of text") for start-of-placeholder ETX = '\u0003' # Use ETX ("End of text") for end-of-placeholder @@ -89,7 +92,7 @@ AUXILIARY GLOBAL FUNCTIONS def isBlockLevel(tag): """Check if the tag is a block level HTML tag.""" if isinstance(tag, string_type): - return BLOCK_LEVEL_ELEMENTS.match(tag) + return tag.lower().rstrip('/') in BLOCK_LEVEL_ELEMENTS # Some ElementTree tags are not strings, so return False. return False -- cgit v1.2.3