diff options
author | Waylan Limberg <waylan@gmail.com> | 2013-07-28 15:09:25 -0400 |
---|---|---|
committer | Waylan Limberg <waylan@gmail.com> | 2013-07-28 15:09:25 -0400 |
commit | a4ceb0b2a5f2c2ae8aa3981182cf829fdd28e840 (patch) | |
tree | 8f1b55691e95a91ccc010bb5f95f33383bf862e8 | |
parent | df8423b93eb11fede3a8eeae416e5985ffab64da (diff) | |
download | markdown-a4ceb0b2a5f2c2ae8aa3981182cf829fdd28e840.tar.gz markdown-a4ceb0b2a5f2c2ae8aa3981182cf829fdd28e840.tar.bz2 markdown-a4ceb0b2a5f2c2ae8aa3981182cf829fdd28e840.zip |
HeaderID Ext now handles raw html in ids. Fixes #232
-rw-r--r-- | markdown/extensions/headerid.py | 17 | ||||
-rw-r--r-- | markdown/util.py | 5 | ||||
-rw-r--r-- | tests/test_extensions.py | 12 |
3 files changed, 32 insertions, 2 deletions
```diff
diff --git a/markdown/extensions/headerid.py b/markdown/extensions/headerid.py
index 7681b8d..8020d93 100644
--- a/markdown/extensions/headerid.py
+++ b/markdown/extensions/headerid.py
@@ -78,6 +78,7 @@ from __future__ import absolute_import
 from __future__ import unicode_literals
 from . import Extension
 from ..treeprocessors import Treeprocessor
+from ..util import HTML_PLACEHOLDER_RE
 import re
 import logging
 import unicodedata
@@ -130,13 +131,27 @@ class HeaderIdTreeprocessor(Treeprocessor):
         start_level, force_id = self._get_meta()
         slugify = self.config['slugify']
         sep = self.config['separator']
+
+        def _html_sub(m):
+            """ Substitute raw html with plain text. """
+            try:
+                raw, safe = self.md.htmlStash.rawHtmlBlocks[int(m.group(1))]
+            except (IndexError, TypeError):
+                return m.group(0)
+            if self.md.safeMode and not safe:
+                return ''
+            # Strip out tags and entities - leaveing text
+            return re.sub(r'(<[^>]+>)|(&[\#a-zA-Z0-9]+;)', '', raw)
+
         for elem in doc.getiterator():
             if elem.tag in ['h1', 'h2', 'h3', 'h4', 'h5', 'h6']:
                 if force_id:
                     if "id" in elem.attrib:
                         id = elem.get('id')
                     else:
-                        id = slugify(''.join(itertext(elem)), sep)
+                        id = HTML_PLACEHOLDER_RE.sub(_html_sub,
+                                ''.join(itertext(elem)))
+                        id = slugify(id, sep)
                     elem.set('id', unique(id, self.IDs))
                 if start_level:
                     level = int(elem.tag[-1]) + start_level
diff --git a/markdown/util.py b/markdown/util.py
index 1036197..7cb933a 100644
--- a/markdown/util.py
+++ b/markdown/util.py
@@ -38,6 +38,9 @@ INLINE_PLACEHOLDER_PREFIX = STX+"klzzwxh:"
 INLINE_PLACEHOLDER = INLINE_PLACEHOLDER_PREFIX + "%s" + ETX
 INLINE_PLACEHOLDER_RE = re.compile(INLINE_PLACEHOLDER % r'([0-9]{4})')
 AMP_SUBSTITUTE = STX+"amp"+ETX
+HTML_PLACEHOLDER = STX + "wzxhzdk:%s" + ETX
+HTML_PLACEHOLDER_RE = re.compile(HTML_PLACEHOLDER % r'([0-9]+)')
+
 
 """
 Constants you probably do not need to change
@@ -132,5 +135,5 @@ class HtmlStash(object):
         self.rawHtmlBlocks = []
 
     def get_placeholder(self, key):
-        return "%swzxhzdk:%d%s" % (STX, key, ETX)
+        return HTML_PLACEHOLDER % key
 
diff --git a/tests/test_extensions.py b/tests/test_extensions.py
index 4eb600b..add759a 100644
--- a/tests/test_extensions.py
+++ b/tests/test_extensions.py
@@ -236,6 +236,18 @@ class TestHeaderId(unittest.TestCase):
             '<h1 id="some-header-with-markup">Some <em>Header</em> with '
             '<a href="http://example.com">markup</a>.</h1>')
 
+    def testHtmlEntities(self):
+        """ Test HeaderIDs with HTML Entities. """
+        text = '# Foo &amp; bar'
+        self.assertEqual(self.md.convert(text),
+            '<h1 id="foo-bar">Foo &amp; bar</h1>')
+
+    def testRawHtml(self):
+        """ Test HeaderIDs with raw HTML. """
+        text = '# Foo <b>Bar</b> Baz.'
+        self.assertEqual(self.md.convert(text),
+            '<h1 id="foo-bar-baz">Foo <b>Bar</b> Baz.</h1>')
+
     def testNoAutoIds(self):
         """ Test HeaderIDs with no auto generated IDs. """
```