diff options
author | Waylan Limberg <waylan@gmail.com> | 2013-08-11 08:35:15 -0400 |
---|---|---|
committer | Waylan Limberg <waylan@gmail.com> | 2013-08-11 08:35:15 -0400 |
commit | 72012c35273cef24b57af833cb38ac4cca6db98e (patch) | |
tree | 20165ae59468bc5d6f0964a8258c05e78030c743 /markdown/extensions/headerid.py | |
parent | d12d2c95b42c5fc0910b13859001755d71ad3438 (diff) | |
download | markdown-72012c35273cef24b57af833cb38ac4cca6db98e.tar.gz markdown-72012c35273cef24b57af833cb38ac4cca6db98e.tar.bz2 markdown-72012c35273cef24b57af833cb38ac4cca6db98e.zip |
Extended headerid's rawHTML in id handling to toc ext.
Diffstat (limited to 'markdown/extensions/headerid.py')
-rw-r--r-- | markdown/extensions/headerid.py | 31 |
1 files changed, 17 insertions, 14 deletions
```diff
diff --git a/markdown/extensions/headerid.py b/markdown/extensions/headerid.py
index 8020d93..0476f9e 100644
--- a/markdown/extensions/headerid.py
+++ b/markdown/extensions/headerid.py
@@ -122,6 +122,22 @@ def itertext(elem):
             yield e.tail
 
 
+def stashedHTML2text(text, md):
+    """ Extract raw HTML, reduce to plain text and swap with placeholder. """
+    def _html_sub(m):
+        """ Substitute raw html with plain text. """
+        try:
+            raw, safe = md.htmlStash.rawHtmlBlocks[int(m.group(1))]
+        except (IndexError, TypeError):
+            return m.group(0)
+        if md.safeMode and not safe:
+            return ''
+        # Strip out tags and entities - leaveing text
+        return re.sub(r'(<[^>]+>)|(&[\#a-zA-Z0-9]+;)', '', raw)
+
+    return HTML_PLACEHOLDER_RE.sub(_html_sub, text)
+
+
 class HeaderIdTreeprocessor(Treeprocessor):
     """ Assign IDs to headers. """
 
@@ -131,26 +147,13 @@ class HeaderIdTreeprocessor(Treeprocessor):
         start_level, force_id = self._get_meta()
         slugify = self.config['slugify']
         sep = self.config['separator']
-
-        def _html_sub(m):
-            """ Substitute raw html with plain text. """
-            try:
-                raw, safe = self.md.htmlStash.rawHtmlBlocks[int(m.group(1))]
-            except (IndexError, TypeError):
-                return m.group(0)
-            if self.md.safeMode and not safe:
-                return ''
-            # Strip out tags and entities - leaveing text
-            return re.sub(r'(<[^>]+>)|(&[\#a-zA-Z0-9]+;)', '', raw)
-
         for elem in doc.getiterator():
             if elem.tag in ['h1', 'h2', 'h3', 'h4', 'h5', 'h6']:
                 if force_id:
                     if "id" in elem.attrib:
                         id = elem.get('id')
                     else:
-                        id = HTML_PLACEHOLDER_RE.sub(_html_sub,
-                                                     ''.join(itertext(elem)))
+                        id = stashedHTML2text(''.join(itertext(elem)), self.md)
                         id = slugify(id, sep)
                     elem.set('id', unique(id, self.IDs))
                 if start_level:
```