diff options
-rw-r--r-- | markdown/extensions/headerid.py | 31 | ||||
-rw-r--r-- | markdown/extensions/toc.py | 5 |
2 files changed, 20 insertions, 16 deletions
diff --git a/markdown/extensions/headerid.py b/markdown/extensions/headerid.py
index 8020d93..0476f9e 100644
--- a/markdown/extensions/headerid.py
+++ b/markdown/extensions/headerid.py
@@ -122,6 +122,22 @@ def itertext(elem):
             yield e.tail
 
 
+def stashedHTML2text(text, md):
+    """ Extract raw HTML, reduce to plain text and swap with placeholder. """
+    def _html_sub(m):
+        """ Substitute raw html with plain text. """
+        try:
+            raw, safe = md.htmlStash.rawHtmlBlocks[int(m.group(1))]
+        except (IndexError, TypeError):
+            return m.group(0)
+        if md.safeMode and not safe:
+            return ''
+        # Strip out tags and entities - leaveing text
+        return re.sub(r'(<[^>]+>)|(&[\#a-zA-Z0-9]+;)', '', raw)
+
+    return HTML_PLACEHOLDER_RE.sub(_html_sub, text)
+
+
 class HeaderIdTreeprocessor(Treeprocessor):
     """ Assign IDs to headers. """
 
@@ -131,26 +147,13 @@ class HeaderIdTreeprocessor(Treeprocessor):
         start_level, force_id = self._get_meta()
         slugify = self.config['slugify']
         sep = self.config['separator']
-
-        def _html_sub(m):
-            """ Substitute raw html with plain text. """
-            try:
-                raw, safe = self.md.htmlStash.rawHtmlBlocks[int(m.group(1))]
-            except (IndexError, TypeError):
-                return m.group(0)
-            if self.md.safeMode and not safe:
-                return ''
-            # Strip out tags and entities - leaveing text
-            return re.sub(r'(<[^>]+>)|(&[\#a-zA-Z0-9]+;)', '', raw)
-
         for elem in doc.getiterator():
             if elem.tag in ['h1', 'h2', 'h3', 'h4', 'h5', 'h6']:
                 if force_id:
                     if "id" in elem.attrib:
                         id = elem.get('id')
                     else:
-                        id = HTML_PLACEHOLDER_RE.sub(_html_sub,
-                                                     ''.join(itertext(elem)))
+                        id = stashedHTML2text(''.join(itertext(elem)), self.md)
                         id = slugify(id, sep)
                     elem.set('id', unique(id, self.IDs))
                 if start_level:
diff --git a/markdown/extensions/toc.py b/markdown/extensions/toc.py
index 73b0844..99afba0 100644
--- a/markdown/extensions/toc.py
+++ b/markdown/extensions/toc.py
@@ -14,7 +14,7 @@ from __future__ import unicode_literals
 from . import Extension
 from ..treeprocessors import Treeprocessor
 from ..util import etree
-from .headerid import slugify, unique, itertext
+from .headerid import slugify, unique, itertext, stashedHTML2text
 import re
 
 
@@ -160,7 +160,8 @@ class TocTreeprocessor(Treeprocessor):
 
                 # Do not override pre-existing ids
                 if not "id" in c.attrib:
-                    elem_id = unique(self.config["slugify"](text, '-'), used_ids)
+                    elem_id = stashedHTML2text(text, self.markdown)
+                    elem_id = unique(self.config["slugify"](elem_id, '-'), used_ids)
                     c.attrib["id"] = elem_id
                 else:
                     elem_id = c.attrib["id"]