Extended headerid's rawHTML in id handling to toc ext.

author: Waylan Limberg <waylan@gmail.com> 2013-08-11 08:35:15 -0400
committer: Waylan Limberg <waylan@gmail.com> 2013-08-11 08:35:15 -0400
commit: 72012c35273cef24b57af833cb38ac4cca6db98e (patch)
tree: 20165ae59468bc5d6f0964a8258c05e78030c743 /markdown/extensions/headerid.py
parent: d12d2c95b42c5fc0910b13859001755d71ad3438 (diff)
download: markdown-72012c35273cef24b57af833cb38ac4cca6db98e.tar.gz
markdown-72012c35273cef24b57af833cb38ac4cca6db98e.tar.bz2
markdown-72012c35273cef24b57af833cb38ac4cca6db98e.zip
1 files changed, 17 insertions, 14 deletions
diff --git a/markdown/extensions/headerid.py b/markdown/extensions/headerid.py
index 8020d93..0476f9e 100644
--- a/markdown/extensions/headerid.py
+++ b/markdown/extensions/headerid.py
@@ -122,6 +122,22 @@ def itertext(elem):
             yield e.tail
 
 
+def stashedHTML2text(text, md):
+    """ Replace raw HTML placeholders in text with the stashed HTML as plain text. """
+    def _html_sub(m):
+        """ Return the plain text of the stashed raw HTML matched by m. """
+        try:
+            raw, safe = md.htmlStash.rawHtmlBlocks[int(m.group(1))]
+        except (IndexError, TypeError):
+            return m.group(0)  # stash lookup failed: keep the placeholder
+        if md.safeMode and not safe:
+            return ''  # safe mode is on and the block is not marked safe
+        # Strip out tags and entities - leaving text
+        return re.sub(r'(<[^>]+>)|(&[\#a-zA-Z0-9]+;)', '', raw)
+
+    return HTML_PLACEHOLDER_RE.sub(_html_sub, text)
+
+
 class HeaderIdTreeprocessor(Treeprocessor):
     """ Assign IDs to headers. """
 
@@ -131,26 +147,13 @@ class HeaderIdTreeprocessor(Treeprocessor):
         start_level, force_id = self._get_meta()
         slugify = self.config['slugify']
         sep = self.config['separator']
-
-        def _html_sub(m):
-            """ Substitute raw html with plain text. """
-            try:
-                raw, safe = self.md.htmlStash.rawHtmlBlocks[int(m.group(1))]
-            except (IndexError, TypeError):
-                return m.group(0)
-            if self.md.safeMode and not safe:
-                return ''
-            # Strip out tags and entities - leaveing text
-            return re.sub(r'(<[^>]+>)|(&[\#a-zA-Z0-9]+;)', '', raw)
-
         for elem in doc.getiterator():
             if elem.tag in ['h1', 'h2', 'h3', 'h4', 'h5', 'h6']:
                 if force_id:
                     if "id" in elem.attrib:
                         id = elem.get('id')
                     else:
-                        id = HTML_PLACEHOLDER_RE.sub(_html_sub, 
-                                                     ''.join(itertext(elem)))
+                        id = stashedHTML2text(''.join(itertext(elem)), self.md)
                         id = slugify(id, sep)
                     elem.set('id', unique(id, self.IDs))
                 if start_level:
author	Waylan Limberg <waylan@gmail.com>	2013-08-11 08:35:15 -0400
committer	Waylan Limberg <waylan@gmail.com>	2013-08-11 08:35:15 -0400
commit	72012c35273cef24b57af833cb38ac4cca6db98e (patch)
tree	20165ae59468bc5d6f0964a8258c05e78030c743 /markdown/extensions/headerid.py
parent	d12d2c95b42c5fc0910b13859001755d71ad3438 (diff)
download	markdown-72012c35273cef24b57af833cb38ac4cca6db98e.tar.gz markdown-72012c35273cef24b57af833cb38ac4cca6db98e.tar.bz2 markdown-72012c35273cef24b57af833cb38ac4cca6db98e.zip