aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorWaylan Limberg <waylan@gmail.com>2013-07-28 15:09:25 -0400
committerWaylan Limberg <waylan@gmail.com>2013-07-28 15:09:25 -0400
commita4ceb0b2a5f2c2ae8aa3981182cf829fdd28e840 (patch)
tree8f1b55691e95a91ccc010bb5f95f33383bf862e8
parentdf8423b93eb11fede3a8eeae416e5985ffab64da (diff)
downloadmarkdown-a4ceb0b2a5f2c2ae8aa3981182cf829fdd28e840.tar.gz
markdown-a4ceb0b2a5f2c2ae8aa3981182cf829fdd28e840.tar.bz2
markdown-a4ceb0b2a5f2c2ae8aa3981182cf829fdd28e840.zip
HeaderID Ext now handles raw html in ids. Fixes #232
-rw-r--r--markdown/extensions/headerid.py17
-rw-r--r--markdown/util.py5
-rw-r--r--tests/test_extensions.py12
3 files changed, 32 insertions, 2 deletions
diff --git a/markdown/extensions/headerid.py b/markdown/extensions/headerid.py
index 7681b8d..8020d93 100644
--- a/markdown/extensions/headerid.py
+++ b/markdown/extensions/headerid.py
@@ -78,6 +78,7 @@ from __future__ import absolute_import
from __future__ import unicode_literals
from . import Extension
from ..treeprocessors import Treeprocessor
+from ..util import HTML_PLACEHOLDER_RE
import re
import logging
import unicodedata
@@ -130,13 +131,27 @@ class HeaderIdTreeprocessor(Treeprocessor):
start_level, force_id = self._get_meta()
slugify = self.config['slugify']
sep = self.config['separator']
+
+ def _html_sub(m):
+ """ Substitute raw html with plain text. """
+ try:
+ raw, safe = self.md.htmlStash.rawHtmlBlocks[int(m.group(1))]
+ except (IndexError, TypeError):
+ return m.group(0)
+ if self.md.safeMode and not safe:
+ return ''
+ # Strip out tags and entities - leaving text
+ return re.sub(r'(<[^>]+>)|(&[\#a-zA-Z0-9]+;)', '', raw)
+
for elem in doc.getiterator():
if elem.tag in ['h1', 'h2', 'h3', 'h4', 'h5', 'h6']:
if force_id:
if "id" in elem.attrib:
id = elem.get('id')
else:
- id = slugify(''.join(itertext(elem)), sep)
+ id = HTML_PLACEHOLDER_RE.sub(_html_sub,
+ ''.join(itertext(elem)))
+ id = slugify(id, sep)
elem.set('id', unique(id, self.IDs))
if start_level:
level = int(elem.tag[-1]) + start_level
diff --git a/markdown/util.py b/markdown/util.py
index 1036197..7cb933a 100644
--- a/markdown/util.py
+++ b/markdown/util.py
@@ -38,6 +38,9 @@ INLINE_PLACEHOLDER_PREFIX = STX+"klzzwxh:"
INLINE_PLACEHOLDER = INLINE_PLACEHOLDER_PREFIX + "%s" + ETX
INLINE_PLACEHOLDER_RE = re.compile(INLINE_PLACEHOLDER % r'([0-9]{4})')
AMP_SUBSTITUTE = STX+"amp"+ETX
+HTML_PLACEHOLDER = STX + "wzxhzdk:%s" + ETX
+HTML_PLACEHOLDER_RE = re.compile(HTML_PLACEHOLDER % r'([0-9]+)')
+
"""
Constants you probably do not need to change
@@ -132,5 +135,5 @@ class HtmlStash(object):
self.rawHtmlBlocks = []
def get_placeholder(self, key):
- return "%swzxhzdk:%d%s" % (STX, key, ETX)
+ return HTML_PLACEHOLDER % key
diff --git a/tests/test_extensions.py b/tests/test_extensions.py
index 4eb600b..add759a 100644
--- a/tests/test_extensions.py
+++ b/tests/test_extensions.py
@@ -236,6 +236,18 @@ class TestHeaderId(unittest.TestCase):
'<h1 id="some-header-with-markup">Some <em>Header</em> with '
'<a href="http://example.com">markup</a>.</h1>')
+ def testHtmlEntities(self):
+ """ Test HeaderIDs with HTML Entities. """
+ text = '# Foo &amp; bar'
+ self.assertEqual(self.md.convert(text),
+ '<h1 id="foo-bar">Foo &amp; bar</h1>')
+
+ def testRawHtml(self):
+ """ Test HeaderIDs with raw HTML. """
+ text = '# Foo <b>Bar</b> Baz.'
+ self.assertEqual(self.md.convert(text),
+ '<h1 id="foo-bar-baz">Foo <b>Bar</b> Baz.</h1>')
+
def testNoAutoIds(self):
""" Test HeaderIDs with no auto generated IDs. """