aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--docs/extensions/header_id.txt9
-rw-r--r--docs/extensions/toc.txt92
-rw-r--r--docs/release-2.6.txt43
-rw-r--r--markdown/extensions/headerid.py66
-rw-r--r--markdown/extensions/toc.py77
-rw-r--r--tests/test_extensions.py159
6 files changed, 289 insertions, 157 deletions
diff --git a/docs/extensions/header_id.txt b/docs/extensions/header_id.txt
index 2881c50..42e640e 100644
--- a/docs/extensions/header_id.txt
+++ b/docs/extensions/header_id.txt
@@ -15,6 +15,13 @@ elements (`h1`-`h6`) in the resulting HTML document.
This extension is included in the standard Markdown library.
+!!! warning
+ This extension is **Pending Deprecation**. The [Table of Contents][toc]
+ Extension should be used instead, which offers most of the features of this
+ extension and more.
+
+[toc]: toc.html
+
Syntax
------
@@ -55,7 +62,7 @@ The following options are provided to configure the output:
>>> text = '''
... #Some Header
... ## Next Level'''
- >>> from markdown.extensions.headerid import HeaderIdExtension
+ >>> from markdown.extensions.headerid import HeaderIdExtension
>>> html = markdown.markdown(text, extensions=[HeaderIdExtension(level=3)])
>>> print html
<h3 id="some_header">Some Header</h3>
diff --git a/docs/extensions/toc.txt b/docs/extensions/toc.txt
index 56a8ee0..c6a99bf 100644
--- a/docs/extensions/toc.txt
+++ b/docs/extensions/toc.txt
@@ -18,6 +18,20 @@ This extension is included in the standard Markdown library.
Syntax
------
+By default, all headers will automatically have unique `id` attributes
+generated based upon the text of the header. Note this example, in which all
+three headers would have the same `id`:
+
+ #Header
+ #Header
+ #Header
+
+Results in:
+
+ <h1 id="header">Header</h1>
+ <h1 id="header_1">Header</h1>
+ <h1 id="header_2">Header</h1>
+
Place a marker in the document where you would like the Table of Contents to
appear. Then, a nested list of all the headers in the document will replace the
marker. The marker defaults to `[TOC]` so the following document:
@@ -41,6 +55,14 @@ would generate the following output:
<h1 id="header-1">Header 1</h1>
<h1 id="header-2">Header 2</h1>
+Regardless of whether a `marker` is found in the document (or disabled), the Table of
+Contents is available as an attribute (`toc`) on the Markdown class. This allows
+one to insert the Table of Contents elsewhere in their page template. For example:
+
+ >>> md = markdown.Markdown(extensions=['markdown.extensions.toc'])
+ >>> html = md.convert(text)
+ >>> page = render_some_template(context={'body': html, 'toc': md.toc})
+
Usage
-----
@@ -53,37 +75,57 @@ configuring extensions.
The following options are provided to configure the output:
* **`marker`**:
- Text to find and replace with the Table of Contents. Defaults
- to `[TOC]`.
+ Text to find and replace with the Table of Contents. Defaults to `[TOC]`.
+
+ Set to an empty string to disable searching for a marker, which may save some time,
+ especially on long documents.
- Regardless of whether a `marker` is found in the document, the Table of Contents is
- also available as an attribute (`toc`) of the Markdown class. This allows one to insert
- the Table of Contents elsewhere in their page template. For example:
+* **`title`**:
+ Title to insert in the Table of Contents' `<div>`. Defaults to `None`.
- >>> text = '''
- # Header 1
+* **`anchorlink`**:
+ Set to `True` to cause all headers to link to themselves. Default is `False`.
- ## Header 2
- '''
- >>> md = markdown.Markdown(extensions=['markdown.extensions.toc'])
- >>> html = md.convert(text)
- >>> render_some_template(context={'body': html, 'toc': md.toc})
+* **`permalink`**:
+ Set to `True` or a string to generate permanent links at the end of each header.
+ Useful with Sphinx stylesheets.
+
+ When set to `True` the paragraph symbol (&para; -- `&para;`) is used as the link
+ text. When set to a string, the provided string is used as the link text.
+
+* **`baselevel`**:
+ Base level for headers.
+
+ Default: `1`
+
+ The `baselevel` setting allows the header levels to be automatically adjusted to
+ fit within the hierarchy of your html templates. For example, suppose the
+ Markdown text for a page should not contain any headers higher than level 3
+ (`<h3>`). The following will accomplish that:
+
+ >>> text = '''
+ ... #Some Header
+ ... ## Next Level'''
+ >>> from markdown.extensions.toc import TocExtension
+ >>> html = markdown.markdown(text, extensions=[TocExtension(baselevel=3)])
+ >>> print html
+ <h3 id="some-header">Some Header</h3>
+ <h4 id="next-level">Next Level</h4>
* **`slugify`**:
- Callable to generate anchors based on header text. Defaults to a built in
- `slugify` method. The callable must accept two arguments, the first
- contains the text content of the header and the second contains the
- separator. It should then return a string which will be used as the anchor
- text.
+ Callable to generate anchors.
-* **`title`**:
- Title to insert in the Table of Contents' `<div>`. Defaults to `None`.
+ Default: `markdown.extensions.toc.slugify`
-* **`anchorlink`**:
- Setting to `True` will cause the headers link to themselves. Default is
- `False`.
+ In order to use a different algorithm to define the id attributes, define and
+ pass in a callable which takes the following two arguments:
-* **`permalink`**:
- Set to `True` to have this extension generate a Sphinx-style permanent links
- near the headers (for use with Sphinx stylesheets).
+ * `value`: The string to slugify.
+ * `separator`: The Word Separator.
+
+ The callable must return a string appropriate for use in HTML `id` attributes.
+
+* **`separator`**:
+ Word separator. Character which replaces whitespace in id.
+ Default: `-`
\ No newline at end of file
diff --git a/docs/release-2.6.txt b/docs/release-2.6.txt
index 0724700..23fdfdd 100644
--- a/docs/release-2.6.txt
+++ b/docs/release-2.6.txt
@@ -96,6 +96,19 @@ Backwards-incompatible Changes
be used instead. See the [documentation](reference.html#extension-configs)
for a full explaination of the current behavior.
+* The [HeaderId][hid] Extension is pending deprecation and will raise a
+ **`PendingDeprecationWarning`** in version 2.6. The extension will be
+ deprecated in version 2.7 and raise an error in version 2.8. Use the
+ [Table of Contents][TOC] Extension instead, which offers most of the
+ features of the HeaderId Extension and more (support for meta data is missing).
+
+ Extension authors who have been using the `slugify` and `unique` functions
+ defined in the HeaderId Extension should note that those functions are now
+ defined in the Table of Contents extension and should adjust their import
+ statements accordingly (`from markdown.extensions.toc import slugify, unique`).
+
+[hid]: extensions/headerid.html
+
What's New in Python-Markdown 2.6
---------------------------------
@@ -110,15 +123,29 @@ What's New in Python-Markdown 2.6
[Meta-Data]: extensions/meta_data.html
[YAML]: http://yaml.org/
-* The [TOC] Extension has been refactored. Significantly, the extension now
- assigns the Table of Contents to the `toc` attrbibute of the Markdown class
- regardless of whether a "marker" was found in the document. Third party
- frameworks no longer need to insert a "marker," run the document through
- Markdown, then extract the TOC from the document.
+* The [Table of Contents][TOC] Extension has been refactored and some new features
+ have been added. See the documentation for a full explanation of each feature
+ listed below:
+
+ * The extension now assigns the Table of Contents to the `toc` attribute of
+ the Markdown class regardless of whether a "marker" was found in the document.
+ Third party frameworks no longer need to insert a "marker," run the document
+ through Markdown, then extract the TOC from the document.
- Additionaly, the TOC Extension is now a "registered extension." Therefore,
- when the `reset` method of the Markdown class is called, the `toc` attribute
- on the Markdown class is cleared (set to an empty string).
+ * The TOC Extension is now a "registered extension." Therefore, when the `reset`
+ method of the Markdown class is called, the `toc` attribute on the Markdown
+ class is cleared (set to an empty string).
+
+ * When the `marker` config option is set to an empty string, the parser completely
+ skips the process of searching the document for markers. This should save parsing
+ time when the TOC Extension is being used only to assign ids to headers.
+
+ * A `separator` config option has been added allowing users to override the
+ separator character used by the slugify function.
+
+ * A `baselevel` config option has been added allowing users to set the base level
+ of headers in their documents (h1-h6). This allows the header levels to be
+ automatically adjusted to fit within the hierarchy of an html template.
[TOC]: extensions/toc.html
diff --git a/markdown/extensions/headerid.py b/markdown/extensions/headerid.py
index c9f2a21..9340a1b 100644
--- a/markdown/extensions/headerid.py
+++ b/markdown/extensions/headerid.py
@@ -19,64 +19,13 @@ from __future__ import absolute_import
from __future__ import unicode_literals
from . import Extension
from ..treeprocessors import Treeprocessor
-from ..util import HTML_PLACEHOLDER_RE, parseBoolValue
-import re
+from ..util import parseBoolValue
+from .toc import slugify, unique, stashedHTML2text
import logging
-import unicodedata
+import warnings
logger = logging.getLogger('MARKDOWN')
-
-IDCOUNT_RE = re.compile(r'^(.*)_([0-9]+)$')
-
-
-def slugify(value, separator):
- """ Slugify a string, to make it URL friendly. """
- value = unicodedata.normalize('NFKD', value).encode('ascii', 'ignore')
- value = re.sub('[^\w\s-]', '', value.decode('ascii')).strip().lower()
- return re.sub('[%s\s]+' % separator, separator, value)
-
-
-def unique(id, ids):
- """ Ensure id is unique in set of ids. Append '_1', '_2'... if not """
- while id in ids or not id:
- m = IDCOUNT_RE.match(id)
- if m:
- id = '%s_%d' % (m.group(1), int(m.group(2))+1)
- else:
- id = '%s_%d' % (id, 1)
- ids.add(id)
- return id
-
-
-def itertext(elem):
- """ Loop through all children and return text only.
-
- Reimplements method of same name added to ElementTree in Python 2.7
-
- """
- if elem.text:
- yield elem.text
- for e in elem:
- for s in itertext(e):
- yield s
- if e.tail:
- yield e.tail
-
-
-def stashedHTML2text(text, md):
- """ Extract raw HTML, reduce to plain text and swap with placeholder. """
- def _html_sub(m):
- """ Substitute raw html with plain text. """
- try:
- raw, safe = md.htmlStash.rawHtmlBlocks[int(m.group(1))]
- except (IndexError, TypeError):
- return m.group(0)
- if md.safeMode and not safe:
- return ''
- # Strip out tags and entities - leaveing text
- return re.sub(r'(<[^>]+>)|(&[\#a-zA-Z0-9]+;)', '', raw)
-
- return HTML_PLACEHOLDER_RE.sub(_html_sub, text)
+logging.captureWarnings(True)
class HeaderIdTreeprocessor(Treeprocessor):
@@ -94,7 +43,7 @@ class HeaderIdTreeprocessor(Treeprocessor):
if "id" in elem.attrib:
id = elem.get('id')
else:
- id = stashedHTML2text(''.join(itertext(elem)), self.md)
+ id = stashedHTML2text(''.join(elem.itertext()), self.md)
id = slugify(id, sep)
elem.set('id', unique(id, self.IDs))
if start_level:
@@ -127,6 +76,11 @@ class HeaderIdExtension(Extension):
super(HeaderIdExtension, self).__init__(*args, **kwargs)
+ warnings.warn(
+ 'The HeaderId Extension is pending deprecation. Use the TOC Extension instead.',
+ PendingDeprecationWarning
+ )
+
def extendMarkdown(self, md, md_globals):
md.registerExtension(self)
self.processor = HeaderIdTreeprocessor()
diff --git a/markdown/extensions/toc.py b/markdown/extensions/toc.py
index cea3440..456c623 100644
--- a/markdown/extensions/toc.py
+++ b/markdown/extensions/toc.py
@@ -17,9 +17,47 @@ from __future__ import absolute_import
from __future__ import unicode_literals
from . import Extension
from ..treeprocessors import Treeprocessor
-from ..util import etree, parseBoolValue, AMP_SUBSTITUTE
-from .headerid import slugify, unique, itertext, stashedHTML2text
+from ..util import etree, parseBoolValue, AMP_SUBSTITUTE, HTML_PLACEHOLDER_RE
import re
+import unicodedata
+
+
+def slugify(value, separator):
+ """ Slugify a string, to make it URL friendly. """
+ value = unicodedata.normalize('NFKD', value).encode('ascii', 'ignore')
+ value = re.sub('[^\w\s-]', '', value.decode('ascii')).strip().lower()
+ return re.sub('[%s\s]+' % separator, separator, value)
+
+
+IDCOUNT_RE = re.compile(r'^(.*)_([0-9]+)$')
+
+
+def unique(id, ids):
+ """ Ensure id is unique in set of ids. Append '_1', '_2'... if not """
+ while id in ids or not id:
+ m = IDCOUNT_RE.match(id)
+ if m:
+ id = '%s_%d' % (m.group(1), int(m.group(2))+1)
+ else:
+ id = '%s_%d' % (id, 1)
+ ids.add(id)
+ return id
+
+
+def stashedHTML2text(text, md):
+ """ Extract raw HTML from stash, reduce to plain text and swap with placeholder. """
+ def _html_sub(m):
+ """ Substitute raw html with plain text. """
+ try:
+ raw, safe = md.htmlStash.rawHtmlBlocks[int(m.group(1))]
+ except (IndexError, TypeError): # pragma: no cover
+ return m.group(0)
+ if md.safeMode and not safe: # pragma: no cover
+ return ''
+ # Strip out tags and entities - leaving text
+ return re.sub(r'(<[^>]+>)|(&[\#a-zA-Z0-9]+;)', '', raw)
+
+ return HTML_PLACEHOLDER_RE.sub(_html_sub, text)
def order_toc_list(toc_list):
@@ -91,7 +129,9 @@ class TocTreeprocessor(Treeprocessor):
self.marker = config["marker"]
self.title = config["title"]
+ self.base_level = int(config["baselevel"]) - 1
self.slugify = config["slugify"]
+ self.sep = config["separator"]
self.use_anchors = parseBoolValue(config["anchorlink"])
self.use_permalinks = parseBoolValue(config["permalink"], False)
if self.use_permalinks is None:
@@ -108,7 +148,7 @@ class TocTreeprocessor(Treeprocessor):
def replace_marker(self, root, elem):
''' Replace marker with elem. '''
for (p, c) in self.iterparent(root):
- text = ''.join(itertext(c)).strip()
+ text = ''.join(c.itertext()).strip()
if not text:
continue
@@ -125,6 +165,13 @@ class TocTreeprocessor(Treeprocessor):
p[i] = elem
break
+ def set_level(self, elem):
+ ''' Adjust header level according to base level. '''
+ level = int(elem.tag[-1]) + self.base_level
+ if level > 6:
+ level = 6
+ elem.tag = 'h%d' % level
+
def add_anchor(self, c, elem_id): # @ReservedAssignment
anchor = etree.Element("a")
anchor.text = c.text
@@ -176,17 +223,19 @@ class TocTreeprocessor(Treeprocessor):
div = etree.Element("div")
div.attrib["class"] = "toc"
- self.replace_marker(doc, div)
+ if self.marker:
+ self.replace_marker(doc, div)
toc_list = []
for el in doc.iter():
if self.header_rgx.match(el.tag):
- text = ''.join(itertext(el)).strip()
+ self.set_level(el)
+ text = ''.join(el.itertext()).strip()
# Do not override pre-existing ids
if "id" not in el.attrib:
elem_id = stashedHTML2text(text, self.markdown)
- elem_id = unique(self.slugify(elem_id, '-'), used_ids)
+ elem_id = unique(self.slugify(elem_id, self.sep), used_ids)
el.attrib["id"] = elem_id
else:
elem_id = el.attrib["id"]
@@ -219,12 +268,9 @@ class TocExtension(Extension):
def __init__(self, *args, **kwargs):
self.config = {
- "marker": ["[TOC]",
- "Text to find and replace with Table of Contents - "
- "Defaults to \"[TOC]\""],
- "slugify": [slugify,
- "Function to generate anchors based on header text - "
- "Defaults to the headerid ext's slugify function."],
+ "marker": ['[TOC]',
+ 'Text to find and replace with Table of Contents - '
+ 'Set to an empty string to disable. Defaults to "[TOC]"'],
"title": ["",
"Title to insert into TOC <div> - "
"Defaults to an empty string"],
@@ -233,7 +279,12 @@ class TocExtension(Extension):
"Defaults to False"],
"permalink": [0,
"True or link text if a Sphinx-style permalink should "
- "be added - Defaults to False"]
+ "be added - Defaults to False"],
+ "baselevel": ['1', 'Base level for headers.'],
+ "slugify": [slugify,
+ "Function to generate anchors based on header text - "
+ "Defaults to the headerid ext's slugify function."],
+ 'separator': ['-', 'Word separator. Defaults to "-".']
}
super(TocExtension, self).__init__(*args, **kwargs)
diff --git a/tests/test_extensions.py b/tests/test_extensions.py
index dae8829..6642921 100644
--- a/tests/test_extensions.py
+++ b/tests/test_extensions.py
@@ -394,60 +394,6 @@ class TestHeaderId(unittest.TestCase):
'<h1 id="some-header">Some Header</h1>'
)
- def testUniqueFunc(self):
- """ Test 'unique' function. """
- from markdown.extensions.headerid import unique
- ids = set(['foo'])
- self.assertEqual(unique('foo', ids), 'foo_1')
- self.assertEqual(ids, set(['foo', 'foo_1']))
-
- def testUniqueIds(self):
- """ Test Unique IDs. """
-
- text = '#Header\n#Header\n#Header'
- self.assertEqual(
- self.md.convert(text),
- '<h1 id="header">Header</h1>\n'
- '<h1 id="header_1">Header</h1>\n'
- '<h1 id="header_2">Header</h1>'
- )
-
- def testBaseLevel(self):
- """ Test Header Base Level. """
-
- text = '#Some Header\n## Next Level'
- self.assertEqual(
- markdown.markdown(text, [markdown.extensions.headerid.HeaderIdExtension(level=3)]),
- '<h3 id="some-header">Some Header</h3>\n'
- '<h4 id="next-level">Next Level</h4>'
- )
-
- def testHeaderInlineMarkup(self):
- """ Test Header IDs with inline markup. """
-
- text = '#Some *Header* with [markup](http://example.com).'
- self.assertEqual(
- self.md.convert(text),
- '<h1 id="some-header-with-markup">Some <em>Header</em> with '
- '<a href="http://example.com">markup</a>.</h1>'
- )
-
- def testHtmlEntities(self):
- """ Test HeaderIDs with HTML Entities. """
- text = '# Foo &amp; bar'
- self.assertEqual(
- self.md.convert(text),
- '<h1 id="foo-bar">Foo &amp; bar</h1>'
- )
-
- def testRawHtml(self):
- """ Test HeaderIDs with raw HTML. """
- text = '# Foo <b>Bar</b> Baz.'
- self.assertEqual(
- self.md.convert(text),
- '<h1 id="foo-bar-baz">Foo <b>Bar</b> Baz.</h1>'
- )
-
def testNoAutoIds(self):
""" Test HeaderIDs with no auto generated IDs. """
@@ -733,6 +679,41 @@ class TestTOC(unittest.TestCase):
'</div>\n'
)
+ def testAlternateMarker(self):
+ """ Test TOC with user defined marker. """
+ md = markdown.Markdown(
+ extensions=[markdown.extensions.toc.TocExtension(marker='{{marker}}')]
+ )
+ text = '{{marker}}\n\n# Header 1\n\n## Header 2'
+ self.assertEqual(
+ md.convert(text),
+ '<div class="toc">\n'
+ '<ul>\n' # noqa
+ '<li><a href="#header-1">Header 1</a>' # noqa
+ '<ul>\n' # noqa
+ '<li><a href="#header-2">Header 2</a></li>\n' # noqa
+ '</ul>\n' # noqa
+ '</li>\n' # noqa
+ '</ul>\n' # noqa
+ '</div>\n'
+ '<h1 id="header-1">Header 1</h1>\n'
+ '<h2 id="header-2">Header 2</h2>'
+ )
+
+ def testDisabledMarker(self):
+ """ Test TOC with disabled marker. """
+ md = markdown.Markdown(
+ extensions=[markdown.extensions.toc.TocExtension(marker='')]
+ )
+ text = '[TOC]\n\n# Header 1\n\n## Header 2'
+ self.assertEqual(
+ md.convert(text),
+ '<p>[TOC]</p>\n'
+ '<h1 id="header-1">Header 1</h1>\n'
+ '<h2 id="header-2">Header 2</h2>'
+ )
+ self.assertTrue(md.toc.startswith('<div class="toc">'))
+
def testReset(self):
""" Test TOC Reset. """
self.assertEqual(self.md.toc, '')
@@ -741,6 +722,69 @@ class TestTOC(unittest.TestCase):
self.md.reset()
self.assertEqual(self.md.toc, '')
+ def testUniqueIds(self):
+ """ Test Unique IDs. """
+
+ text = '#Header\n#Header\n#Header'
+ self.assertEqual(
+ self.md.convert(text),
+ '<h1 id="header">Header</h1>\n'
+ '<h1 id="header_1">Header</h1>\n'
+ '<h1 id="header_2">Header</h1>'
+ )
+
+ def testHtmlEntities(self):
+ """ Test Headers with HTML Entities. """
+ text = '# Foo &amp; bar'
+ self.assertEqual(
+ self.md.convert(text),
+ '<h1 id="foo-bar">Foo &amp; bar</h1>'
+ )
+
+ def testRawHtml(self):
+ """ Test Headers with raw HTML. """
+ text = '# Foo <b>Bar</b> Baz.'
+ self.assertEqual(
+ self.md.convert(text),
+ '<h1 id="foo-bar-baz">Foo <b>Bar</b> Baz.</h1>'
+ )
+
+ def testBaseLevel(self):
+ """ Test Header Base Level. """
+ md = markdown.Markdown(
+ extensions=[markdown.extensions.toc.TocExtension(baselevel=5)]
+ )
+ text = '# Some Header\n\n## Next Level\n\n### Too High'
+ self.assertEqual(
+ md.convert(text),
+ '<h5 id="some-header">Some Header</h5>\n'
+ '<h6 id="next-level">Next Level</h6>\n'
+ '<h6 id="too-high">Too High</h6>'
+ )
+ self.assertEqual(
+ md.toc,
+ '<div class="toc">\n'
+ '<ul>\n' # noqa
+ '<li><a href="#some-header">Some Header</a>' # noqa
+ '<ul>\n' # noqa
+ '<li><a href="#next-level">Next Level</a></li>\n' # noqa
+ '<li><a href="#too-high">Too High</a></li>\n' # noqa
+ '</ul>\n' # noqa
+ '</li>\n' # noqa
+ '</ul>\n' # noqa
+ '</div>\n'
+ )
+
+ def testHeaderInlineMarkup(self):
+ """ Test Headers with inline markup. """
+
+ text = '#Some *Header* with [markup](http://example.com).'
+ self.assertEqual(
+ self.md.convert(text),
+ '<h1 id="some-header-with-markup">Some <em>Header</em> with '
+ '<a href="http://example.com">markup</a>.</h1>'
+ )
+
def testAnchorLink(self):
""" Test TOC Anchorlink. """
md = markdown.Markdown(
@@ -783,6 +827,13 @@ class TestTOC(unittest.TestCase):
'</div>\n'
)
+ def testUniqueFunc(self):
+ """ Test 'unique' function. """
+ from markdown.extensions.toc import unique
+ ids = set(['foo'])
+ self.assertEqual(unique('foo', ids), 'foo_1')
+ self.assertEqual(ids, set(['foo', 'foo_1']))
+
class TestSmarty(unittest.TestCase):
def setUp(self):