diff options
author | Waylan Limberg <waylan.limberg@icloud.com> | 2015-01-01 13:25:04 -0500 |
---|---|---|
committer | Waylan Limberg <waylan.limberg@icloud.com> | 2015-01-01 13:25:04 -0500 |
commit | 5389174da277ca13f9b430c2cc8db6d011499205 (patch) | |
tree | b158cc4671808a94d240f0ce6dbe2b864b0a5fa5 | |
parent | fef6e285d28e25cf4acb93f0ef470fd690507cfa (diff) | |
download | markdown-5389174da277ca13f9b430c2cc8db6d011499205.tar.gz markdown-5389174da277ca13f9b430c2cc8db6d011499205.tar.bz2 markdown-5389174da277ca13f9b430c2cc8db6d011499205.zip |
HeaderId Extension marked as Pending Deprecation.
Use the Table of Contents Extension instead. The HeaderId Extension will
raise a PendingDeprecationWarning.
The last few features of the HeaderId extension were migrated to TOC
including the baselevel and separator config options. Also, the
marker config option of TOC can be set to an empty string to disable
searching for a marker.
The `slugify`, `unique` and `stashedHTML2text` functions are now defined
in the TOC extension in preparation for the HeaderId extension being
removed. All corresponding tests are now run against the TOC Extension.
The meta-data support of the HeaderId Extension was not migrated and no plan
exists to make that migration. The `forceid` config option makes no sense in
the TOC Extension and the only other config setting supported by meta-data
was the `header_level`. However, as that depends on the template, it makes
more sense for that setting not to be defined at the document level.
-rw-r--r-- | docs/extensions/header_id.txt | 9 | ||||
-rw-r--r-- | docs/extensions/toc.txt | 92 | ||||
-rw-r--r-- | docs/release-2.6.txt | 43 | ||||
-rw-r--r-- | markdown/extensions/headerid.py | 66 | ||||
-rw-r--r-- | markdown/extensions/toc.py | 77 | ||||
-rw-r--r-- | tests/test_extensions.py | 159 |
6 files changed, 289 insertions, 157 deletions
diff --git a/docs/extensions/header_id.txt b/docs/extensions/header_id.txt index 2881c50..42e640e 100644 --- a/docs/extensions/header_id.txt +++ b/docs/extensions/header_id.txt @@ -15,6 +15,13 @@ elements (`h1`-`h6`) in the resulting HTML document. This extension is included in the standard Markdown library. +!!! warning + This extension is **Pending Deprecation**. The [Table of Contents][toc] + Extension should be used instead, which offers most the features of this + extension and more. + +[toc]: toc.html + Syntax ------ @@ -55,7 +62,7 @@ The following options are provided to configure the output: >>> text = ''' ... #Some Header ... ## Next Level''' - >>> from markdown.extensions.headerid import HeaderIdExtension + >>> from markdown.extensions.headerid import HeaderIdExtension >>> html = markdown.markdown(text, extensions=[HeaderIdExtension(level=3)]) >>> print html <h3 id="some_header">Some Header</h3> diff --git a/docs/extensions/toc.txt b/docs/extensions/toc.txt index 56a8ee0..c6a99bf 100644 --- a/docs/extensions/toc.txt +++ b/docs/extensions/toc.txt @@ -18,6 +18,20 @@ This extension is included in the standard Markdown library. Syntax ------ +By default, all headers will automatically have unique `id` attributes +generated based upon the text of the header. Note this example, in which all +three headers would have the same `id`: + + #Header + #Header + #Header + +Results in: + + <h1 id="header">Header</h1> + <h1 id="header_1">Header</h1> + <h1 id="header_2">Header</h1> + Place a marker in the document where you would like the Table of Contents to appear. Then, a nested list of all the headers in the document will replace the marker. 
The marker defaults to `[TOC]` so the following document: @@ -41,6 +55,14 @@ would generate the following output: <h1 id="header-1">Header 1</h1> <h1 id="header-2">Header 2</h1> +Regardless of whether a `marker` is found in the document (or disabled), the Table of +Contents is available as an attribute (`toc`) on the Markdown class. This allows +one to insert the Table of Contents elsewhere in their page template. For example: + + >>> md = markdown.Markdown(extensions=['markdown.extensions.toc']) + >>> html = md.convert(text) + >>> page = render_some_template(context={'body': html, 'toc': md.toc}) + Usage ----- @@ -53,37 +75,57 @@ configuring extensions. The following options are provided to configure the output: * **`marker`**: - Text to find and replace with the Table of Contents. Defaults - to `[TOC]`. + Text to find and replace with the Table of Contents. Defaults to `[TOC]`. + + Set to an empty string to disable searching for a marker, which may save some time, + especially on long documents. - Regardless of whether a `marker` is found in the document, the Table of Contents is - also available as an attribute (`toc`) of the Markdown class. This allows one to insert - the Table of Contents elsewhere in their page template. For example: +* **`title`**: + Title to insert in the Table of Contents' `<div>`. Defaults to `None`. - >>> text = ''' - # Header 1 +* **`anchorlink`**: + Set to `True` to cause all headers to link to themselves. Default is `False`. - ## Header 2 - ''' - >>> md = markdown.Markdown(extensions=['markdown.extensions.toc']) - >>> html = md.convert(text) - >>> render_some_template(context={'body': html, 'toc': md.toc}) +* **`permalink`**: + Set to `True` or a string to generate permanent links at the end of each header. + Useful with Sphinx stylesheets. + + When set to `True` the paragraph symbol (¶ -- `¶`) is used as the link + text. When set to a string, the provided string is used as the link text. 
+ +* **`baselevel`**: + Base level for headers. + + Default: `1` + + The `baselevel` setting allows the header levels to be automatically adjusted to + fit within the hierarchy of your html templates. For example, suppose the + Markdown text for a page should not contain any headers higher than level 3 + (`<h3>`). The following will accomplish that: + + >>> text = ''' + ... #Some Header + ... ## Next Level''' + >>> from markdown.extensions.toc import TocExtension + >>> html = markdown.markdown(text, extensions=[TocExtension(baselevel=3)]) + >>> print html + <h3 id="some_header">Some Header</h3> + <h4 id="next_level">Next Level</h4>' * **`slugify`**: - Callable to generate anchors based on header text. Defaults to a built in - `slugify` method. The callable must accept two arguments, the first - contains the text content of the header and the second contains the - separator. It should then return a string which will be used as the anchor - text. + Callable to generate anchors. -* **`title`**: - Title to insert in the Table of Contents' `<div>`. Defaults to `None`. + Default: `markdown.extensions.headerid.slugify` -* **`anchorlink`**: - Setting to `True` will cause the headers link to themselves. Default is - `False`. + In order to use a different algorithm to define the id attributes, define and + pass in a callable which takes the following two arguments: -* **`permalink`**: - Set to `True` to have this extension generate a Sphinx-style permanent links - near the headers (for use with Sphinx stylesheets). + * `value`: The string to slugify. + * `separator`: The Word Separator. + + The callable must return a string appropriate for use in HTML `id` attributes. + +* **`separator`**: + Word separator. Character which replaces whitespace in id. + Default: `-`
\ No newline at end of file diff --git a/docs/release-2.6.txt b/docs/release-2.6.txt index 0724700..23fdfdd 100644 --- a/docs/release-2.6.txt +++ b/docs/release-2.6.txt @@ -96,6 +96,19 @@ Backwards-incompatible Changes be used instead. See the [documentation](reference.html#extension-configs) for a full explaination of the current behavior. +* The [HeaderId][hid] Extension is pending deprecation and will raise a + **`PendingDeprecationWarning`** in version 2.6. The extension will be + deprecated in version 2.7 and raise an error in version 2.8. Use the + [Table of Contents][TOC] Extension instead, which offers most of the + features of the HeaderId Extension and more (support for meta data is missing). + + Extension authors who have been using the `slugify` and `unique` functions + defined in the HeaderId Extension should note that those functions are now + defined in the Table of Contents extension and should adjust their import + statements accordingly (`from markdown.extensions.toc import slugify, unique`). + +[hid]: extensions/headerid.html + What's New in Python-Markdown 2.6 --------------------------------- @@ -110,15 +123,29 @@ What's New in Python-Markdown 2.6 [Meta-Data]: extensions/meta_data.html [YAML]: http://yaml.org/ -* The [TOC] Extension has been refactored. Significantly, the extension now - assigns the Table of Contents to the `toc` attrbibute of the Markdown class - regardless of whether a "marker" was found in the document. Third party - frameworks no longer need to insert a "marker," run the document through - Markdown, then extract the TOC from the document. +* The [Table fo Contents][TOC] Extension has been refactored and some new features + have been added. See the documentation for a full explaination of each feature + listed below: + + * The extension now assigns the Table of Contents to the `toc` attribute of + the Markdown class regardless of whether a "marker" was found in the document. 
+ Third party frameworks no longer need to insert a "marker," run the document + through Markdown, then extract the TOC from the document. - Additionaly, the TOC Extension is now a "registered extension." Therefore, - when the `reset` method of the Markdown class is called, the `toc` attribute - on the Markdown class is cleared (set to an empty string). + * The TOC Extension is now a "registered extension." Therefore, when the `reset` + method of the Markdown class is called, the `toc` attribute on the Markdown + class is cleared (set to an empty string). + + * When the `marker` config option is set to an empty string, the parser completely + skips the process of searching the document for markers. This should save parsing + time when the TOC Extension is being used only to assign ids to headers. + + * A `separator` config option has been added allowing users to override the + separator character used by the slugify function. + + * A `baselevel` config option has been added allowing users to set the base level + of headers in their documents (h1-h6). This allows the header levels to be + automatically adjusted to fit within the hierarchy of an html template. [TOC]: extensions/toc.html diff --git a/markdown/extensions/headerid.py b/markdown/extensions/headerid.py index c9f2a21..9340a1b 100644 --- a/markdown/extensions/headerid.py +++ b/markdown/extensions/headerid.py @@ -19,64 +19,13 @@ from __future__ import absolute_import from __future__ import unicode_literals from . import Extension from ..treeprocessors import Treeprocessor -from ..util import HTML_PLACEHOLDER_RE, parseBoolValue -import re +from ..util import parseBoolValue +from .toc import slugify, unique, stashedHTML2text import logging -import unicodedata +import warnings logger = logging.getLogger('MARKDOWN') - -IDCOUNT_RE = re.compile(r'^(.*)_([0-9]+)$') - - -def slugify(value, separator): - """ Slugify a string, to make it URL friendly. 
""" - value = unicodedata.normalize('NFKD', value).encode('ascii', 'ignore') - value = re.sub('[^\w\s-]', '', value.decode('ascii')).strip().lower() - return re.sub('[%s\s]+' % separator, separator, value) - - -def unique(id, ids): - """ Ensure id is unique in set of ids. Append '_1', '_2'... if not """ - while id in ids or not id: - m = IDCOUNT_RE.match(id) - if m: - id = '%s_%d' % (m.group(1), int(m.group(2))+1) - else: - id = '%s_%d' % (id, 1) - ids.add(id) - return id - - -def itertext(elem): - """ Loop through all children and return text only. - - Reimplements method of same name added to ElementTree in Python 2.7 - - """ - if elem.text: - yield elem.text - for e in elem: - for s in itertext(e): - yield s - if e.tail: - yield e.tail - - -def stashedHTML2text(text, md): - """ Extract raw HTML, reduce to plain text and swap with placeholder. """ - def _html_sub(m): - """ Substitute raw html with plain text. """ - try: - raw, safe = md.htmlStash.rawHtmlBlocks[int(m.group(1))] - except (IndexError, TypeError): - return m.group(0) - if md.safeMode and not safe: - return '' - # Strip out tags and entities - leaveing text - return re.sub(r'(<[^>]+>)|(&[\#a-zA-Z0-9]+;)', '', raw) - - return HTML_PLACEHOLDER_RE.sub(_html_sub, text) +logging.captureWarnings(True) class HeaderIdTreeprocessor(Treeprocessor): @@ -94,7 +43,7 @@ class HeaderIdTreeprocessor(Treeprocessor): if "id" in elem.attrib: id = elem.get('id') else: - id = stashedHTML2text(''.join(itertext(elem)), self.md) + id = stashedHTML2text(''.join(elem.itertext()), self.md) id = slugify(id, sep) elem.set('id', unique(id, self.IDs)) if start_level: @@ -127,6 +76,11 @@ class HeaderIdExtension(Extension): super(HeaderIdExtension, self).__init__(*args, **kwargs) + warnings.warn( + 'The HeaderId Extension is pending deprecation. 
Use the TOC Extension instead.', + PendingDeprecationWarning + ) + def extendMarkdown(self, md, md_globals): md.registerExtension(self) self.processor = HeaderIdTreeprocessor() diff --git a/markdown/extensions/toc.py b/markdown/extensions/toc.py index cea3440..456c623 100644 --- a/markdown/extensions/toc.py +++ b/markdown/extensions/toc.py @@ -17,9 +17,47 @@ from __future__ import absolute_import from __future__ import unicode_literals from . import Extension from ..treeprocessors import Treeprocessor -from ..util import etree, parseBoolValue, AMP_SUBSTITUTE -from .headerid import slugify, unique, itertext, stashedHTML2text +from ..util import etree, parseBoolValue, AMP_SUBSTITUTE, HTML_PLACEHOLDER_RE import re +import unicodedata + + +def slugify(value, separator): + """ Slugify a string, to make it URL friendly. """ + value = unicodedata.normalize('NFKD', value).encode('ascii', 'ignore') + value = re.sub('[^\w\s-]', '', value.decode('ascii')).strip().lower() + return re.sub('[%s\s]+' % separator, separator, value) + + +IDCOUNT_RE = re.compile(r'^(.*)_([0-9]+)$') + + +def unique(id, ids): + """ Ensure id is unique in set of ids. Append '_1', '_2'... if not """ + while id in ids or not id: + m = IDCOUNT_RE.match(id) + if m: + id = '%s_%d' % (m.group(1), int(m.group(2))+1) + else: + id = '%s_%d' % (id, 1) + ids.add(id) + return id + + +def stashedHTML2text(text, md): + """ Extract raw HTML from stash, reduce to plain text and swap with placeholder. """ + def _html_sub(m): + """ Substitute raw html with plain text. 
""" + try: + raw, safe = md.htmlStash.rawHtmlBlocks[int(m.group(1))] + except (IndexError, TypeError): # pragma: no cover + return m.group(0) + if md.safeMode and not safe: # pragma: no cover + return '' + # Strip out tags and entities - leaveing text + return re.sub(r'(<[^>]+>)|(&[\#a-zA-Z0-9]+;)', '', raw) + + return HTML_PLACEHOLDER_RE.sub(_html_sub, text) def order_toc_list(toc_list): @@ -91,7 +129,9 @@ class TocTreeprocessor(Treeprocessor): self.marker = config["marker"] self.title = config["title"] + self.base_level = int(config["baselevel"]) - 1 self.slugify = config["slugify"] + self.sep = config["separator"] self.use_anchors = parseBoolValue(config["anchorlink"]) self.use_permalinks = parseBoolValue(config["permalink"], False) if self.use_permalinks is None: @@ -108,7 +148,7 @@ class TocTreeprocessor(Treeprocessor): def replace_marker(self, root, elem): ''' Replace marker with elem. ''' for (p, c) in self.iterparent(root): - text = ''.join(itertext(c)).strip() + text = ''.join(c.itertext()).strip() if not text: continue @@ -125,6 +165,13 @@ class TocTreeprocessor(Treeprocessor): p[i] = elem break + def set_level(self, elem): + ''' Adjust header level according to base level. 
''' + level = int(elem.tag[-1]) + self.base_level + if level > 6: + level = 6 + elem.tag = 'h%d' % level + def add_anchor(self, c, elem_id): # @ReservedAssignment anchor = etree.Element("a") anchor.text = c.text @@ -176,17 +223,19 @@ class TocTreeprocessor(Treeprocessor): div = etree.Element("div") div.attrib["class"] = "toc" - self.replace_marker(doc, div) + if self.marker: + self.replace_marker(doc, div) toc_list = [] for el in doc.iter(): if self.header_rgx.match(el.tag): - text = ''.join(itertext(el)).strip() + self.set_level(el) + text = ''.join(el.itertext()).strip() # Do not override pre-existing ids if "id" not in el.attrib: elem_id = stashedHTML2text(text, self.markdown) - elem_id = unique(self.slugify(elem_id, '-'), used_ids) + elem_id = unique(self.slugify(elem_id, self.sep), used_ids) el.attrib["id"] = elem_id else: elem_id = el.attrib["id"] @@ -219,12 +268,9 @@ class TocExtension(Extension): def __init__(self, *args, **kwargs): self.config = { - "marker": ["[TOC]", - "Text to find and replace with Table of Contents - " - "Defaults to \"[TOC]\""], - "slugify": [slugify, - "Function to generate anchors based on header text - " - "Defaults to the headerid ext's slugify function."], + "marker": ['[TOC]', + 'Text to find and replace with Table of Contents - ' + 'Set to an empty string to disable. Defaults to "[TOC]"'], "title": ["", "Title to insert into TOC <div> - " "Defaults to an empty string"], @@ -233,7 +279,12 @@ class TocExtension(Extension): "Defaults to False"], "permalink": [0, "True or link text if a Sphinx-style permalink should " - "be added - Defaults to False"] + "be added - Defaults to False"], + "baselevel": ['1', 'Base level for headers.'], + "slugify": [slugify, + "Function to generate anchors based on header text - " + "Defaults to the headerid ext's slugify function."], + 'separator': ['-', 'Word separator. 
Defaults to "-".'] } super(TocExtension, self).__init__(*args, **kwargs) diff --git a/tests/test_extensions.py b/tests/test_extensions.py index dae8829..6642921 100644 --- a/tests/test_extensions.py +++ b/tests/test_extensions.py @@ -394,60 +394,6 @@ class TestHeaderId(unittest.TestCase): '<h1 id="some-header">Some Header</h1>' ) - def testUniqueFunc(self): - """ Test 'unique' function. """ - from markdown.extensions.headerid import unique - ids = set(['foo']) - self.assertEqual(unique('foo', ids), 'foo_1') - self.assertEqual(ids, set(['foo', 'foo_1'])) - - def testUniqueIds(self): - """ Test Unique IDs. """ - - text = '#Header\n#Header\n#Header' - self.assertEqual( - self.md.convert(text), - '<h1 id="header">Header</h1>\n' - '<h1 id="header_1">Header</h1>\n' - '<h1 id="header_2">Header</h1>' - ) - - def testBaseLevel(self): - """ Test Header Base Level. """ - - text = '#Some Header\n## Next Level' - self.assertEqual( - markdown.markdown(text, [markdown.extensions.headerid.HeaderIdExtension(level=3)]), - '<h3 id="some-header">Some Header</h3>\n' - '<h4 id="next-level">Next Level</h4>' - ) - - def testHeaderInlineMarkup(self): - """ Test Header IDs with inline markup. """ - - text = '#Some *Header* with [markup](http://example.com).' - self.assertEqual( - self.md.convert(text), - '<h1 id="some-header-with-markup">Some <em>Header</em> with ' - '<a href="http://example.com">markup</a>.</h1>' - ) - - def testHtmlEntities(self): - """ Test HeaderIDs with HTML Entities. """ - text = '# Foo & bar' - self.assertEqual( - self.md.convert(text), - '<h1 id="foo-bar">Foo & bar</h1>' - ) - - def testRawHtml(self): - """ Test HeaderIDs with raw HTML. """ - text = '# Foo <b>Bar</b> Baz.' - self.assertEqual( - self.md.convert(text), - '<h1 id="foo-bar-baz">Foo <b>Bar</b> Baz.</h1>' - ) - def testNoAutoIds(self): """ Test HeaderIDs with no auto generated IDs. 
""" @@ -733,6 +679,41 @@ class TestTOC(unittest.TestCase): '</div>\n' ) + def testAlternateMarker(self): + """ Test TOC with user defined marker. """ + md = markdown.Markdown( + extensions=[markdown.extensions.toc.TocExtension(marker='{{marker}}')] + ) + text = '{{marker}}\n\n# Header 1\n\n## Header 2' + self.assertEqual( + md.convert(text), + '<div class="toc">\n' + '<ul>\n' # noqa + '<li><a href="#header-1">Header 1</a>' # noqa + '<ul>\n' # noqa + '<li><a href="#header-2">Header 2</a></li>\n' # noqa + '</ul>\n' # noqa + '</li>\n' # noqa + '</ul>\n' # noqa + '</div>\n' + '<h1 id="header-1">Header 1</h1>\n' + '<h2 id="header-2">Header 2</h2>' + ) + + def testDisabledMarker(self): + """ Test TOC with disabled marker. """ + md = markdown.Markdown( + extensions=[markdown.extensions.toc.TocExtension(marker='')] + ) + text = '[TOC]\n\n# Header 1\n\n## Header 2' + self.assertEqual( + md.convert(text), + '<p>[TOC]</p>\n' + '<h1 id="header-1">Header 1</h1>\n' + '<h2 id="header-2">Header 2</h2>' + ) + self.assertTrue(md.toc.startswith('<div class="toc">')) + def testReset(self): """ Test TOC Reset. """ self.assertEqual(self.md.toc, '') @@ -741,6 +722,69 @@ class TestTOC(unittest.TestCase): self.md.reset() self.assertEqual(self.md.toc, '') + def testUniqueIds(self): + """ Test Unique IDs. """ + + text = '#Header\n#Header\n#Header' + self.assertEqual( + self.md.convert(text), + '<h1 id="header">Header</h1>\n' + '<h1 id="header_1">Header</h1>\n' + '<h1 id="header_2">Header</h1>' + ) + + def testHtmlEntities(self): + """ Test Headers with HTML Entities. """ + text = '# Foo & bar' + self.assertEqual( + self.md.convert(text), + '<h1 id="foo-bar">Foo & bar</h1>' + ) + + def testRawHtml(self): + """ Test Headers with raw HTML. """ + text = '# Foo <b>Bar</b> Baz.' + self.assertEqual( + self.md.convert(text), + '<h1 id="foo-bar-baz">Foo <b>Bar</b> Baz.</h1>' + ) + + def testBaseLevel(self): + """ Test Header Base Level. 
""" + md = markdown.Markdown( + extensions=[markdown.extensions.toc.TocExtension(baselevel=5)] + ) + text = '# Some Header\n\n## Next Level\n\n### Too High' + self.assertEqual( + md.convert(text), + '<h5 id="some-header">Some Header</h5>\n' + '<h6 id="next-level">Next Level</h6>\n' + '<h6 id="too-high">Too High</h6>' + ) + self.assertEqual( + md.toc, + '<div class="toc">\n' + '<ul>\n' # noqa + '<li><a href="#some-header">Some Header</a>' # noqa + '<ul>\n' # noqa + '<li><a href="#next-level">Next Level</a></li>\n' # noqa + '<li><a href="#too-high">Too High</a></li>\n' # noqa + '</ul>\n' # noqa + '</li>\n' # noqa + '</ul>\n' # noqa + '</div>\n' + ) + + def testHeaderInlineMarkup(self): + """ Test Headers with inline markup. """ + + text = '#Some *Header* with [markup](http://example.com).' + self.assertEqual( + self.md.convert(text), + '<h1 id="some-header-with-markup">Some <em>Header</em> with ' + '<a href="http://example.com">markup</a>.</h1>' + ) + def testAnchorLink(self): """ Test TOC Anchorlink. """ md = markdown.Markdown( @@ -783,6 +827,13 @@ class TestTOC(unittest.TestCase): '</div>\n' ) + def testUniqueFunc(self): + """ Test 'unique' function. """ + from markdown.extensions.toc import unique + ids = set(['foo']) + self.assertEqual(unique('foo', ids), 'foo_1') + self.assertEqual(ids, set(['foo', 'foo_1'])) + class TestSmarty(unittest.TestCase): def setUp(self): |