diff options
-rw-r--r-- | docs/extensions/header_id.txt | 9 | ||||
-rw-r--r-- | docs/extensions/toc.txt | 92 | ||||
-rw-r--r-- | docs/release-2.6.txt | 43 | ||||
-rw-r--r-- | markdown/extensions/headerid.py | 66 | ||||
-rw-r--r-- | markdown/extensions/toc.py | 77 | ||||
-rw-r--r-- | tests/test_extensions.py | 159 |
6 files changed, 289 insertions, 157 deletions
diff --git a/docs/extensions/header_id.txt b/docs/extensions/header_id.txt index 2881c50..42e640e 100644 --- a/docs/extensions/header_id.txt +++ b/docs/extensions/header_id.txt @@ -15,6 +15,13 @@ elements (`h1`-`h6`) in the resulting HTML document. This extension is included in the standard Markdown library. +!!! warning + This extension is **Pending Deprecation**. The [Table of Contents][toc] + Extension should be used instead, which offers most of the features of this + extension and more. + +[toc]: toc.html + Syntax ------ @@ -55,7 +62,7 @@ The following options are provided to configure the output: >>> text = ''' ... #Some Header ... ## Next Level''' - >>> from markdown.extensions.headerid import HeaderIdExtension + >>> from markdown.extensions.headerid import HeaderIdExtension >>> html = markdown.markdown(text, extensions=[HeaderIdExtension(level=3)]) >>> print html <h3 id="some_header">Some Header</h3> diff --git a/docs/extensions/toc.txt b/docs/extensions/toc.txt index 56a8ee0..c6a99bf 100644 --- a/docs/extensions/toc.txt +++ b/docs/extensions/toc.txt @@ -18,6 +18,20 @@ This extension is included in the standard Markdown library. Syntax ------ +By default, all headers will automatically have unique `id` attributes +generated based upon the text of the header. Note this example, in which all +three headers would have the same `id`: + + #Header + #Header + #Header + +Results in: + + <h1 id="header">Header</h1> + <h1 id="header_1">Header</h1> + <h1 id="header_2">Header</h1> + Place a marker in the document where you would like the Table of Contents to appear. Then, a nested list of all the headers in the document will replace the marker. 
The marker defaults to `[TOC]` so the following document: @@ -41,6 +55,14 @@ would generate the following output: <h1 id="header-1">Header 1</h1> <h1 id="header-2">Header 2</h1> +Regardless of whether a `marker` is found in the document (or disabled), the Table of +Contents is available as an attribute (`toc`) on the Markdown class. This allows +one to insert the Table of Contents elsewhere in their page template. For example: + + >>> md = markdown.Markdown(extensions=['markdown.extensions.toc']) + >>> html = md.convert(text) + >>> page = render_some_template(context={'body': html, 'toc': md.toc}) + Usage ----- @@ -53,37 +75,57 @@ configuring extensions. The following options are provided to configure the output: * **`marker`**: - Text to find and replace with the Table of Contents. Defaults - to `[TOC]`. + Text to find and replace with the Table of Contents. Defaults to `[TOC]`. + + Set to an empty string to disable searching for a marker, which may save some time, + especially on long documents. - Regardless of whether a `marker` is found in the document, the Table of Contents is - also available as an attribute (`toc`) of the Markdown class. This allows one to insert - the Table of Contents elsewhere in their page template. For example: +* **`title`**: + Title to insert in the Table of Contents' `<div>`. Defaults to `None`. - >>> text = ''' - # Header 1 +* **`anchorlink`**: + Set to `True` to cause all headers to link to themselves. Default is `False`. - ## Header 2 - ''' - >>> md = markdown.Markdown(extensions=['markdown.extensions.toc']) - >>> html = md.convert(text) - >>> render_some_template(context={'body': html, 'toc': md.toc}) +* **`permalink`**: + Set to `True` or a string to generate permanent links at the end of each header. + Useful with Sphinx stylesheets. + + When set to `True` the paragraph symbol (¶ -- `&para;`) is used as the link + text. When set to a string, the provided string is used as the link text. 
+ +* **`baselevel`**: + Base level for headers. + + Default: `1` + + The `baselevel` setting allows the header levels to be automatically adjusted to + fit within the hierarchy of your html templates. For example, suppose the + Markdown text for a page should not contain any headers higher than level 3 + (`<h3>`). The following will accomplish that: + + >>> text = ''' + ... #Some Header + ... ## Next Level''' + >>> from markdown.extensions.toc import TocExtension + >>> html = markdown.markdown(text, extensions=[TocExtension(baselevel=3)]) + >>> print html + <h3 id="some-header">Some Header</h3> + <h4 id="next-level">Next Level</h4> * **`slugify`**: - Callable to generate anchors based on header text. Defaults to a built in - `slugify` method. The callable must accept two arguments, the first - contains the text content of the header and the second contains the - separator. It should then return a string which will be used as the anchor - text. + Callable to generate anchors. -* **`title`**: - Title to insert in the Table of Contents' `<div>`. Defaults to `None`. + Default: `markdown.extensions.toc.slugify` -* **`anchorlink`**: - Setting to `True` will cause the headers link to themselves. Default is - `False`. + In order to use a different algorithm to define the id attributes, define and + pass in a callable which takes the following two arguments: -* **`permalink`**: - Set to `True` to have this extension generate a Sphinx-style permanent links - near the headers (for use with Sphinx stylesheets). + * `value`: The string to slugify. + * `separator`: The Word Separator. + + The callable must return a string appropriate for use in HTML `id` attributes. + +* **`separator`**: + Word separator. Character which replaces whitespace in id. + Default: `-`
\ No newline at end of file diff --git a/docs/release-2.6.txt b/docs/release-2.6.txt index ca25a46..dee771b 100644 --- a/docs/release-2.6.txt +++ b/docs/release-2.6.txt @@ -96,6 +96,19 @@ Backwards-incompatible Changes be used instead. See the [documentation](reference.html#extension-configs) for a full explaination of the current behavior. +* The [HeaderId][hid] Extension is pending deprecation and will raise a + **`PendingDeprecationWarning`** in version 2.6. The extension will be + deprecated in version 2.7 and raise an error in version 2.8. Use the + [Table of Contents][TOC] Extension instead, which offers most of the + features of the HeaderId Extension and more (support for meta data is missing). + + Extension authors who have been using the `slugify` and `unique` functions + defined in the HeaderId Extension should note that those functions are now + defined in the Table of Contents extension and should adjust their import + statements accordingly (`from markdown.extensions.toc import slugify, unique`). + +[hid]: extensions/header_id.html + What's New in Python-Markdown 2.6 --------------------------------- @@ -110,15 +123,29 @@ What's New in Python-Markdown 2.6 [Meta-Data]: extensions/meta_data.html [YAML]: http://yaml.org/ -* The [TOC] Extension has been refactored. Significantly, the extension now - assigns the Table of Contents to the `toc` attrbibute of the Markdown class - regardless of whether a "marker" was found in the document. Third party - frameworks no longer need to insert a "marker," run the document through - Markdown, then extract the TOC from the document. +* The [Table of Contents][TOC] Extension has been refactored and some new features + have been added. See the documentation for a full explanation of each feature + listed below: + + * The extension now assigns the Table of Contents to the `toc` attribute of + the Markdown class regardless of whether a "marker" was found in the document. 
+ Third party frameworks no longer need to insert a "marker," run the document + through Markdown, then extract the TOC from the document. - Additionaly, the TOC Extension is now a "registered extension." Therefore, - when the `reset` method of the Markdown class is called, the `toc` attribute - on the Markdown class is cleared (set to an empty string). + * The TOC Extension is now a "registered extension." Therefore, when the `reset` + method of the Markdown class is called, the `toc` attribute on the Markdown + class is cleared (set to an empty string). + + * When the `marker` config option is set to an empty string, the parser completely + skips the process of searching the document for markers. This should save parsing + time when the TOC Extension is being used only to assign ids to headers. + + * A `separator` config option has been added allowing users to override the + separator character used by the slugify function. + + * A `baselevel` config option has been added allowing users to set the base level + of headers in their documents (h1-h6). This allows the header levels to be + automatically adjusted to fit within the hierarchy of an html template. [TOC]: extensions/toc.html diff --git a/markdown/extensions/headerid.py b/markdown/extensions/headerid.py index c9f2a21..9340a1b 100644 --- a/markdown/extensions/headerid.py +++ b/markdown/extensions/headerid.py @@ -19,64 +19,13 @@ from __future__ import absolute_import from __future__ import unicode_literals from . import Extension from ..treeprocessors import Treeprocessor -from ..util import HTML_PLACEHOLDER_RE, parseBoolValue -import re +from ..util import parseBoolValue +from .toc import slugify, unique, stashedHTML2text import logging -import unicodedata +import warnings logger = logging.getLogger('MARKDOWN') - -IDCOUNT_RE = re.compile(r'^(.*)_([0-9]+)$') - - -def slugify(value, separator): - """ Slugify a string, to make it URL friendly. 
""" - value = unicodedata.normalize('NFKD', value).encode('ascii', 'ignore') - value = re.sub('[^\w\s-]', '', value.decode('ascii')).strip().lower() - return re.sub('[%s\s]+' % separator, separator, value) - - -def unique(id, ids): - """ Ensure id is unique in set of ids. Append '_1', '_2'... if not """ - while id in ids or not id: - m = IDCOUNT_RE.match(id) - if m: - id = '%s_%d' % (m.group(1), int(m.group(2))+1) - else: - id = '%s_%d' % (id, 1) - ids.add(id) - return id - - -def itertext(elem): - """ Loop through all children and return text only. - - Reimplements method of same name added to ElementTree in Python 2.7 - - """ - if elem.text: - yield elem.text - for e in elem: - for s in itertext(e): - yield s - if e.tail: - yield e.tail - - -def stashedHTML2text(text, md): - """ Extract raw HTML, reduce to plain text and swap with placeholder. """ - def _html_sub(m): - """ Substitute raw html with plain text. """ - try: - raw, safe = md.htmlStash.rawHtmlBlocks[int(m.group(1))] - except (IndexError, TypeError): - return m.group(0) - if md.safeMode and not safe: - return '' - # Strip out tags and entities - leaveing text - return re.sub(r'(<[^>]+>)|(&[\#a-zA-Z0-9]+;)', '', raw) - - return HTML_PLACEHOLDER_RE.sub(_html_sub, text) +logging.captureWarnings(True) class HeaderIdTreeprocessor(Treeprocessor): @@ -94,7 +43,7 @@ class HeaderIdTreeprocessor(Treeprocessor): if "id" in elem.attrib: id = elem.get('id') else: - id = stashedHTML2text(''.join(itertext(elem)), self.md) + id = stashedHTML2text(''.join(elem.itertext()), self.md) id = slugify(id, sep) elem.set('id', unique(id, self.IDs)) if start_level: @@ -127,6 +76,11 @@ class HeaderIdExtension(Extension): super(HeaderIdExtension, self).__init__(*args, **kwargs) + warnings.warn( + 'The HeaderId Extension is pending deprecation. 
Use the TOC Extension instead.', + PendingDeprecationWarning + ) + def extendMarkdown(self, md, md_globals): md.registerExtension(self) self.processor = HeaderIdTreeprocessor() diff --git a/markdown/extensions/toc.py b/markdown/extensions/toc.py index cea3440..456c623 100644 --- a/markdown/extensions/toc.py +++ b/markdown/extensions/toc.py @@ -17,9 +17,47 @@ from __future__ import absolute_import from __future__ import unicode_literals from . import Extension from ..treeprocessors import Treeprocessor -from ..util import etree, parseBoolValue, AMP_SUBSTITUTE -from .headerid import slugify, unique, itertext, stashedHTML2text +from ..util import etree, parseBoolValue, AMP_SUBSTITUTE, HTML_PLACEHOLDER_RE import re +import unicodedata + + +def slugify(value, separator): + """ Slugify a string, to make it URL friendly. """ + value = unicodedata.normalize('NFKD', value).encode('ascii', 'ignore') + value = re.sub('[^\w\s-]', '', value.decode('ascii')).strip().lower() + return re.sub('[%s\s]+' % separator, separator, value) + + +IDCOUNT_RE = re.compile(r'^(.*)_([0-9]+)$') + + +def unique(id, ids): + """ Ensure id is unique in set of ids. Append '_1', '_2'... if not """ + while id in ids or not id: + m = IDCOUNT_RE.match(id) + if m: + id = '%s_%d' % (m.group(1), int(m.group(2))+1) + else: + id = '%s_%d' % (id, 1) + ids.add(id) + return id + + +def stashedHTML2text(text, md): + """ Extract raw HTML from stash, reduce to plain text and swap with placeholder. """ + def _html_sub(m): + """ Substitute raw html with plain text. 
""" + try: + raw, safe = md.htmlStash.rawHtmlBlocks[int(m.group(1))] + except (IndexError, TypeError): # pragma: no cover + return m.group(0) + if md.safeMode and not safe: # pragma: no cover + return '' + # Strip out tags and entities - leaveing text + return re.sub(r'(<[^>]+>)|(&[\#a-zA-Z0-9]+;)', '', raw) + + return HTML_PLACEHOLDER_RE.sub(_html_sub, text) def order_toc_list(toc_list): @@ -91,7 +129,9 @@ class TocTreeprocessor(Treeprocessor): self.marker = config["marker"] self.title = config["title"] + self.base_level = int(config["baselevel"]) - 1 self.slugify = config["slugify"] + self.sep = config["separator"] self.use_anchors = parseBoolValue(config["anchorlink"]) self.use_permalinks = parseBoolValue(config["permalink"], False) if self.use_permalinks is None: @@ -108,7 +148,7 @@ class TocTreeprocessor(Treeprocessor): def replace_marker(self, root, elem): ''' Replace marker with elem. ''' for (p, c) in self.iterparent(root): - text = ''.join(itertext(c)).strip() + text = ''.join(c.itertext()).strip() if not text: continue @@ -125,6 +165,13 @@ class TocTreeprocessor(Treeprocessor): p[i] = elem break + def set_level(self, elem): + ''' Adjust header level according to base level. 
''' + level = int(elem.tag[-1]) + self.base_level + if level > 6: + level = 6 + elem.tag = 'h%d' % level + def add_anchor(self, c, elem_id): # @ReservedAssignment anchor = etree.Element("a") anchor.text = c.text @@ -176,17 +223,19 @@ class TocTreeprocessor(Treeprocessor): div = etree.Element("div") div.attrib["class"] = "toc" - self.replace_marker(doc, div) + if self.marker: + self.replace_marker(doc, div) toc_list = [] for el in doc.iter(): if self.header_rgx.match(el.tag): - text = ''.join(itertext(el)).strip() + self.set_level(el) + text = ''.join(el.itertext()).strip() # Do not override pre-existing ids if "id" not in el.attrib: elem_id = stashedHTML2text(text, self.markdown) - elem_id = unique(self.slugify(elem_id, '-'), used_ids) + elem_id = unique(self.slugify(elem_id, self.sep), used_ids) el.attrib["id"] = elem_id else: elem_id = el.attrib["id"] @@ -219,12 +268,9 @@ class TocExtension(Extension): def __init__(self, *args, **kwargs): self.config = { - "marker": ["[TOC]", - "Text to find and replace with Table of Contents - " - "Defaults to \"[TOC]\""], - "slugify": [slugify, - "Function to generate anchors based on header text - " - "Defaults to the headerid ext's slugify function."], + "marker": ['[TOC]', + 'Text to find and replace with Table of Contents - ' + 'Set to an empty string to disable. Defaults to "[TOC]"'], "title": ["", "Title to insert into TOC <div> - " "Defaults to an empty string"], @@ -233,7 +279,12 @@ class TocExtension(Extension): "Defaults to False"], "permalink": [0, "True or link text if a Sphinx-style permalink should " - "be added - Defaults to False"] + "be added - Defaults to False"], + "baselevel": ['1', 'Base level for headers.'], + "slugify": [slugify, + "Function to generate anchors based on header text - " + "Defaults to the headerid ext's slugify function."], + 'separator': ['-', 'Word separator. 
Defaults to "-".'] } super(TocExtension, self).__init__(*args, **kwargs) diff --git a/tests/test_extensions.py b/tests/test_extensions.py index dae8829..6642921 100644 --- a/tests/test_extensions.py +++ b/tests/test_extensions.py @@ -394,60 +394,6 @@ class TestHeaderId(unittest.TestCase): '<h1 id="some-header">Some Header</h1>' ) - def testUniqueFunc(self): - """ Test 'unique' function. """ - from markdown.extensions.headerid import unique - ids = set(['foo']) - self.assertEqual(unique('foo', ids), 'foo_1') - self.assertEqual(ids, set(['foo', 'foo_1'])) - - def testUniqueIds(self): - """ Test Unique IDs. """ - - text = '#Header\n#Header\n#Header' - self.assertEqual( - self.md.convert(text), - '<h1 id="header">Header</h1>\n' - '<h1 id="header_1">Header</h1>\n' - '<h1 id="header_2">Header</h1>' - ) - - def testBaseLevel(self): - """ Test Header Base Level. """ - - text = '#Some Header\n## Next Level' - self.assertEqual( - markdown.markdown(text, [markdown.extensions.headerid.HeaderIdExtension(level=3)]), - '<h3 id="some-header">Some Header</h3>\n' - '<h4 id="next-level">Next Level</h4>' - ) - - def testHeaderInlineMarkup(self): - """ Test Header IDs with inline markup. """ - - text = '#Some *Header* with [markup](http://example.com).' - self.assertEqual( - self.md.convert(text), - '<h1 id="some-header-with-markup">Some <em>Header</em> with ' - '<a href="http://example.com">markup</a>.</h1>' - ) - - def testHtmlEntities(self): - """ Test HeaderIDs with HTML Entities. """ - text = '# Foo & bar' - self.assertEqual( - self.md.convert(text), - '<h1 id="foo-bar">Foo & bar</h1>' - ) - - def testRawHtml(self): - """ Test HeaderIDs with raw HTML. """ - text = '# Foo <b>Bar</b> Baz.' - self.assertEqual( - self.md.convert(text), - '<h1 id="foo-bar-baz">Foo <b>Bar</b> Baz.</h1>' - ) - def testNoAutoIds(self): """ Test HeaderIDs with no auto generated IDs. 
""" @@ -733,6 +679,41 @@ class TestTOC(unittest.TestCase): '</div>\n' ) + def testAlternateMarker(self): + """ Test TOC with user defined marker. """ + md = markdown.Markdown( + extensions=[markdown.extensions.toc.TocExtension(marker='{{marker}}')] + ) + text = '{{marker}}\n\n# Header 1\n\n## Header 2' + self.assertEqual( + md.convert(text), + '<div class="toc">\n' + '<ul>\n' # noqa + '<li><a href="#header-1">Header 1</a>' # noqa + '<ul>\n' # noqa + '<li><a href="#header-2">Header 2</a></li>\n' # noqa + '</ul>\n' # noqa + '</li>\n' # noqa + '</ul>\n' # noqa + '</div>\n' + '<h1 id="header-1">Header 1</h1>\n' + '<h2 id="header-2">Header 2</h2>' + ) + + def testDisabledMarker(self): + """ Test TOC with disabled marker. """ + md = markdown.Markdown( + extensions=[markdown.extensions.toc.TocExtension(marker='')] + ) + text = '[TOC]\n\n# Header 1\n\n## Header 2' + self.assertEqual( + md.convert(text), + '<p>[TOC]</p>\n' + '<h1 id="header-1">Header 1</h1>\n' + '<h2 id="header-2">Header 2</h2>' + ) + self.assertTrue(md.toc.startswith('<div class="toc">')) + def testReset(self): """ Test TOC Reset. """ self.assertEqual(self.md.toc, '') @@ -741,6 +722,69 @@ class TestTOC(unittest.TestCase): self.md.reset() self.assertEqual(self.md.toc, '') + def testUniqueIds(self): + """ Test Unique IDs. """ + + text = '#Header\n#Header\n#Header' + self.assertEqual( + self.md.convert(text), + '<h1 id="header">Header</h1>\n' + '<h1 id="header_1">Header</h1>\n' + '<h1 id="header_2">Header</h1>' + ) + + def testHtmlEntities(self): + """ Test Headers with HTML Entities. """ + text = '# Foo & bar' + self.assertEqual( + self.md.convert(text), + '<h1 id="foo-bar">Foo & bar</h1>' + ) + + def testRawHtml(self): + """ Test Headers with raw HTML. """ + text = '# Foo <b>Bar</b> Baz.' + self.assertEqual( + self.md.convert(text), + '<h1 id="foo-bar-baz">Foo <b>Bar</b> Baz.</h1>' + ) + + def testBaseLevel(self): + """ Test Header Base Level. 
""" + md = markdown.Markdown( + extensions=[markdown.extensions.toc.TocExtension(baselevel=5)] + ) + text = '# Some Header\n\n## Next Level\n\n### Too High' + self.assertEqual( + md.convert(text), + '<h5 id="some-header">Some Header</h5>\n' + '<h6 id="next-level">Next Level</h6>\n' + '<h6 id="too-high">Too High</h6>' + ) + self.assertEqual( + md.toc, + '<div class="toc">\n' + '<ul>\n' # noqa + '<li><a href="#some-header">Some Header</a>' # noqa + '<ul>\n' # noqa + '<li><a href="#next-level">Next Level</a></li>\n' # noqa + '<li><a href="#too-high">Too High</a></li>\n' # noqa + '</ul>\n' # noqa + '</li>\n' # noqa + '</ul>\n' # noqa + '</div>\n' + ) + + def testHeaderInlineMarkup(self): + """ Test Headers with inline markup. """ + + text = '#Some *Header* with [markup](http://example.com).' + self.assertEqual( + self.md.convert(text), + '<h1 id="some-header-with-markup">Some <em>Header</em> with ' + '<a href="http://example.com">markup</a>.</h1>' + ) + def testAnchorLink(self): """ Test TOC Anchorlink. """ md = markdown.Markdown( @@ -783,6 +827,13 @@ class TestTOC(unittest.TestCase): '</div>\n' ) + def testUniqueFunc(self): + """ Test 'unique' function. """ + from markdown.extensions.toc import unique + ids = set(['foo']) + self.assertEqual(unique('foo', ids), 'foo_1') + self.assertEqual(ids, set(['foo', 'foo_1'])) + class TestSmarty(unittest.TestCase): def setUp(self): |