From a8e06b7cef5c5b0b372b9a0db0f56c4e7094e2bd Mon Sep 17 00:00:00 2001 From: Waylan Limberg Date: Thu, 13 Dec 2012 14:05:33 -0500 Subject: Fixed #164. attr_list extension attribute names are now sanitized and won't crash the serializer. --- markdown/extensions/attr_list.py | 15 +++++++++++++-- tests/extensions/attr_list.html | 3 ++- tests/extensions/attr_list.txt | 1 + 3 files changed, 16 insertions(+), 3 deletions(-) diff --git a/markdown/extensions/attr_list.py b/markdown/extensions/attr_list.py index 0aa18e0..36f3e3a 100644 --- a/markdown/extensions/attr_list.py +++ b/markdown/extensions/attr_list.py @@ -67,6 +67,10 @@ class AttrListTreeprocessor(markdown.treeprocessors.Treeprocessor): HEADER_RE = re.compile(r'[ ]*%s[ ]*$' % BASE_RE) BLOCK_RE = re.compile(r'\n[ ]*%s[ ]*$' % BASE_RE) INLINE_RE = re.compile(r'^%s' % BASE_RE) + NAME_RE = re.compile(r'[^A-Z_a-z\xc0-\xd6\xd8-\xf6\u00f8-\u02ff\u0370-\u037d' + '\u037f-\u1fff\u200c-\u200d\u2070-\u218f\u2c00-\u2fef' + '\u3001-\ud7ff\uf900-\ufdcf\ufdf0-\ufffd\u10000-\ueffff' + '\-\.0-9\xb7\u0300-\u036f\u203f-\u2040]+') def run(self, doc): for elem in doc.getiterator(): @@ -114,8 +118,15 @@ class AttrListTreeprocessor(markdown.treeprocessors.Treeprocessor): else: elem.set('class', v) else: - # assing attr k with v - elem.set(k, v) + # assign attr k with v + elem.set(self.sanitize_name(k), v) + + def sanitize_name(self, name): + """ + Sanitize name as 'an XML Name, minus the ":"'. + See http://www.w3.org/TR/REC-xml-names/#NT-NCName + """ + return self.NAME_RE.sub('_', name) class AttrListExtension(markdown.extensions.Extension): diff --git a/tests/extensions/attr_list.html b/tests/extensions/attr_list.html index 1e9c182..f50cd6a 100644 --- a/tests/extensions/attr_list.html +++ b/tests/extensions/attr_list.html @@ -14,4 +14,5 @@ And a nested

No colon for compatability with Headerid ext

-

Also a codespan: {: .someclass}.

\ No newline at end of file +

Also a codespan: {: .someclass}.

+

Bad Syntax

\ No newline at end of file diff --git a/tests/extensions/attr_list.txt b/tests/extensions/attr_list.txt index d7ed274..cd7f398 100644 --- a/tests/extensions/attr_list.txt +++ b/tests/extensions/attr_list.txt @@ -33,3 +33,4 @@ Now test overrides Also a codespan: `{: .someclass}`{: .foo}. {: #the_end} +### Bad Syntax { {: #hash5 } -- cgit v1.2.3