From 63720f2c05b8c82713fa0237f4d3242c9e7d32dc Mon Sep 17 00:00:00 2001 From: Dmitry Shachnev Date: Thu, 13 Feb 2014 17:38:31 +0400 Subject: smarty: Get rid of canonicalize() function. The previous problem happened because canonicalize() removed the space from closeClass regexp. Instead of fixing the function, get rid of it completely. It should also make module load a bit faster. --- markdown/extensions/smarty.py | 67 +++++++++++-------------------------------- 1 file changed, 16 insertions(+), 51 deletions(-) diff --git a/markdown/extensions/smarty.py b/markdown/extensions/smarty.py index a0737b7..f452afc 100644 --- a/markdown/extensions/smarty.py +++ b/markdown/extensions/smarty.py @@ -70,32 +70,20 @@ from . import Extension from ..inlinepatterns import HtmlPattern from ..util import parseBoolValue -def canonicalize(regex): - """ - Converts the regexp from the re.VERBOSE form to the canonical form, - i.e. remove all whitespace and ignore comments. - """ - lines = regex.split('\n') - for i in range(len(lines)): - if ' #' in lines[i]: - lines[i] = lines[i][:lines[i].find(' #')] - return ''.join(lines).replace(' ', '') - # Constants for quote education. punctClass = r"""[!"#\$\%'()*+,-.\/:;<=>?\@\[\\\]\^_`{|}~]""" endOfWordClass = r"[\s.,;:!?)]" closeClass = r"[^\ \t\r\n\[\{\(\-\u0002\u0003]" -openingQuotesBase = r""" -( - \s | # a whitespace char, or - &nbsp; | # a non-breaking space entity, or - -- | # dashes, or - –|— | # unicode, or - &[mn]dash; | # named dash entities, or - &#8211;|&#8212; # decimal entities +openingQuotesBase = ( + '(\s' # a whitespace char + '|&nbsp;' # or a non-breaking space entity + '|--' # or dashes + '|–|—' # or unicode + '|&[mn]dash;' # or named dash entities + '|&#8211;|&#8212;' # or decimal entities + ')' ) -""" # Special case if the very first character is a quote # followed by punctuation at a non-word-break. 
Close the quotes by brute force: @@ -108,41 +96,18 @@ doubleQuoteSetsRe = r""""'(?=\w)""" singleQuoteSetsRe = r"""'"(?=\w)""" # Get most opening double quotes: -openingDoubleQuotesRegex = canonicalize(""" -%s # symbols before the quote -" # the quote -(?=\w) # followed by a word character -""" % openingQuotesBase) +openingDoubleQuotesRegex = r'%s"(?=\w)' % openingQuotesBase # Double closing quotes: -closingDoubleQuotesRegex = canonicalize(r""" -" -(?=\s) -""") - -closingDoubleQuotesRegex2 = canonicalize(r""" -(?<=%s) # character that indicates the quote should be closing -" -""" % closeClass) +closingDoubleQuotesRegex = r'"(?=\s)' +closingDoubleQuotesRegex2 = '(?<=%s)"' % closeClass # Get most opening single quotes: -openingSingleQuotesRegex = canonicalize(r""" -%s # symbols before the quote -' # the quote -(?=\w) # followed by a word character -""" % openingQuotesBase) - -closingSingleQuotesRegex = canonicalize(r""" -(?<=%s) -' -(?!\s | s\b | \d) -""" % closeClass) - -closingSingleQuotesRegex2 = canonicalize(r""" -(?<=%s) -' -(\s | s\b) -""" % closeClass) +openingSingleQuotesRegex = r"%s'(?=\w)" % openingQuotesBase + +# Single closing quotes: +closingSingleQuotesRegex = r"(?<=%s)'(?!\s|s\b|\d)" % closeClass +closingSingleQuotesRegex2 = r"(?<=%s)'(\s|s\b)" % closeClass # All remaining quotes should be opening ones remainingSingleQuotesRegex = "'" -- cgit v1.2.3