diff options
author | Dmitry Shachnev <mitya57@gmail.com> | 2014-02-13 17:38:31 +0400 |
---|---|---|
committer | Dmitry Shachnev <mitya57@gmail.com> | 2014-02-13 17:42:06 +0400 |
commit | 63720f2c05b8c82713fa0237f4d3242c9e7d32dc (patch) | |
tree | 4bcd935d76e272ce8721ef3d1416e34ff0add1df | |
parent | 2612d0eebf666fe7133b4c93c7cafa5f10c0d8ab (diff) | |
download | markdown-63720f2c05b8c82713fa0237f4d3242c9e7d32dc.tar.gz markdown-63720f2c05b8c82713fa0237f4d3242c9e7d32dc.tar.bz2 markdown-63720f2c05b8c82713fa0237f4d3242c9e7d32dc.zip |
smarty: Get rid of canonicalize() function.
The previous problem happened because canonicalize() removed the
space from the closeClass regexp.
Instead of fixing the function, get rid of it completely. This should
also make the module load a bit faster.
-rw-r--r-- | markdown/extensions/smarty.py | 67 |
1 files changed, 16 insertions, 51 deletions
diff --git a/markdown/extensions/smarty.py b/markdown/extensions/smarty.py index a0737b7..f452afc 100644 --- a/markdown/extensions/smarty.py +++ b/markdown/extensions/smarty.py @@ -70,32 +70,20 @@ from . import Extension from ..inlinepatterns import HtmlPattern from ..util import parseBoolValue -def canonicalize(regex): - """ - Converts the regexp from the re.VERBOSE form to the canonical form, - i.e. remove all whitespace and ignore comments. - """ - lines = regex.split('\n') - for i in range(len(lines)): - if ' #' in lines[i]: - lines[i] = lines[i][:lines[i].find(' #')] - return ''.join(lines).replace(' ', '') - # Constants for quote education. punctClass = r"""[!"#\$\%'()*+,-.\/:;<=>?\@\[\\\]\^_`{|}~]""" endOfWordClass = r"[\s.,;:!?)]" closeClass = r"[^\ \t\r\n\[\{\(\-\u0002\u0003]" -openingQuotesBase = r""" -( - \s | # a whitespace char, or - | # a non-breaking space entity, or - -- | # dashes, or - –|— | # unicode, or - &[mn]dash; | # named dash entities, or - –|— # decimal entities +openingQuotesBase = ( + '(\s' # a whitespace char + '| ' # or a non-breaking space entity + '|--' # or dashes + '|–|—' # or unicode + '|&[mn]dash;' # or named dash entities + '|–|—' # or decimal entities + ')' ) -""" # Special case if the very first character is a quote # followed by punctuation at a non-word-break. 
Close the quotes by brute force: @@ -108,41 +96,18 @@ doubleQuoteSetsRe = r""""'(?=\w)""" singleQuoteSetsRe = r"""'"(?=\w)""" # Get most opening double quotes: -openingDoubleQuotesRegex = canonicalize(""" -%s # symbols before the quote -" # the quote -(?=\w) # followed by a word character -""" % openingQuotesBase) +openingDoubleQuotesRegex = r'%s"(?=\w)' % openingQuotesBase # Double closing quotes: -closingDoubleQuotesRegex = canonicalize(r""" -" -(?=\s) -""") - -closingDoubleQuotesRegex2 = canonicalize(r""" -(?<=%s) # character that indicates the quote should be closing -" -""" % closeClass) +closingDoubleQuotesRegex = r'"(?=\s)' +closingDoubleQuotesRegex2 = '(?<=%s)"' % closeClass # Get most opening single quotes: -openingSingleQuotesRegex = canonicalize(r""" -%s # symbols before the quote -' # the quote -(?=\w) # followed by a word character -""" % openingQuotesBase) - -closingSingleQuotesRegex = canonicalize(r""" -(?<=%s) -' -(?!\s | s\b | \d) -""" % closeClass) - -closingSingleQuotesRegex2 = canonicalize(r""" -(?<=%s) -' -(\s | s\b) -""" % closeClass) +openingSingleQuotesRegex = r"%s'(?=\w)" % openingQuotesBase + +# Single closing quotes: +closingSingleQuotesRegex = r"(?<=%s)'(?!\s|s\b|\d)" % closeClass +closingSingleQuotesRegex2 = r"(?<=%s)'(\s|s\b)" % closeClass # All remaining quotes should be opening ones remainingSingleQuotesRegex = "'" |