smarty: Get rid of canonicalize() function.

The previous problem happened because canonicalize() removed the escaped space from the closeClass regexp. Instead of fixing the function, get rid of it completely. This should also make the module load a bit faster.
author: Dmitry Shachnev <mitya57@gmail.com> 2014-02-13 17:38:31 +0400
committer: Dmitry Shachnev <mitya57@gmail.com> 2014-02-13 17:42:06 +0400
commit: 63720f2c05b8c82713fa0237f4d3242c9e7d32dc (patch)
tree: 4bcd935d76e272ce8721ef3d1416e34ff0add1df
parent: 2612d0eebf666fe7133b4c93c7cafa5f10c0d8ab (diff)
download: markdown-63720f2c05b8c82713fa0237f4d3242c9e7d32dc.tar.gz
markdown-63720f2c05b8c82713fa0237f4d3242c9e7d32dc.tar.bz2
markdown-63720f2c05b8c82713fa0237f4d3242c9e7d32dc.zip
1 file changed, 16 insertions, 51 deletions
diff --git a/markdown/extensions/smarty.py b/markdown/extensions/smarty.py
index a0737b7..f452afc 100644
--- a/markdown/extensions/smarty.py
+++ b/markdown/extensions/smarty.py
@@ -70,32 +70,20 @@ from . import Extension
 from ..inlinepatterns import HtmlPattern
 from ..util import parseBoolValue
 
-def canonicalize(regex):
-    """
-    Converts the regexp from the re.VERBOSE form to the canonical form,
-    i.e. remove all whitespace and ignore comments.
-    """
-    lines = regex.split('\n')
-    for i in range(len(lines)):
-        if ' #' in lines[i]:
-            lines[i] = lines[i][:lines[i].find(' #')]
-    return ''.join(lines).replace(' ', '')
-
 # Constants for quote education.
 punctClass = r"""[!"#\$\%'()*+,-.\/:;<=>?\@\[\\\]\^_`{|}~]"""
 endOfWordClass = r"[\s.,;:!?)]"
 closeClass = r"[^\ \t\r\n\[\{\(\-\u0002\u0003]"
 
-openingQuotesBase = r"""
-(
-    \s            | # a whitespace char, or
-    &nbsp;        | # a non-breaking space entity, or
-    --            | # dashes, or
-    –|—           | # unicode, or
-    &[mn]dash;    | # named dash entities, or
-    &#8211;|&#8212; # decimal entities
+openingQuotesBase = (
+   '(\s'              # a  whitespace char
+   '|&nbsp;'          # or a non-breaking space entity
+   '|--'              # or dashes
+   '|–|—'             # or unicode
+   '|&[mn]dash;'      # or named dash entities
+   '|&#8211;|&#8212;' # or decimal entities
+   ')'
 )
-"""
 
 # Special case if the very first character is a quote
 # followed by punctuation at a non-word-break. Close the quotes by brute force:
@@ -108,41 +96,18 @@ doubleQuoteSetsRe = r""""'(?=\w)"""
 singleQuoteSetsRe = r"""'"(?=\w)"""
 
 # Get most opening double quotes:
-openingDoubleQuotesRegex = canonicalize("""
-%s      # symbols before the quote
-"       # the quote
-(?=\w)  # followed by a word character
-""" % openingQuotesBase)
+openingDoubleQuotesRegex = r'%s"(?=\w)' % openingQuotesBase
 
 # Double closing quotes:
-closingDoubleQuotesRegex = canonicalize(r"""
-"
-(?=\s)
-""")
-
-closingDoubleQuotesRegex2 = canonicalize(r"""
-(?<=%s)  # character that indicates the quote should be closing
-"
-""" % closeClass)
+closingDoubleQuotesRegex = r'"(?=\s)'
+closingDoubleQuotesRegex2 = '(?<=%s)"' % closeClass
 
 # Get most opening single quotes:
-openingSingleQuotesRegex = canonicalize(r"""
-%s      # symbols before the quote
-'       # the quote
-(?=\w)  # followed by a word character
-""" % openingQuotesBase)
-
-closingSingleQuotesRegex = canonicalize(r"""
-(?<=%s)
-'
-(?!\s | s\b | \d)
-""" % closeClass)
-
-closingSingleQuotesRegex2 = canonicalize(r"""
-(?<=%s)
-'
-(\s | s\b)
-""" % closeClass)
+openingSingleQuotesRegex = r"%s'(?=\w)" % openingQuotesBase
+
+# Single closing quotes:
+closingSingleQuotesRegex  = r"(?<=%s)'(?!\s|s\b|\d)" % closeClass
+closingSingleQuotesRegex2 = r"(?<=%s)'(\s|s\b)" % closeClass
 
 # All remaining quotes should be opening ones
 remainingSingleQuotesRegex = "'"
author	Dmitry Shachnev <mitya57@gmail.com>	2014-02-13 17:38:31 +0400
committer	Dmitry Shachnev <mitya57@gmail.com>	2014-02-13 17:42:06 +0400
commit	63720f2c05b8c82713fa0237f4d3242c9e7d32dc (patch)
tree	4bcd935d76e272ce8721ef3d1416e34ff0add1df
parent	2612d0eebf666fe7133b4c93c7cafa5f10c0d8ab (diff)
download	markdown-63720f2c05b8c82713fa0237f4d3242c9e7d32dc.tar.gz markdown-63720f2c05b8c82713fa0237f4d3242c9e7d32dc.tar.bz2 markdown-63720f2c05b8c82713fa0237f4d3242c9e7d32dc.zip