about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Dmitry Shachnev <mitya57@gmail.com> 2014-02-13 17:38:31 +0400
committer: Dmitry Shachnev <mitya57@gmail.com> 2014-02-13 17:42:06 +0400
commit: 63720f2c05b8c82713fa0237f4d3242c9e7d32dc (patch)
tree: 4bcd935d76e272ce8721ef3d1416e34ff0add1df
parent: 2612d0eebf666fe7133b4c93c7cafa5f10c0d8ab (diff)
download: markdown-63720f2c05b8c82713fa0237f4d3242c9e7d32dc.tar.gz
markdown-63720f2c05b8c82713fa0237f4d3242c9e7d32dc.tar.bz2
markdown-63720f2c05b8c82713fa0237f4d3242c9e7d32dc.zip
smarty: Get rid of canonicalize() function.
The previous problem happened because canonicalize() removed the space from the closeClass regexp. Instead of fixing the function, get rid of it completely. This should also make the module load a bit faster.
-rw-r--r-- markdown/extensions/smarty.py 67
1 files changed, 16 insertions, 51 deletions
diff --git a/markdown/extensions/smarty.py b/markdown/extensions/smarty.py
index a0737b7..f452afc 100644
--- a/markdown/extensions/smarty.py
+++ b/markdown/extensions/smarty.py
@@ -70,32 +70,20 @@ from . import Extension
from ..inlinepatterns import HtmlPattern
from ..util import parseBoolValue
-def canonicalize(regex):
- """
- Converts the regexp from the re.VERBOSE form to the canonical form,
- i.e. remove all whitespace and ignore comments.
- """
- lines = regex.split('\n')
- for i in range(len(lines)):
- if ' #' in lines[i]:
- lines[i] = lines[i][:lines[i].find(' #')]
- return ''.join(lines).replace(' ', '')
-
# Constants for quote education.
punctClass = r"""[!"#\$\%'()*+,-.\/:;<=>?\@\[\\\]\^_`{|}~]"""
endOfWordClass = r"[\s.,;:!?)]"
closeClass = r"[^\ \t\r\n\[\{\(\-\u0002\u0003]"
-openingQuotesBase = r"""
-(
- \s | # a whitespace char, or
- &nbsp; | # a non-breaking space entity, or
- -- | # dashes, or
- –|— | # unicode, or
- &[mn]dash; | # named dash entities, or
- &#8211;|&#8212; # decimal entities
+openingQuotesBase = (
+ '(\s' # a whitespace char
+ '|&nbsp;' # or a non-breaking space entity
+ '|--' # or dashes
+ '|–|—' # or unicode
+ '|&[mn]dash;' # or named dash entities
+ '|&#8211;|&#8212;' # or decimal entities
+ ')'
)
-"""
# Special case if the very first character is a quote
# followed by punctuation at a non-word-break. Close the quotes by brute force:
@@ -108,41 +96,18 @@ doubleQuoteSetsRe = r""""'(?=\w)"""
singleQuoteSetsRe = r"""'"(?=\w)"""
# Get most opening double quotes:
-openingDoubleQuotesRegex = canonicalize("""
-%s # symbols before the quote
-" # the quote
-(?=\w) # followed by a word character
-""" % openingQuotesBase)
+openingDoubleQuotesRegex = r'%s"(?=\w)' % openingQuotesBase
# Double closing quotes:
-closingDoubleQuotesRegex = canonicalize(r"""
-"
-(?=\s)
-""")
-
-closingDoubleQuotesRegex2 = canonicalize(r"""
-(?<=%s) # character that indicates the quote should be closing
-"
-""" % closeClass)
+closingDoubleQuotesRegex = r'"(?=\s)'
+closingDoubleQuotesRegex2 = '(?<=%s)"' % closeClass
# Get most opening single quotes:
-openingSingleQuotesRegex = canonicalize(r"""
-%s # symbols before the quote
-' # the quote
-(?=\w) # followed by a word character
-""" % openingQuotesBase)
-
-closingSingleQuotesRegex = canonicalize(r"""
-(?<=%s)
-'
-(?!\s | s\b | \d)
-""" % closeClass)
-
-closingSingleQuotesRegex2 = canonicalize(r"""
-(?<=%s)
-'
-(\s | s\b)
-""" % closeClass)
+openingSingleQuotesRegex = r"%s'(?=\w)" % openingQuotesBase
+
+# Single closing quotes:
+closingSingleQuotesRegex = r"(?<=%s)'(?!\s|s\b|\d)" % closeClass
+closingSingleQuotesRegex2 = r"(?<=%s)'(\s|s\b)" % closeClass
# All remaining quotes should be opening ones
remainingSingleQuotesRegex = "'"