aboutsummaryrefslogtreecommitdiffstats
path: root/markdown/extensions/smarty.py
diff options
context:
space:
mode:
authorDmitry Shachnev <mitya57@gmail.com>2014-02-13 17:38:31 +0400
committerDmitry Shachnev <mitya57@gmail.com>2014-02-13 17:42:06 +0400
commit63720f2c05b8c82713fa0237f4d3242c9e7d32dc (patch)
tree4bcd935d76e272ce8721ef3d1416e34ff0add1df /markdown/extensions/smarty.py
parent2612d0eebf666fe7133b4c93c7cafa5f10c0d8ab (diff)
downloadmarkdown-63720f2c05b8c82713fa0237f4d3242c9e7d32dc.tar.gz
markdown-63720f2c05b8c82713fa0237f4d3242c9e7d32dc.tar.bz2
markdown-63720f2c05b8c82713fa0237f4d3242c9e7d32dc.zip
smarty: Get rid of canonicalize() function.
The previous problem happened because canonicalize() removed the space from the closeClass regexp. Instead of fixing the function, get rid of it completely. This should also make the module load a bit faster.
Diffstat (limited to 'markdown/extensions/smarty.py')
-rw-r--r--markdown/extensions/smarty.py67
1 files changed, 16 insertions, 51 deletions
diff --git a/markdown/extensions/smarty.py b/markdown/extensions/smarty.py
index a0737b7..f452afc 100644
--- a/markdown/extensions/smarty.py
+++ b/markdown/extensions/smarty.py
@@ -70,32 +70,20 @@ from . import Extension
from ..inlinepatterns import HtmlPattern
from ..util import parseBoolValue
-def canonicalize(regex):
- """
- Converts the regexp from the re.VERBOSE form to the canonical form,
- i.e. remove all whitespace and ignore comments.
- """
- lines = regex.split('\n')
- for i in range(len(lines)):
- if ' #' in lines[i]:
- lines[i] = lines[i][:lines[i].find(' #')]
- return ''.join(lines).replace(' ', '')
-
# Constants for quote education.
punctClass = r"""[!"#\$\%'()*+,-.\/:;<=>?\@\[\\\]\^_`{|}~]"""
endOfWordClass = r"[\s.,;:!?)]"
closeClass = r"[^\ \t\r\n\[\{\(\-\u0002\u0003]"
-openingQuotesBase = r"""
-(
- \s | # a whitespace char, or
- &nbsp; | # a non-breaking space entity, or
- -- | # dashes, or
- –|— | # unicode, or
- &[mn]dash; | # named dash entities, or
- &#8211;|&#8212; # decimal entities
+openingQuotesBase = (
+ '(\s' # a whitespace char
+ '|&nbsp;' # or a non-breaking space entity
+ '|--' # or dashes
+ '|–|—' # or unicode
+ '|&[mn]dash;' # or named dash entities
+ '|&#8211;|&#8212;' # or decimal entities
+ ')'
)
-"""
# Special case if the very first character is a quote
# followed by punctuation at a non-word-break. Close the quotes by brute force:
@@ -108,41 +96,18 @@ doubleQuoteSetsRe = r""""'(?=\w)"""
singleQuoteSetsRe = r"""'"(?=\w)"""
# Get most opening double quotes:
-openingDoubleQuotesRegex = canonicalize("""
-%s # symbols before the quote
-" # the quote
-(?=\w) # followed by a word character
-""" % openingQuotesBase)
+openingDoubleQuotesRegex = r'%s"(?=\w)' % openingQuotesBase
# Double closing quotes:
-closingDoubleQuotesRegex = canonicalize(r"""
-"
-(?=\s)
-""")
-
-closingDoubleQuotesRegex2 = canonicalize(r"""
-(?<=%s) # character that indicates the quote should be closing
-"
-""" % closeClass)
+closingDoubleQuotesRegex = r'"(?=\s)'
+closingDoubleQuotesRegex2 = '(?<=%s)"' % closeClass
# Get most opening single quotes:
-openingSingleQuotesRegex = canonicalize(r"""
-%s # symbols before the quote
-' # the quote
-(?=\w) # followed by a word character
-""" % openingQuotesBase)
-
-closingSingleQuotesRegex = canonicalize(r"""
-(?<=%s)
-'
-(?!\s | s\b | \d)
-""" % closeClass)
-
-closingSingleQuotesRegex2 = canonicalize(r"""
-(?<=%s)
-'
-(\s | s\b)
-""" % closeClass)
+openingSingleQuotesRegex = r"%s'(?=\w)" % openingQuotesBase
+
+# Single closing quotes:
+closingSingleQuotesRegex = r"(?<=%s)'(?!\s|s\b|\d)" % closeClass
+closingSingleQuotesRegex2 = r"(?<=%s)'(\s|s\b)" % closeClass
# All remaining quotes should be opening ones
remainingSingleQuotesRegex = "'"