From 2e9beaecae75ec01899afa846f775ba7b3105ff8 Mon Sep 17 00:00:00 2001 From: Isaac Muse Date: Tue, 2 Jan 2018 11:10:19 -0700 Subject: Make sure regex patterns are raw strings (#614) Python 3.6 is starting to reject invalid escapes. Regular expression patterns should be raw strings to avoid having regex escapes being mistaken for invalid string escapes. Fixes #611. --- markdown/extensions/smarty.py | 20 ++++++++++---------- markdown/extensions/toc.py | 4 ++-- markdown/inlinepatterns.py | 2 +- markdown/util.py | 12 ++++++------ 4 files changed, 19 insertions(+), 19 deletions(-) diff --git a/markdown/extensions/smarty.py b/markdown/extensions/smarty.py index 5031bc4..c2af7cd 100644 --- a/markdown/extensions/smarty.py +++ b/markdown/extensions/smarty.py @@ -91,16 +91,16 @@ from ..treeprocessors import InlineProcessor # Constants for quote education. punctClass = r"""[!"#\$\%'()*+,-.\/:;<=>?\@\[\\\]\^_`{|}~]""" endOfWordClass = r"[\s.,;:!?)]" -closeClass = "[^\ \t\r\n\[\{\(\-\u0002\u0003]" +closeClass = r"[^\ \t\r\n\[\{\(\-\u0002\u0003]" openingQuotesBase = ( - '(\s' # a whitespace char - '|&nbsp;' # or a non-breaking space entity - '|--' # or dashes - '|–|—' # or unicode - '|&[mn]dash;' # or named dash entities - '|&#8211;|&#8212;' # or decimal entities - ')' + r'(\s' # a whitespace char + r'|&nbsp;' # or a non-breaking space entity + r'|--' # or dashes + r'|–|—' # or unicode + r'|&[mn]dash;' # or named dash entities + r'|&#8211;|&#8212;' # or decimal entities + r')' ) substitutions = { @@ -144,8 +144,8 @@ closingSingleQuotesRegex = r"(?<=%s)'(?!\s|s\b|\d)" % closeClass closingSingleQuotesRegex2 = r"(?<=%s)'(\s|s\b)" % closeClass # All remaining quotes should be opening ones -remainingSingleQuotesRegex = "'" -remainingDoubleQuotesRegex = '"' +remainingSingleQuotesRegex = r"'" +remainingDoubleQuotesRegex = r'"' HTML_STRICT_RE = HTML_RE + r'(?!\>)' diff --git a/markdown/extensions/toc.py b/markdown/extensions/toc.py index 2c4a4b5..b222cb4 100644 --- a/markdown/extensions/toc.py +++ 
b/markdown/extensions/toc.py @@ -25,8 +25,8 @@ import unicodedata def slugify(value, separator): """ Slugify a string, to make it URL friendly. """ value = unicodedata.normalize('NFKD', value).encode('ascii', 'ignore') - value = re.sub('[^\w\s-]', '', value.decode('ascii')).strip().lower() - return re.sub('[%s\s]+' % separator, separator, value) + value = re.sub(r'[^\w\s-]', '', value.decode('ascii')).strip().lower() + return re.sub(r'[%s\s]+' % separator, separator, value) IDCOUNT_RE = re.compile(r'^(.*)_([0-9]+)$') diff --git a/markdown/inlinepatterns.py b/markdown/inlinepatterns.py index 2f00b3d..bfdffb3 100644 --- a/markdown/inlinepatterns.py +++ b/markdown/inlinepatterns.py @@ -201,7 +201,7 @@ class Pattern(object): """ self.pattern = pattern - self.compiled_re = re.compile("^(.*?)%s(.*)$" % pattern, + self.compiled_re = re.compile(r"^(.*?)%s(.*)$" % pattern, re.DOTALL | re.UNICODE) # Api for Markdown to pass safe_mode into instance diff --git a/markdown/util.py b/markdown/util.py index b37e5ae..9e87019 100644 --- a/markdown/util.py +++ b/markdown/util.py @@ -27,12 +27,12 @@ Constants you might want to modify BLOCK_LEVEL_ELEMENTS = re.compile( - "^(p|div|h[1-6]|blockquote|pre|table|dl|ol|ul" - "|script|noscript|form|fieldset|iframe|math" - "|hr|hr/|style|li|dt|dd|thead|tbody" - "|tr|th|td|section|footer|header|group|figure" - "|figcaption|aside|article|canvas|output" - "|progress|video|nav|main)$", + r"^(p|div|h[1-6]|blockquote|pre|table|dl|ol|ul" + r"|script|noscript|form|fieldset|iframe|math" + r"|hr|hr/|style|li|dt|dd|thead|tbody" + r"|tr|th|td|section|footer|header|group|figure" + r"|figcaption|aside|article|canvas|output" + r"|progress|video|nav|main)$", re.IGNORECASE ) # Placeholders -- cgit v1.2.3