From 7f63b20b819b83afef0ddadc2e210ddce32a2be3 Mon Sep 17 00:00:00 2001 From: Waylan Limberg Date: Sat, 14 Mar 2015 20:39:46 -0400 Subject: Removed deprecated safe_mode. --- markdown/__main__.py | 8 ----- markdown/core.py | 24 +-------------- markdown/extensions/codehilite.py | 3 +- markdown/extensions/extra.py | 17 +++++----- markdown/extensions/fenced_code.py | 2 +- markdown/extensions/smarty.py | 2 +- markdown/extensions/toc.py | 4 +-- markdown/inlinepatterns.py | 63 +++----------------------------------- markdown/postprocessors.py | 21 ++----------- markdown/preprocessors.py | 3 +- markdown/util.py | 5 ++- 11 files changed, 24 insertions(+), 128 deletions(-) (limited to 'markdown') diff --git a/markdown/__main__.py b/markdown/__main__.py index 8b2c916..c29687b 100644 --- a/markdown/__main__.py +++ b/markdown/__main__.py @@ -36,10 +36,6 @@ def parse_options(args=None, values=None): metavar="OUTPUT_FILE") parser.add_option("-e", "--encoding", dest="encoding", help="Encoding for input and output files.",) - parser.add_option("-s", "--safe", dest="safe", default=False, - metavar="SAFE_MODE", - help="Deprecated! 'replace', 'remove' or 'escape' HTML " - "tags in input") parser.add_option("-o", "--output_format", dest="output_format", default='xhtml1', metavar="OUTPUT_FORMAT", help="'xhtml1' (default), 'html4' or 'html5'.") @@ -102,10 +98,6 @@ def parse_options(args=None, values=None): 'lazy_ol': options.lazy_ol } - if options.safe: - # Avoid deprecation warning if user didn't set option - opts['safe_mode'] = options.safe - return opts, options.verbose diff --git a/markdown/core.py b/markdown/core.py index e92aad0..7d9d839 100644 --- a/markdown/core.py +++ b/markdown/core.py @@ -2,8 +2,8 @@ from __future__ import absolute_import from __future__ import unicode_literals import codecs import sys -import logging import warnings +import logging import importlib from . 
import util from .preprocessors import build_preprocessors @@ -65,10 +65,6 @@ class Markdown(object): Note that it is suggested that the more specific formats ("xhtml1" and "html4") be used as "xhtml" or "html" may change in the future if it makes sense at that time. - * safe_mode: Deprecated! Disallow raw html. One of "remove", "replace" - or "escape". - * html_replacement_text: Deprecated! Text used when safe_mode is set - to "replace". * tab_length: Length of tabs in the source. Default: 4 * enable_attributes: Enable the conversion of attributes. Default: True * smart_emphasis: Treat `_connected_words_` intelligently Default: True @@ -80,24 +76,6 @@ class Markdown(object): for option, default in self.option_defaults.items(): setattr(self, option, kwargs.get(option, default)) - self.safeMode = kwargs.get('safe_mode', False) - if self.safeMode and 'enable_attributes' not in kwargs: - # Disable attributes in safeMode when not explicitly set - self.enable_attributes = False - - if 'safe_mode' in kwargs: - warnings.warn('"safe_mode" is deprecated in Python-Markdown. ' - 'Use an HTML sanitizer (like ' - 'Bleach https://bleach.readthedocs.io/) ' - 'if you are parsing untrusted markdown text. 
' - 'See the 2.6 release notes for more info', - DeprecationWarning) - - if 'html_replacement_text' in kwargs: - warnings.warn('The "html_replacement_text" keyword is ' - 'deprecated along with "safe_mode".', - DeprecationWarning) - self.ESCAPED_CHARS = ['\\', '`', '*', '_', '{', '}', '[', ']', '(', ')', '>', '#', '+', '-', '.', '!'] diff --git a/markdown/extensions/codehilite.py b/markdown/extensions/codehilite.py index ee42d57..9c91c37 100644 --- a/markdown/extensions/codehilite.py +++ b/markdown/extensions/codehilite.py @@ -215,8 +215,7 @@ class HiliteTreeprocessor(Treeprocessor): tab_length=self.markdown.tab_length, use_pygments=self.config['use_pygments'] ) - placeholder = self.markdown.htmlStash.store(code.hilite(), - safe=True) + placeholder = self.markdown.htmlStash.store(code.hilite()) # Clear codeblock in etree instance block.clear() # Change to p element which will later diff --git a/markdown/extensions/extra.py b/markdown/extensions/extra.py index 587ba64..f59e09e 100644 --- a/markdown/extensions/extra.py +++ b/markdown/extensions/extra.py @@ -58,15 +58,14 @@ class ExtraExtension(Extension): def extendMarkdown(self, md, md_globals): """ Register extension instances. 
""" md.registerExtensions(extensions, self.config) - if not md.safeMode: - # Turn on processing of markdown text within raw html - md.preprocessors['html_block'].markdown_in_raw = True - md.parser.blockprocessors.add('markdown_block', - MarkdownInHtmlProcessor(md.parser), - '_begin') - md.parser.blockprocessors.tag_counter = -1 - md.parser.blockprocessors.contain_span_tags = re.compile( - r'^(p|h[1-6]|li|dd|dt|td|th|legend|address)$', re.IGNORECASE) + # Turn on processing of markdown text within raw html + md.preprocessors['html_block'].markdown_in_raw = True + md.parser.blockprocessors.add('markdown_block', + MarkdownInHtmlProcessor(md.parser), + '_begin') + md.parser.blockprocessors.tag_counter = -1 + md.parser.blockprocessors.contain_span_tags = re.compile( + r'^(p|h[1-6]|li|dd|dt|td|th|legend|address)$', re.IGNORECASE) def makeExtension(*args, **kwargs): diff --git a/markdown/extensions/fenced_code.py b/markdown/extensions/fenced_code.py index 392c654..0975bb5 100644 --- a/markdown/extensions/fenced_code.py +++ b/markdown/extensions/fenced_code.py @@ -92,7 +92,7 @@ class FencedBlockPreprocessor(Preprocessor): code = self.CODE_WRAP % (lang, self._escape(m.group('code'))) - placeholder = self.markdown.htmlStash.store(code, safe=True) + placeholder = self.markdown.htmlStash.store(code) text = '%s\n%s\n%s' % (text[:m.start()], placeholder, text[m.end():]) diff --git a/markdown/extensions/smarty.py b/markdown/extensions/smarty.py index c2af7cd..ba5b6b6 100644 --- a/markdown/extensions/smarty.py +++ b/markdown/extensions/smarty.py @@ -163,7 +163,7 @@ class SubstituteTextPattern(HtmlPattern): if isinstance(part, int): result += m.group(part) else: - result += self.markdown.htmlStash.store(part, safe=True) + result += self.markdown.htmlStash.store(part) return result diff --git a/markdown/extensions/toc.py b/markdown/extensions/toc.py index b222cb4..f884f9b 100644 --- a/markdown/extensions/toc.py +++ b/markdown/extensions/toc.py @@ -49,11 +49,9 @@ def 
stashedHTML2text(text, md): def _html_sub(m): """ Substitute raw html with plain text. """ try: - raw, safe = md.htmlStash.rawHtmlBlocks[int(m.group(1))] + raw = md.htmlStash.rawHtmlBlocks[int(m.group(1))] except (IndexError, TypeError): # pragma: no cover return m.group(0) - if md.safeMode and not safe: # pragma: no cover - return '' # Strip out tags and entities - leaveing text return re.sub(r'(<[^>]+>)|(&[\#a-zA-Z0-9]+;)', '', raw) diff --git a/markdown/inlinepatterns.py b/markdown/inlinepatterns.py index bfdffb3..478b55f 100644 --- a/markdown/inlinepatterns.py +++ b/markdown/inlinepatterns.py @@ -46,10 +46,6 @@ from __future__ import unicode_literals from . import util from . import odict import re -try: # pragma: no cover - from urllib.parse import urlparse, urlunparse -except ImportError: # pragma: no cover - from urlparse import urlparse, urlunparse try: # pragma: no cover from html import entities except ImportError: # pragma: no cover @@ -73,8 +69,7 @@ def build_inlinepatterns(md_instance, **kwargs): inlinePatterns["autolink"] = AutolinkPattern(AUTOLINK_RE, md_instance) inlinePatterns["automail"] = AutomailPattern(AUTOMAIL_RE, md_instance) inlinePatterns["linebreak"] = SubstituteTagPattern(LINE_BREAK_RE, 'br') - if md_instance.safeMode != 'escape': - inlinePatterns["html"] = HtmlPattern(HTML_RE, md_instance) + inlinePatterns["html"] = HtmlPattern(HTML_RE, md_instance) inlinePatterns["entity"] = HtmlPattern(ENTITY_RE, md_instance) inlinePatterns["not_strong"] = SimpleTextPattern(NOT_STRONG_RE) inlinePatterns["em_strong"] = DoubleTagPattern(EM_STRONG_RE, 'strong,em') @@ -204,8 +199,6 @@ class Pattern(object): self.compiled_re = re.compile(r"^(.*?)%s(.*)$" % pattern, re.DOTALL | re.UNICODE) - # Api for Markdown to pass safe_mode into instance - self.safe_mode = False if markdown_instance: self.markdown = markdown_instance @@ -369,7 +362,7 @@ class LinkPattern(Pattern): if href: if href[0] == "<": href = href[1:-1] - el.set("href", 
self.sanitize_url(self.unescape(href.strip()))) + el.set("href", self.unescape(href.strip())) else: el.set("href", "") @@ -378,52 +371,6 @@ class LinkPattern(Pattern): el.set("title", title) return el - def sanitize_url(self, url): - """ - Sanitize a url against xss attacks in "safe_mode". - - Rather than specifically blacklisting `javascript:alert("XSS")` and all - its aliases (see ), we whitelist known - safe url formats. Most urls contain a network location, however some - are known not to (i.e.: mailto links). Script urls do not contain a - location. Additionally, for `javascript:...`, the scheme would be - "javascript" but some aliases will appear to `urlparse()` to have no - scheme. On top of that relative links (i.e.: "foo/bar.html") have no - scheme. Therefore we must check "path", "parameters", "query" and - "fragment" for any literal colons. We don't check "scheme" for colons - because it *should* never have any and "netloc" must allow the form: - `username:password@host:port`. - - """ - if not self.markdown.safeMode: - # Return immediately bipassing parsing. - return url - - try: - scheme, netloc, path, params, query, fragment = url = urlparse(url) - except ValueError: # pragma: no cover - # Bad url - so bad it couldn't be parsed. - return '' - - locless_schemes = ['', 'mailto', 'news'] - allowed_schemes = locless_schemes + ['http', 'https', 'ftp', 'ftps'] - if scheme not in allowed_schemes: - # Not a known (allowed) scheme. Not safe. - return '' - - if netloc == '' and scheme not in locless_schemes: # pragma: no cover - # This should not happen. Treat as suspect. - return '' - - for part in url[2:]: - if ":" in part: - # A colon in "path", "parameters", "query" - # or "fragment" is suspect. - return '' - - # Url passes all tests. Return url as-is. - return urlunparse(url) - class ImagePattern(LinkPattern): """ Return a img element from the given match. 
""" @@ -434,7 +381,7 @@ class ImagePattern(LinkPattern): src = src_parts[0] if src[0] == "<" and src[-1] == ">": src = src[1:-1] - el.set('src', self.sanitize_url(self.unescape(src))) + el.set('src', self.unescape(src)) else: el.set('src', "") if len(src_parts) > 1: @@ -476,7 +423,7 @@ class ReferencePattern(LinkPattern): def makeTag(self, href, title, text): el = util.etree.Element('a') - el.set('href', self.sanitize_url(href)) + el.set('href', href) if title: el.set('title', title) @@ -488,7 +435,7 @@ class ImageReferencePattern(ReferencePattern): """ Match to a stored reference and return img element. """ def makeTag(self, href, title, text): el = util.etree.Element("img") - el.set("src", self.sanitize_url(href)) + el.set("src", href) if title: el.set("title", title) diff --git a/markdown/postprocessors.py b/markdown/postprocessors.py index 7b9aa0b..f59e070 100644 --- a/markdown/postprocessors.py +++ b/markdown/postprocessors.py @@ -50,19 +50,11 @@ class RawHtmlPostprocessor(Postprocessor): """ Restore raw html to the document. """ def run(self, text): - """ Iterate over html stash and restore "safe" html. """ + """ Iterate over html stash and restore html. """ replacements = OrderedDict() for i in range(self.markdown.htmlStash.html_counter): - html, safe = self.markdown.htmlStash.rawHtmlBlocks[i] - if self.markdown.safeMode and not safe: - if str(self.markdown.safeMode).lower() == 'escape': - html = self.escape(html) - elif str(self.markdown.safeMode).lower() == 'remove': - html = '' - else: - html = self.markdown.html_replacement_text - if (self.isblocklevel(html) and - (safe or not self.markdown.safeMode)): + html = self.markdown.htmlStash.rawHtmlBlocks[i] + if self.isblocklevel(html): replacements["

<p>%s</p>

" % (self.markdown.htmlStash.get_placeholder(i))] = \ html + "\n" @@ -74,13 +66,6 @@ class RawHtmlPostprocessor(Postprocessor): return text - def escape(self, html): - """ Basic html escaping """ - html = html.replace('&', '&amp;') - html = html.replace('<', '&lt;') - html = html.replace('>', '&gt;') - return html.replace('"', '&quot;') - def isblocklevel(self, html): m = re.match(r'^\<\/?([^ >]+)', html) if m: diff --git a/markdown/preprocessors.py b/markdown/preprocessors.py index 1e99afa..d0af51a 100644 --- a/markdown/preprocessors.py +++ b/markdown/preprocessors.py @@ -17,8 +17,7 @@ def build_preprocessors(md_instance, **kwargs): """ Build the default set of preprocessors used by Markdown. """ preprocessors = odict.OrderedDict() preprocessors['normalize_whitespace'] = NormalizeWhitespace(md_instance) - if md_instance.safeMode != 'escape': - preprocessors["html_block"] = HtmlBlockPreprocessor(md_instance) + preprocessors["html_block"] = HtmlBlockPreprocessor(md_instance) preprocessors["reference"] = ReferencePreprocessor(md_instance) return preprocessors diff --git a/markdown/util.py b/markdown/util.py index 9e87019..8897195 100644 --- a/markdown/util.py +++ b/markdown/util.py @@ -141,7 +141,7 @@ class HtmlStash(object): self.tag_counter = 0 self.tag_data = [] # list of dictionaries in the order tags appear - def store(self, html, safe=False): + def store(self, html): """ Saves an HTML segment for later reinsertion. Returns a placeholder string that needs to be inserted into the @@ -150,12 +150,11 @@ class HtmlStash(object): Keyword arguments: * html: an html segment - * safe: label an html segment as safe for safemode Returns : a placeholder string """ - self.rawHtmlBlocks.append((html, safe)) + self.rawHtmlBlocks.append(html) placeholder = self.get_placeholder(self.html_counter) self.html_counter += 1 return placeholder -- cgit v1.2.3