aboutsummaryrefslogtreecommitdiffstats
path: root/markdown/inlinepatterns.py
diff options
context:
space:
mode:
Diffstat (limited to 'markdown/inlinepatterns.py')
-rw-r--r--markdown/inlinepatterns.py63
1 files changed, 5 insertions, 58 deletions
diff --git a/markdown/inlinepatterns.py b/markdown/inlinepatterns.py
index bfdffb3..478b55f 100644
--- a/markdown/inlinepatterns.py
+++ b/markdown/inlinepatterns.py
@@ -47,10 +47,6 @@ from . import util
from . import odict
import re
try: # pragma: no cover
- from urllib.parse import urlparse, urlunparse
-except ImportError: # pragma: no cover
- from urlparse import urlparse, urlunparse
-try: # pragma: no cover
from html import entities
except ImportError: # pragma: no cover
import htmlentitydefs as entities
@@ -73,8 +69,7 @@ def build_inlinepatterns(md_instance, **kwargs):
inlinePatterns["autolink"] = AutolinkPattern(AUTOLINK_RE, md_instance)
inlinePatterns["automail"] = AutomailPattern(AUTOMAIL_RE, md_instance)
inlinePatterns["linebreak"] = SubstituteTagPattern(LINE_BREAK_RE, 'br')
- if md_instance.safeMode != 'escape':
- inlinePatterns["html"] = HtmlPattern(HTML_RE, md_instance)
+ inlinePatterns["html"] = HtmlPattern(HTML_RE, md_instance)
inlinePatterns["entity"] = HtmlPattern(ENTITY_RE, md_instance)
inlinePatterns["not_strong"] = SimpleTextPattern(NOT_STRONG_RE)
inlinePatterns["em_strong"] = DoubleTagPattern(EM_STRONG_RE, 'strong,em')
@@ -204,8 +199,6 @@ class Pattern(object):
self.compiled_re = re.compile(r"^(.*?)%s(.*)$" % pattern,
re.DOTALL | re.UNICODE)
- # Api for Markdown to pass safe_mode into instance
- self.safe_mode = False
if markdown_instance:
self.markdown = markdown_instance
@@ -369,7 +362,7 @@ class LinkPattern(Pattern):
if href:
if href[0] == "<":
href = href[1:-1]
- el.set("href", self.sanitize_url(self.unescape(href.strip())))
+ el.set("href", self.unescape(href.strip()))
else:
el.set("href", "")
@@ -378,52 +371,6 @@ class LinkPattern(Pattern):
el.set("title", title)
return el
- def sanitize_url(self, url):
- """
- Sanitize a url against xss attacks in "safe_mode".
-
- Rather than specifically blacklisting `javascript:alert("XSS")` and all
- its aliases (see <http://ha.ckers.org/xss.html>), we whitelist known
- safe url formats. Most urls contain a network location, however some
- are known not to (i.e.: mailto links). Script urls do not contain a
- location. Additionally, for `javascript:...`, the scheme would be
- "javascript" but some aliases will appear to `urlparse()` to have no
- scheme. On top of that relative links (i.e.: "foo/bar.html") have no
- scheme. Therefore we must check "path", "parameters", "query" and
- "fragment" for any literal colons. We don't check "scheme" for colons
- because it *should* never have any and "netloc" must allow the form:
- `username:password@host:port`.
-
- """
- if not self.markdown.safeMode:
- # Return immediately bipassing parsing.
- return url
-
- try:
- scheme, netloc, path, params, query, fragment = url = urlparse(url)
- except ValueError: # pragma: no cover
- # Bad url - so bad it couldn't be parsed.
- return ''
-
- locless_schemes = ['', 'mailto', 'news']
- allowed_schemes = locless_schemes + ['http', 'https', 'ftp', 'ftps']
- if scheme not in allowed_schemes:
- # Not a known (allowed) scheme. Not safe.
- return ''
-
- if netloc == '' and scheme not in locless_schemes: # pragma: no cover
- # This should not happen. Treat as suspect.
- return ''
-
- for part in url[2:]:
- if ":" in part:
- # A colon in "path", "parameters", "query"
- # or "fragment" is suspect.
- return ''
-
- # Url passes all tests. Return url as-is.
- return urlunparse(url)
-
class ImagePattern(LinkPattern):
""" Return a img element from the given match. """
@@ -434,7 +381,7 @@ class ImagePattern(LinkPattern):
src = src_parts[0]
if src[0] == "<" and src[-1] == ">":
src = src[1:-1]
- el.set('src', self.sanitize_url(self.unescape(src)))
+ el.set('src', self.unescape(src))
else:
el.set('src', "")
if len(src_parts) > 1:
@@ -476,7 +423,7 @@ class ReferencePattern(LinkPattern):
def makeTag(self, href, title, text):
el = util.etree.Element('a')
- el.set('href', self.sanitize_url(href))
+ el.set('href', href)
if title:
el.set('title', title)
@@ -488,7 +435,7 @@ class ImageReferencePattern(ReferencePattern):
""" Match to a stored reference and return img element. """
def makeTag(self, href, title, text):
el = util.etree.Element("img")
- el.set("src", self.sanitize_url(href))
+ el.set("src", href)
if title:
el.set("title", title)