Fixed #69. url_sanitize no longer crashes on unparsable urls.

Also optimized the code to bypass parsing when not in safe_mode, and to return immediately upon failure rather than continue parsing when in safe_mode. Note that in Python 2.7+ more URLs may fail than in older versions, because IPv6 support was added to urlparse and it apparently mistakenly identifies some URLs as IPv6 when they are not. Seeing as this only applies to safe_mode now, I don't really care.
author: Waylan Limberg <waylan@gmail.com> 2011-12-29 02:11:48 +0800
committer: Mike Dirolf <mike@dirolf.com> 2012-01-15 00:45:36 +0800
commit: 35930e0928e19e37f81c906d5d11dfcc1087092b (patch)
tree: e875b58aa7d3a23898b02d90dfa58d6a40399297 /markdown/inlinepatterns.py
parent: 12baab2e34a49530f3b712d2faaf59560ff993ef (diff)
download: markdown-35930e0928e19e37f81c906d5d11dfcc1087092b.tar.gz
markdown-35930e0928e19e37f81c906d5d11dfcc1087092b.tar.bz2
markdown-35930e0928e19e37f81c906d5d11dfcc1087092b.zip
1 files changed, 18 insertions, 9 deletions
diff --git a/markdown/inlinepatterns.py b/markdown/inlinepatterns.py
index 109cc05..51b06d9 100644
--- a/markdown/inlinepatterns.py
+++ b/markdown/inlinepatterns.py
@@ -311,20 +311,29 @@ class LinkPattern(Pattern):
         `username:password@host:port`.
 
         """
+        if not self.markdown.safeMode:
+            # Return immediately bipassing parsing.
+            return url
+        
+        try:
+            scheme, netloc, path, params, query, fragment = url = urlparse(url)
+        except ValueError:
+            # Bad url - so bad it couldn't be parsed.
+            return ''
+        
         locless_schemes = ['', 'mailto', 'news']
-        scheme, netloc, path, params, query, fragment = url = urlparse(url)
-        safe_url = False
-        if netloc != '' or scheme in locless_schemes:
-            safe_url = True
+        if netloc == '' or scheme not in locless_schemes:
+            # This fails regardless of anything else. 
+            # Return immediately to save additional proccessing
+            return ''
 
         for part in url[2:]:
             if ":" in part:
-                safe_url = False
+                # Not a safe url
+                return ''
 
-        if self.markdown.safeMode and not safe_url:
-            return ''
-        else:
-            return urlunparse(url)
+        # Url passes all tests. Return url as-is.
+        return urlunparse(url)
 
 class ImagePattern(LinkPattern):
     """ Return a img element from the given match. """
author	Waylan Limberg <waylan@gmail.com>	2011-12-29 02:11:48 +0800
committer	Mike Dirolf <mike@dirolf.com>	2012-01-15 00:45:36 +0800
commit	35930e0928e19e37f81c906d5d11dfcc1087092b (patch)
tree	e875b58aa7d3a23898b02d90dfa58d6a40399297 /markdown/inlinepatterns.py
parent	12baab2e34a49530f3b712d2faaf59560ff993ef (diff)
download	markdown-35930e0928e19e37f81c906d5d11dfcc1087092b.tar.gz markdown-35930e0928e19e37f81c906d5d11dfcc1087092b.tar.bz2 markdown-35930e0928e19e37f81c906d5d11dfcc1087092b.zip