From 7db56daedf8a6006222f55eeeab748e7789fba89 Mon Sep 17 00:00:00 2001 From: Waylan Limberg Date: Mon, 1 Sep 2014 17:30:59 -0400 Subject: Mark "Safe Mode" as pending deprecation. Both `safe_mode` and `html_replacement_test` keywords are pending deprecation, as are positional args. Closes #337. --- docs/index.txt | 7 ------- docs/reference.txt | 33 ++++++++++++++++-------------- docs/release-2.5.txt | 57 +++++++++++++++++++++++++++++++++++++++++++++++++--- markdown/__init__.py | 27 +++++++++++++++++++------ markdown/__main__.py | 2 +- 5 files changed, 94 insertions(+), 32 deletions(-) diff --git a/docs/index.txt b/docs/index.txt index edf1cbd..bfe1818 100644 --- a/docs/index.txt +++ b/docs/index.txt @@ -54,13 +54,6 @@ features: Python-Markdown can output documents in HTML4, XHTML and HTML5. See the [Library Reference](reference.html#output_format) for details. -* __"Safe Mode"__ - - When using Python-Markdown to parse input from untrusted users on the web, - the handling of raw HTML can be controlled in various ways to prevent - harmful code from being injected into your site. See the - [Library Reference](reference.html#safe_mode) for details. - * __Command Line Interface__ In addition to being a Python Library, a diff --git a/docs/reference.txt b/docs/reference.txt index e1797ad..2e1985d 100644 --- a/docs/reference.txt +++ b/docs/reference.txt @@ -165,10 +165,20 @@ The following options are available on the `markdown.markdown` function: * __`safe_mode`__{: #safe_mode }: Disallow raw html. - If you are using Markdown on a web system which will transform text - provided by untrusted users, you may want to use the "safe_mode" - option which ensures that the user's HTML tags are either replaced, - removed or escaped. (They can still create links using Markdown syntax.) + !!! warning + "`safe_mode`" is pending deprecation and should not be used. + + HTML sanitizers (like [Bleach]) provide a better solution for + dealing with markdown text submitted by untrusted users. 
+ + import markdown + import bleach + html = bleach.clean(markdown.markdown(untrusted_text)) + + See the [release notes] for more info. + +[Bleach]: https://github.com/jsocol/bleach +[release notes]: release-2.5.html The following values are accepted: @@ -200,21 +210,14 @@ The following options are available on the `markdown.markdown` function: "safe_mode" also alters the default value for the [`enable_attributes`](#enable_attributes) option. - !!! seealso "See Also" - HTML sanitizers (like [Bleach]) may provide a better solution for - dealing with markdown text submitted by untrusted users. That way, - both the HTML generated by Markdown and user submited raw HTML are - fully sanitized. - - import markdown - import bleach - html = bleach.clean(markdown.markdown(evil_text)) - -[Bleach]: https://github.com/jsocol/bleach * __`html_replacement_text`__{: #html_replacement_text }: Text used when safe_mode is set to `replace`. Defaults to `[HTML_REMOVED]`. + !!! warning + "`html_replacement_text`" is pending deprecation and should not be used. + See the [release notes] for more info. + * __`tab_length`__{: #tab_length }: Length of tabs in the source. Default: 4 * __`enable_attributes`__{: #enable_attributes}: Enable the conversion of diff --git a/docs/release-2.5.txt b/docs/release-2.5.txt index 044fcb2..207d876 100644 --- a/docs/release-2.5.txt +++ b/docs/release-2.5.txt @@ -23,7 +23,7 @@ Backwards-incompatible Changes [importlib]: https://pypi.python.org/pypi/importlib -* The `force_linenos` config key on the [CodeHilite Extension] has been deprecated +* The `force_linenos` config key on the [CodeHilite Extension] has been **deprecated** and will raise a `KeyError` if provided. In the previous release (2.4), it was issuing a `DeprecationWarning`. The [`linenums`][linenums] keyword should be used instead, which provides more control of the output. 
@@ -31,11 +31,62 @@ Backwards-incompatible Changes [CodeHilite Extension]: extensions/code_hilite.html [linenums]: extensions/code_hilite.html#usage +* Both `safe_mode` and the associated `html_replacement_text` keywords will be deprecated + in version 2.6 and will raise a **`PendingDeprecationWarning`** in 2.5. The so-called + "safe mode" was never actually "safe" which has resulted in many people having a false + sense of security when using it. As an alternative, the developers of Python-Markdown + recommend that any untrusted content be passed through an HTML sanitizer (like [Bleach]) + after being converted to HTML by markdown. + + If your code previously looked like this: + + html = markdown.markdown(text, safe_mode=True) + + Then it is recommended that you change your code to read something like this: + + import bleach + html = bleach.clean(markdown.markdown(text)) + + If you are not interested in sanitizing untrusted text, but simply desire to escape + raw HTML, then that can be accomplished through an extension which removes HTML parsing: + + from markdown.extensions import Extension + + class EscapeHtml(Extension): + def extendMarkdown(self, md, md_globals): + del md.preprocessors['html_block'] + del md.inlinePatterns['html'] + + html = markdown.markdown(text, extensions=[EscapeHtml()]) + + As the HTML would not be parsed with the above Extension, then the serializer will + escape the raw HTML, which is exactly what happens now when `safe_mode="escape"`. + +[Bleach]: http://bleach.readthedocs.org/ + +* Positional arguments on the `markdown.Markdown()` are pending deprecation as are + all except the `text` argument on the `markdown.markdown()` wrapper function. + Only keyword arguments should be used. For example, if your code previously + looked like this: + + html = markdown.markdown(text, ['extra']) + + Then it is recommended that you change it to read something like this: + + html = markdown.markdown(text, extensions=['extra']) + + !!! 
Note + This change is being made as a result of deprecating `"safe_mode"` as the + `safe_mode` argument was one of the positional arguments. When that argument + is removed, the two arguments following it will no longer be at the correct + position. It is recommended that you always use keywords when they are supported + for this reason. + * In previous versions of Python-Markdown, the builtin extensions received special status and did not require the full path to be provided. Additionaly, third party extensions whose name started with "mdx_" received the same special treatment. This behavior will be deprecated in version 2.6 and will - raise a `PendingDeprecationWarning` in 2.5. Ensure that you always use the full + raise a **`PendingDeprecationWarning`** in 2.5. Ensure that you always use the full path to your extensions. For example, if you previously did the following: markdown.markdown(text, extensions=['extra']) @@ -53,7 +104,7 @@ Backwards-incompatible Changes * The previously documented method of appending the extension configs as a string to the extension name will be deprecated in Python-Markdown - version 2.6 and will raise a `PendingDeprecationWarning` in 2.5. The + version 2.6 and will raise a **`PendingDeprecationWarning`** in 2.5. The [extension_configs](reference.html#extension_configs) keyword should be used instead. See the [documentation](reference.html#extension-configs) for a full explaination of the current behavior. 
diff --git a/markdown/__init__.py b/markdown/__init__.py index 6bf84d0..502ef08 100644 --- a/markdown/__init__.py +++ b/markdown/__init__.py @@ -50,6 +50,7 @@ from .serializers import to_html_string, to_xhtml_string __all__ = ['Markdown', 'markdown', 'markdownFromFile'] logger = logging.getLogger('MARKDOWN') +logging.captureWarnings(True) class Markdown(object): @@ -98,8 +99,8 @@ class Markdown(object): Note that it is suggested that the more specific formats ("xhtml1" and "html4") be used as "xhtml" or "html" may change in the future if it makes sense at that time. - * safe_mode: Disallow raw html. One of "remove", "replace" or "escape". - * html_replacement_text: Text used when safe_mode is set to "replace". + * safe_mode: Deprecated! Disallow raw html. One of "remove", "replace" or "escape". + * html_replacement_text: Deprecated! Text used when safe_mode is set to "replace". * tab_length: Length of tabs in the source. Default: 4 * enable_attributes: Enable the conversion of attributes. Default: True * smart_emphasis: Treat `_connected_words_` intelligently Default: True @@ -109,14 +110,16 @@ class Markdown(object): # For backward compatibility, loop through old positional args pos = ['extensions', 'extension_configs', 'safe_mode', 'output_format'] - c = 0 - for arg in args: + for c, arg in enumerate(args): if pos[c] not in kwargs: kwargs[pos[c]] = arg - c += 1 - if c == len(pos): #pragma: no cover + if c+1 == len(pos): #pragma: no cover # ignore any additional args break + if len(args): + warnings.warn('Positional arguments are pending depreacted in Markdown ' + 'and will be deprecated in version 2.6. 
Use keyword ' + 'arguments only.', PendingDeprecationWarning) # Loop through kwargs and assign defaults for option, default in self.option_defaults.items(): @@ -127,6 +130,18 @@ class Markdown(object): # Disable attributes in safeMode when not explicitly set self.enable_attributes = False + if 'safe_mode' in kwargs: + warnings.warn('"safe_mode" is pending deprecation in Python-Markdown ' + 'and will be deprecated in version 2.6. Use an HTML ' + 'sanitizer (like Bleach http://bleach.readthedocs.org/) ' + 'if you are parsing untrusted markdown text. See the ' + '2.5 release notes for more info', PendingDeprecationWarning) + + if 'html_replacement_text' in kwargs: + warnings.warn('The "html_replacement_text" keyword is pending deprecation ' + 'in Python-Markdown and will be deprecated in version 2.6 ' + 'along with "safe_mode".', PendingDeprecationWarning) + self.registeredExtensions = [] self.docType = "" self.stripTopLevelTags = True diff --git a/markdown/__main__.py b/markdown/__main__.py index 8dd8d38..d085540 100644 --- a/markdown/__main__.py +++ b/markdown/__main__.py @@ -36,7 +36,7 @@ def parse_options(args=None, values=None): help="Encoding for input and output files.",) parser.add_option("-s", "--safe", dest="safe", default=False, metavar="SAFE_MODE", - help="'replace', 'remove' or 'escape' HTML tags in input") + help="Deprecated! 'replace', 'remove' or 'escape' HTML tags in input") parser.add_option("-o", "--output_format", dest="output_format", default='xhtml1', metavar="OUTPUT_FORMAT", help="'xhtml1' (default), 'html4' or 'html5'.") -- cgit v1.2.3