From 61066d49189b156a54778b7ef70dfd0a3ed86e93 Mon Sep 17 00:00:00 2001 From: Dmitry Shachnev Date: Fri, 26 Jul 2013 09:13:20 +0400 Subject: Add smarty extension, based on SmartyPants library --- markdown/extensions/smarty.py | 235 ++++++++++++++++++++++++++++++++++++++++++ tests/extensions/smarty.html | 16 +++ tests/extensions/smarty.txt | 19 ++++ tests/extensions/test.cfg | 3 + 4 files changed, 273 insertions(+) create mode 100644 markdown/extensions/smarty.py create mode 100644 tests/extensions/smarty.html create mode 100644 tests/extensions/smarty.txt diff --git a/markdown/extensions/smarty.py b/markdown/extensions/smarty.py new file mode 100644 index 0000000..90d0c41 --- /dev/null +++ b/markdown/extensions/smarty.py @@ -0,0 +1,235 @@ +# -*- coding: utf-8 -*- +# Smarty extension for Python-Markdown +# Author: 2013, Dmitry Shachnev + +# SmartyPants license: +# +# Copyright (c) 2003 John Gruber +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# +# * Neither the name "SmartyPants" nor the names of its contributors +# may be used to endorse or promote products derived from this +# software without specific prior written permission. +# +# This software is provided by the copyright holders and contributors "as +# is" and any express or implied warranties, including, but not limited +# to, the implied warranties of merchantability and fitness for a +# particular purpose are disclaimed. 
In no event shall the copyright +# owner or contributors be liable for any direct, indirect, incidental, +# special, exemplary, or consequential damages (including, but not +# limited to, procurement of substitute goods or services; loss of use, +# data, or profits; or business interruption) however caused and on any +# theory of liability, whether in contract, strict liability, or tort +# (including negligence or otherwise) arising in any way out of the use +# of this software, even if advised of the possibility of such damage. +# +# +# smartypants.py license: +# +# smartypants.py is a derivative work of SmartyPants. +# Copyright (c) 2004, 2007 Chad Miller +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# +# This software is provided by the copyright holders and contributors "as +# is" and any express or implied warranties, including, but not limited +# to, the implied warranties of merchantability and fitness for a +# particular purpose are disclaimed. In no event shall the copyright +# owner or contributors be liable for any direct, indirect, incidental, +# special, exemplary, or consequential damages (including, but not +# limited to, procurement of substitute goods or services; loss of use, +# data, or profits; or business interruption) however caused and on any +# theory of liability, whether in contract, strict liability, or tort +# (including negligence or otherwise) arising in any way out of the use +# of this software, even if advised of the possibility of such damage. 
from __future__ import unicode_literals
from . import Extension
from ..inlinepatterns import HtmlPattern


def canonicalize(regex):
    """
    Convert a regexp written in re.VERBOSE layout to its compact form.

    On every line, everything from the first ' #' (space followed by a
    hash) onwards is treated as a comment and dropped; the lines are then
    joined and every ASCII space is removed.  Tabs and other whitespace
    are left alone.

    Because *all* spaces are removed, a pattern passed through this
    function must not rely on a literal or backslash-escaped space; spell
    it as ``\\x20`` instead.
    """
    uncommented = (line.partition(' #')[0] for line in regex.split('\n'))
    return ''.join(uncommented).replace(' ', '')


# Constants for quote education.
punctClass = r"""[!"#\$\%'()*+,-.\/:;<=>?\@\[\\\]\^_`{|}~]"""
endOfWordClass = r"[\s.,;:!?)]"
# The space is written as \x20 on purpose: this class is interpolated into
# patterns that go through canonicalize(), which deletes every space.  An
# escaped space ("\ ") would lose its space and leave "\\t", i.e. a class
# that excludes a backslash and the letter "t" instead of space and tab.
closeClass = r"[^\x20\t\r\n\[\{\(\-\u0002\u0003]"

# NOTE(review): in the copy under review the "non-breaking space entity" and
# "decimal entities" alternatives appeared as already-decoded characters,
# contradicting their own comments; they are written as entities here.
# Confirm against the original commit.
openingQuotesBase = r"""
(
    \s               |   # a whitespace char, or
    &nbsp;           |   # a non-breaking space entity, or
    --               |   # dashes, or
    –|—              |   # unicode, or
    &[mn]dash;       |   # named dash entities, or
    &#8211;|&#8212;      # decimal entities
)
"""

# Special case if the very first character is a quote
# followed by punctuation at a non-word-break. Close the quotes by brute force:
# (\B, not \\B: in a raw string "\\B" matches a literal backslash plus "B".)
singleQuoteStartRe = r"^'(?=%s\B)" % punctClass
doubleQuoteStartRe = r'^"(?=%s\B)' % punctClass

# Special case for double sets of quotes, e.g.:
#

#   <p>He said, "'Quoted' words in a larger quote."</p>

+doubleQuoteSetsRe = r""""'(?=\w)""" +singleQuoteSetsRe = r"""'"(?=\w)""" + +# Special case for decade abbreviations (the '80s): +decadeAbbrRe = r"""\b'(?=\d{2}s)""" + +# Get most opening double quotes: +openingDoubleQuotesRegex = canonicalize(""" +%s # symbols before the quote +" # the quote +(?=\w) # followed by a word character +""" % openingQuotesBase) + +# Double closing quotes: +closingDoubleQuotesRegex = canonicalize(r""" +" +(?=\s) +""") + +closingDoubleQuotesRegex2 = canonicalize(r""" +(?<=%s) # character that indicates the quote should be closing +" +""" % closeClass) + +# Get most opening single quotes: +openingSingleQuotesRegex = canonicalize(r""" +%s # symbols before the quote +' # the quote +(?=\w) # followed by a word character +""" % openingQuotesBase) + +closingSingleQuotesRegex = canonicalize(r""" +(?<=%s) +' +(?!\s | s\b | \d) +""" % closeClass) + +closingSingleQuotesRegex2 = canonicalize(r""" +(?<=%s) +' +(\s | s\b) +""" % closeClass) + +# All remaining quotes should be opening ones +remainingSingleQuotesRegex = "'" +remainingDoubleQuotesRegex = '"' + +for name in ('mdash', 'ndash', 'hellip', 'lsquo', 'rsquo', 'ldquo', 'rdquo'): + locals()[name] = '&%s;' % name + + +class SubstituteTextPattern(HtmlPattern): + def __init__(self, pattern, replace, markdown_instance): + """ Replaces matches with some text. 
""" + HtmlPattern.__init__(self, pattern) + self.replace = replace + self.markdown = markdown_instance + + def handleMatch(self, m): + result = '' + for part in self.replace: + if isinstance(part, int): + result += m.group(part) + else: + result += self.markdown.htmlStash.store(part, safe=True) + return result + +class SmartyExtension(Extension): + def __init__(self, configs): + self.config = { + 'smart_quotes': [True, 'Educate quotes'], + 'smart_dashes': [True, 'Educate dashes'], + 'smart_ellipses': [True, 'Educate ellipses'] + } + for key, value in configs: + if value.lower() in ('true', 1): + value = True + elif value.lower() in ('false', 0): + value = False + else: + raise ValueError('Cannot parse bool value: %s' % value) + self.setConfig(key, value) + + def _addPatterns(self, md, patterns, serie): + for ind, pattern in enumerate(patterns): + pattern += (md,) + pattern = SubstituteTextPattern(*pattern) + after = ('>smarty-%s-%d' % (serie, ind - 1) if ind else '>entity') + name = 'smarty-%s-%d' % (serie, ind) + md.inlinePatterns.add(name, pattern, after) + + def educateDashes(self, md): + emDashesPattern = SubstituteTextPattern(r'(?entity') + md.inlinePatterns.add('smarty-en-dashes', enDashesPattern, + '>smarty-em-dashes') + + def educateEllipses(self, md): + ellipsesPattern = SubstituteTextPattern(r'(?entity') + + def educateQuotes(self, md): + patterns = ( + (singleQuoteStartRe, (rsquo,)), + (doubleQuoteStartRe, (rdquo,)), + (doubleQuoteSetsRe, (ldquo + lsquo,)), + (singleQuoteSetsRe, (lsquo + ldquo,)), + (openingSingleQuotesRegex, (2, lsquo)), + (closingSingleQuotesRegex, (rsquo,)), + (closingSingleQuotesRegex2, (rsquo, 2)), + (remainingSingleQuotesRegex, (lsquo,)), + (openingDoubleQuotesRegex, (2, ldquo)), + (closingDoubleQuotesRegex, (rdquo,)), + (closingDoubleQuotesRegex2, (rdquo,)), + (remainingDoubleQuotesRegex, (ldquo,)) + ) + self._addPatterns(md, patterns, 'quotes') + + def extendMarkdown(self, md, md_globals): + configs = self.getConfigs() + if 
configs['smart_quotes']: + self.educateQuotes(md) + if configs['smart_dashes']: + self.educateDashes(md) + if configs['smart_ellipses']: + self.educateEllipses(md) + +def makeExtension(configs=None): + return SmartyExtension(configs) diff --git a/tests/extensions/smarty.html b/tests/extensions/smarty.html new file mode 100644 index 0000000..2485ab7 --- /dev/null +++ b/tests/extensions/smarty.html @@ -0,0 +1,16 @@ +

1440–80’s
+1440–‘80s
+1440—‘80s
+1960s
+1960’s
+one two ‘60s
+‘60s

+

“Isn’t this fun”? — she said…
+“‘Quoted’ words in a larger quote.”
+“quoted” text and bold “quoted” text
+‘quoted’ text and bold ‘quoted’ text
+em-dashes (—) and ellipes (…)
+“Link” — she said.

+
+

Escaped -- ndash
+Escaped ellipsis...

\ No newline at end of file diff --git a/tests/extensions/smarty.txt b/tests/extensions/smarty.txt new file mode 100644 index 0000000..4015f71 --- /dev/null +++ b/tests/extensions/smarty.txt @@ -0,0 +1,19 @@ +1440--80's +1440--'80s +1440---'80s +1960s +1960's +one two '60s +'60s + +"Isn't this fun"? --- she said... +"'Quoted' words in a larger quote." +"quoted" text and **bold "quoted" text** +'quoted' text and **bold 'quoted' text** +em-dashes (---) and ellipes (...) +"[Link](http://example.com)" --- she said. + +--- -- --- + +Escaped \-- ndash +Escaped ellipsis\... \ No newline at end of file diff --git a/tests/extensions/test.cfg b/tests/extensions/test.cfg index 42145c1..1a13b1c 100644 --- a/tests/extensions/test.cfg +++ b/tests/extensions/test.cfg @@ -38,3 +38,6 @@ extensions=nl2br,attr_list [admonition] extensions=admonition + +[smarty] +extensions=smarty -- cgit v1.2.3