From 61066d49189b156a54778b7ef70dfd0a3ed86e93 Mon Sep 17 00:00:00 2001 From: Dmitry Shachnev Date: Fri, 26 Jul 2013 09:13:20 +0400 Subject: Add smarty extension, based on SmartyPants library --- markdown/extensions/smarty.py | 235 ++++++++++++++++++++++++++++++++++++++++++ tests/extensions/smarty.html | 16 +++ tests/extensions/smarty.txt | 19 ++++ tests/extensions/test.cfg | 3 + 4 files changed, 273 insertions(+) create mode 100644 markdown/extensions/smarty.py create mode 100644 tests/extensions/smarty.html create mode 100644 tests/extensions/smarty.txt diff --git a/markdown/extensions/smarty.py b/markdown/extensions/smarty.py new file mode 100644 index 0000000..90d0c41 --- /dev/null +++ b/markdown/extensions/smarty.py @@ -0,0 +1,235 @@ +# -*- coding: utf-8 -*- +# Smarty extension for Python-Markdown +# Author: 2013, Dmitry Shachnev + +# SmartyPants license: +# +# Copyright (c) 2003 John Gruber +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# +# * Neither the name "SmartyPants" nor the names of its contributors +# may be used to endorse or promote products derived from this +# software without specific prior written permission. +# +# This software is provided by the copyright holders and contributors "as +# is" and any express or implied warranties, including, but not limited +# to, the implied warranties of merchantability and fitness for a +# particular purpose are disclaimed. 
In no event shall the copyright +# owner or contributors be liable for any direct, indirect, incidental, +# special, exemplary, or consequential damages (including, but not +# limited to, procurement of substitute goods or services; loss of use, +# data, or profits; or business interruption) however caused and on any +# theory of liability, whether in contract, strict liability, or tort +# (including negligence or otherwise) arising in any way out of the use +# of this software, even if advised of the possibility of such damage. +# +# +# smartypants.py license: +# +# smartypants.py is a derivative work of SmartyPants. +# Copyright (c) 2004, 2007 Chad Miller +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# +# This software is provided by the copyright holders and contributors "as +# is" and any express or implied warranties, including, but not limited +# to, the implied warranties of merchantability and fitness for a +# particular purpose are disclaimed. In no event shall the copyright +# owner or contributors be liable for any direct, indirect, incidental, +# special, exemplary, or consequential damages (including, but not +# limited to, procurement of substitute goods or services; loss of use, +# data, or profits; or business interruption) however caused and on any +# theory of liability, whether in contract, strict liability, or tort +# (including negligence or otherwise) arising in any way out of the use +# of this software, even if advised of the possibility of such damage. 
+ +from __future__ import unicode_literals +from . import Extension +from ..inlinepatterns import HtmlPattern + +def canonicalize(regex): + """ + Converts the regexp from the re.VERBOSE form to the canonical form, + i.e. remove all whitespace and ignore comments. + """ + lines = regex.split('\n') + for i in range(len(lines)): + if ' #' in lines[i]: + lines[i] = lines[i][:lines[i].find(' #')] + return ''.join(lines).replace(' ', '') + +# Constants for quote education. +punctClass = r"""[!"#\$\%'()*+,-.\/:;<=>?\@\[\\\]\^_`{|}~]""" +endOfWordClass = r"[\s.,;:!?)]" +closeClass = r"[^\ \t\r\n\[\{\(\-\u0002\u0003]" + +openingQuotesBase = r""" +( + \s | # a whitespace char, or +   | # a non-breaking space entity, or + -- | # dashes, or + –|— | # unicode, or + &[mn]dash; | # named dash entities, or + –|— # decimal entities +) +""" + +# Special case if the very first character is a quote +# followed by punctuation at a non-word-break. Close the quotes by brute force: +singleQuoteStartRe = r"^'(?=%s\\B)" % punctClass +doubleQuoteStartRe = r'^"(?=%s\\B)' % punctClass + +# Special case for double sets of quotes, e.g.: +#

He said, "'Quoted' words in a larger quote."

+doubleQuoteSetsRe = r""""'(?=\w)""" +singleQuoteSetsRe = r"""'"(?=\w)""" + +# Special case for decade abbreviations (the '80s): +decadeAbbrRe = r"""\b'(?=\d{2}s)""" + +# Get most opening double quotes: +openingDoubleQuotesRegex = canonicalize(""" +%s # symbols before the quote +" # the quote +(?=\w) # followed by a word character +""" % openingQuotesBase) + +# Double closing quotes: +closingDoubleQuotesRegex = canonicalize(r""" +" +(?=\s) +""") + +closingDoubleQuotesRegex2 = canonicalize(r""" +(?<=%s) # character that indicates the quote should be closing +" +""" % closeClass) + +# Get most opening single quotes: +openingSingleQuotesRegex = canonicalize(r""" +%s # symbols before the quote +' # the quote +(?=\w) # followed by a word character +""" % openingQuotesBase) + +closingSingleQuotesRegex = canonicalize(r""" +(?<=%s) +' +(?!\s | s\b | \d) +""" % closeClass) + +closingSingleQuotesRegex2 = canonicalize(r""" +(?<=%s) +' +(\s | s\b) +""" % closeClass) + +# All remaining quotes should be opening ones +remainingSingleQuotesRegex = "'" +remainingDoubleQuotesRegex = '"' + +for name in ('mdash', 'ndash', 'hellip', 'lsquo', 'rsquo', 'ldquo', 'rdquo'): + locals()[name] = '&%s;' % name + + +class SubstituteTextPattern(HtmlPattern): + def __init__(self, pattern, replace, markdown_instance): + """ Replaces matches with some text. 
""" + HtmlPattern.__init__(self, pattern) + self.replace = replace + self.markdown = markdown_instance + + def handleMatch(self, m): + result = '' + for part in self.replace: + if isinstance(part, int): + result += m.group(part) + else: + result += self.markdown.htmlStash.store(part, safe=True) + return result + +class SmartyExtension(Extension): + def __init__(self, configs): + self.config = { + 'smart_quotes': [True, 'Educate quotes'], + 'smart_dashes': [True, 'Educate dashes'], + 'smart_ellipses': [True, 'Educate ellipses'] + } + for key, value in configs: + if value.lower() in ('true', 1): + value = True + elif value.lower() in ('false', 0): + value = False + else: + raise ValueError('Cannot parse bool value: %s' % value) + self.setConfig(key, value) + + def _addPatterns(self, md, patterns, serie): + for ind, pattern in enumerate(patterns): + pattern += (md,) + pattern = SubstituteTextPattern(*pattern) + after = ('>smarty-%s-%d' % (serie, ind - 1) if ind else '>entity') + name = 'smarty-%s-%d' % (serie, ind) + md.inlinePatterns.add(name, pattern, after) + + def educateDashes(self, md): + emDashesPattern = SubstituteTextPattern(r'(?entity') + md.inlinePatterns.add('smarty-en-dashes', enDashesPattern, + '>smarty-em-dashes') + + def educateEllipses(self, md): + ellipsesPattern = SubstituteTextPattern(r'(?entity') + + def educateQuotes(self, md): + patterns = ( + (singleQuoteStartRe, (rsquo,)), + (doubleQuoteStartRe, (rdquo,)), + (doubleQuoteSetsRe, (ldquo + lsquo,)), + (singleQuoteSetsRe, (lsquo + ldquo,)), + (openingSingleQuotesRegex, (2, lsquo)), + (closingSingleQuotesRegex, (rsquo,)), + (closingSingleQuotesRegex2, (rsquo, 2)), + (remainingSingleQuotesRegex, (lsquo,)), + (openingDoubleQuotesRegex, (2, ldquo)), + (closingDoubleQuotesRegex, (rdquo,)), + (closingDoubleQuotesRegex2, (rdquo,)), + (remainingDoubleQuotesRegex, (ldquo,)) + ) + self._addPatterns(md, patterns, 'quotes') + + def extendMarkdown(self, md, md_globals): + configs = self.getConfigs() + if 
configs['smart_quotes']: + self.educateQuotes(md) + if configs['smart_dashes']: + self.educateDashes(md) + if configs['smart_ellipses']: + self.educateEllipses(md) + +def makeExtension(configs=None): + return SmartyExtension(configs) diff --git a/tests/extensions/smarty.html b/tests/extensions/smarty.html new file mode 100644 index 0000000..2485ab7 --- /dev/null +++ b/tests/extensions/smarty.html @@ -0,0 +1,16 @@ +

1440–80’s
+1440–‘80s
+1440—‘80s
+1960s
+1960’s
+one two ‘60s
+‘60s

+

“Isn’t this fun”? — she said…
+“‘Quoted’ words in a larger quote.”
+“quoted” text and bold “quoted” text
+‘quoted’ text and bold ‘quoted’ text
+em-dashes (—) and ellipes (…)
+“Link” — she said.

+
+

Escaped -- ndash
+Escaped ellipsis...

\ No newline at end of file diff --git a/tests/extensions/smarty.txt b/tests/extensions/smarty.txt new file mode 100644 index 0000000..4015f71 --- /dev/null +++ b/tests/extensions/smarty.txt @@ -0,0 +1,19 @@ +1440--80's +1440--'80s +1440---'80s +1960s +1960's +one two '60s +'60s + +"Isn't this fun"? --- she said... +"'Quoted' words in a larger quote." +"quoted" text and **bold "quoted" text** +'quoted' text and **bold 'quoted' text** +em-dashes (---) and ellipes (...) +"[Link](http://example.com)" --- she said. + +--- -- --- + +Escaped \-- ndash +Escaped ellipsis\... \ No newline at end of file diff --git a/tests/extensions/test.cfg b/tests/extensions/test.cfg index 42145c1..1a13b1c 100644 --- a/tests/extensions/test.cfg +++ b/tests/extensions/test.cfg @@ -38,3 +38,6 @@ extensions=nl2br,attr_list [admonition] extensions=admonition + +[smarty] +extensions=smarty -- cgit v1.2.3 From a23ba4d248ab30bc790bfb6f1af53c77d8f6065f Mon Sep 17 00:00:00 2001 From: Dmitry Shachnev Date: Fri, 26 Jul 2013 09:16:38 +0400 Subject: Add documentation for smarty extension --- docs/extensions/index.txt | 2 ++ docs/extensions/sane_lists.txt | 4 +-- docs/extensions/smarty.txt | 56 ++++++++++++++++++++++++++++++++++++++++++ docs/extensions/toc.txt | 4 +-- docs/siteindex.txt | 1 + 5 files changed, 63 insertions(+), 4 deletions(-) create mode 100644 docs/extensions/smarty.txt diff --git a/docs/extensions/index.txt b/docs/extensions/index.txt index 2137a22..b71c2ec 100644 --- a/docs/extensions/index.txt +++ b/docs/extensions/index.txt @@ -53,6 +53,7 @@ Extension | "Name" [Meta-Data] | `meta` [New Line to Break] | `nl2br` [Sane Lists] | `sane_lists` +[SmartyPants] | `smarty` [Table of Contents] | `toc` [WikiLinks] | `wikilinks` @@ -70,6 +71,7 @@ Extension | "Name" [Meta-Data]: meta_data.html [New Line to Break]: nl2br.html [Sane Lists]: sane_lists.html +[SmartyPants]: smarty.html [Table of Contents]: toc.html [WikiLinks]: wikilinks.html diff --git a/docs/extensions/sane_lists.txt 
b/docs/extensions/sane_lists.txt index 4d24d17..7e67e1f 100644 --- a/docs/extensions/sane_lists.txt +++ b/docs/extensions/sane_lists.txt @@ -1,8 +1,8 @@ title: Sane Lists Extension prev_title: New Line to Break Extension prev_url: nl2br.html -next_title: Table of Contents Extension -next_url: toc.html +next_title: SmartyPants Extension +next_url: smarty.html Sane Lists ========== diff --git a/docs/extensions/smarty.txt b/docs/extensions/smarty.txt new file mode 100644 index 0000000..84c6494 --- /dev/null +++ b/docs/extensions/smarty.txt @@ -0,0 +1,56 @@ +title: SmartyPants Extension +prev_title: Sane Lists Extension +prev_url: sane_lists.html +next_title: Table of Contents Extension +next_url: toc.html + +SmartyPants +=========== + +Summary +------- + +The SmartyPants extension converts ASCII dashes, quotes and ellipses to +their HTML entity equivalents. + +ASCII symbol | Unicode replacements +------------ | -------------------- +' | ‘ ’ +" | “ ” +\... | … +\-- | – +-\-- | — + +Arguments +--------- + +All three arguments are set to `True` by default. + +Argument | Description +-------- | ----------- +`smart_dashes` | whether to convert dashes +`smart_quotes` | whether to convert quotes +`smart_ellipses` | whether to convert ellipses + +Usage +----- + +Default configuration: + + >>> html = markdown.markdown(text, + ... extensions=['smarty'] + ... ) + +Disable quote conversion: + + >>> html = markdown.markdown(text, + ... extensions=['smarty(smart_quotes=False)'] + ... ) + +Further reading +--------------- + +SmartyPants extension is based on the original SmartyPants implementation +by John Gruber. Please read its [documentation][1] for details. 
+ +[1]: http://daringfireball.net/projects/smartypants/ diff --git a/docs/extensions/toc.txt b/docs/extensions/toc.txt index af282c6..2a91bb6 100644 --- a/docs/extensions/toc.txt +++ b/docs/extensions/toc.txt @@ -1,6 +1,6 @@ title: Table of Contents Extension -prev_title: Sane Lists Extension -prev_url: sane_lists.html +prev_title: SmartyPants Extension +prev_url: smarty.html next_title: Wikilinks Extension next_url: wikilinks.html diff --git a/docs/siteindex.txt b/docs/siteindex.txt index 45fdab2..6846015 100644 --- a/docs/siteindex.txt +++ b/docs/siteindex.txt @@ -44,6 +44,7 @@ Table of Contents * [Meta-Data](extensions/meta_data.html) * [New Line to Break](extensions/nl2br.html) * [Sane Lists](extensions/sane_lists.html) + * [SmartyPants](extensions/smarty.html) * [Table of Contents](extensions/toc.html) * [WikiLinks](extensions/wikilinks.html) * [Third Party Extensions](extensions/index.html#third-party-extensions) -- cgit v1.2.3 From c00f686313d656fb87842cf69541f09dce520dfa Mon Sep 17 00:00:00 2001 From: Dmitry Shachnev Date: Fri, 26 Jul 2013 09:27:41 +0400 Subject: Use smarty extension when building docs --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index a205c49..826de9d 100755 --- a/setup.py +++ b/setup.py @@ -144,7 +144,7 @@ class build_docs(Command): else: with codecs.open('docs/_template.html', encoding='utf-8') as f: template = f.read() - self.md = markdown.Markdown(extensions=['extra', 'toc', 'meta', 'admonition']) + self.md = markdown.Markdown(extensions=['extra', 'toc', 'meta', 'admonition', 'smarty']) for infile in self.docs: outfile, ext = os.path.splitext(infile) if ext == '.txt': -- cgit v1.2.3 From 85ad18071d61925168387934e4e6f9f82462c292 Mon Sep 17 00:00:00 2001 From: Dmitry Shachnev Date: Sat, 27 Jul 2013 16:21:00 +0400 Subject: Some fixes requested by @waylan, cleanup and tests improvements --- markdown/extensions/smarty.py | 20 +++++++++----------- tests/extensions/smarty.html | 6 +++++- 
tests/extensions/smarty.txt | 7 ++++++- 3 files changed, 20 insertions(+), 13 deletions(-) diff --git a/markdown/extensions/smarty.py b/markdown/extensions/smarty.py index 90d0c41..18f9217 100644 --- a/markdown/extensions/smarty.py +++ b/markdown/extensions/smarty.py @@ -106,9 +106,6 @@ doubleQuoteStartRe = r'^"(?=%s\\B)' % punctClass doubleQuoteSetsRe = r""""'(?=\w)""" singleQuoteSetsRe = r"""'"(?=\w)""" -# Special case for decade abbreviations (the '80s): -decadeAbbrRe = r"""\b'(?=\d{2}s)""" - # Get most opening double quotes: openingDoubleQuotesRegex = canonicalize(""" %s # symbols before the quote @@ -150,9 +147,7 @@ closingSingleQuotesRegex2 = canonicalize(r""" remainingSingleQuotesRegex = "'" remainingDoubleQuotesRegex = '"' -for name in ('mdash', 'ndash', 'hellip', 'lsquo', 'rsquo', 'ldquo', 'rdquo'): - locals()[name] = '&%s;' % name - +lsquo, rsquo, ldquo, rdquo = '‘', '’', '“', '”' class SubstituteTextPattern(HtmlPattern): def __init__(self, pattern, replace, markdown_instance): @@ -178,9 +173,11 @@ class SmartyExtension(Extension): 'smart_ellipses': [True, 'Educate ellipses'] } for key, value in configs: - if value.lower() in ('true', 1): + if not isinstance(value, str): + value = bool(value) + elif value.lower() in ('true', 't', 'yes', 'y', '1'): value = True - elif value.lower() in ('false', 0): + elif value.lower() in ('false', 'f', 'no', 'n', '0'): value = False else: raise ValueError('Cannot parse bool value: %s' % value) @@ -195,14 +192,14 @@ class SmartyExtension(Extension): md.inlinePatterns.add(name, pattern, after) def educateDashes(self, md): - emDashesPattern = SubstituteTextPattern(r'(?entity') md.inlinePatterns.add('smarty-en-dashes', enDashesPattern, '>smarty-em-dashes') def educateEllipses(self, md): - ellipsesPattern = SubstituteTextPattern(r'(?entity') def educateQuotes(self, md): @@ -230,6 +227,7 @@ class SmartyExtension(Extension): self.educateDashes(md) if configs['smart_ellipses']: self.educateEllipses(md) + 
md.ESCAPED_CHARS.extend(['"', "'"]) def makeExtension(configs=None): return SmartyExtension(configs) diff --git a/tests/extensions/smarty.html b/tests/extensions/smarty.html index 2485ab7..fbd15af 100644 --- a/tests/extensions/smarty.html +++ b/tests/extensions/smarty.html @@ -7,10 +7,14 @@ one two ‘60s
‘60s

“Isn’t this fun”? — she said…
“‘Quoted’ words in a larger quote.”
+‘Quoted “words” in a larger quote.’
“quoted” text and bold “quoted” text
‘quoted’ text and bold ‘quoted’ text
em-dashes (—) and ellipes (…)
Link” — she said.


Escaped -- ndash
-Escaped ellipsis...

\ No newline at end of file +'Escaped' "quotes"
+Escaped ellipsis...

+

‘Escaped "quotes" in real ones’
+'“Real” quotes in escaped ones'

\ No newline at end of file diff --git a/tests/extensions/smarty.txt b/tests/extensions/smarty.txt index 4015f71..5b5ece7 100644 --- a/tests/extensions/smarty.txt +++ b/tests/extensions/smarty.txt @@ -8,6 +8,7 @@ one two '60s "Isn't this fun"? --- she said... "'Quoted' words in a larger quote." +'Quoted "words" in a larger quote.' "quoted" text and **bold "quoted" text** 'quoted' text and **bold 'quoted' text** em-dashes (---) and ellipes (...) @@ -16,4 +17,8 @@ em-dashes (---) and ellipes (...) --- -- --- Escaped \-- ndash -Escaped ellipsis\... \ No newline at end of file +\'Escaped\' \"quotes\" +Escaped ellipsis\... + +'Escaped \"quotes\" in real ones' +\'"Real" quotes in escaped ones\' \ No newline at end of file -- cgit v1.2.3