diff options
author | Tiago Serafim <tserafim@gmail.com> | 2013-02-09 16:51:17 -0200 |
---|---|---|
committer | Tiago Serafim <tserafim@gmail.com> | 2013-02-09 17:39:12 -0200 |
commit | 14f43d5a4c91ac13528f4c9eb8c6247fdafb0ea1 (patch) | |
tree | edcb6d7462d31d66dcc792ed5e84373187b1de14 /markdown | |
parent | 6c15c64df40b9ded3f5dea2e694905222791c1b6 (diff) | |
parent | 41cc055580d63ffb7eb2bbb6c88e121727d91d06 (diff) | |
download | markdown-14f43d5a4c91ac13528f4c9eb8c6247fdafb0ea1.tar.gz markdown-14f43d5a4c91ac13528f4c9eb8c6247fdafb0ea1.tar.bz2 markdown-14f43d5a4c91ac13528f4c9eb8c6247fdafb0ea1.zip |
Merge branch 'master' into admonition
Conflicts:
docs/extensions/index.txt
tests/extensions/test.cfg
Diffstat (limited to 'markdown')
-rw-r--r-- | markdown/__init__.py | 27 | ||||
-rw-r--r-- | markdown/__version__.py | 28 | ||||
-rw-r--r-- | markdown/blockprocessors.py | 15 | ||||
-rw-r--r-- | markdown/extensions/attr_list.py | 23 | ||||
-rw-r--r-- | markdown/extensions/def_list.py | 14 | ||||
-rw-r--r-- | markdown/extensions/extra.py | 5 | ||||
-rw-r--r--[-rwxr-xr-x] | markdown/extensions/fenced_code.py | 2 | ||||
-rw-r--r-- | markdown/extensions/footnotes.py | 14 | ||||
-rw-r--r-- | markdown/extensions/headerid.py | 12 | ||||
-rw-r--r-- | markdown/extensions/smart_strong.py | 1 | ||||
-rw-r--r-- | markdown/inlinepatterns.py | 41 | ||||
-rw-r--r-- | markdown/odict.py | 2 | ||||
-rw-r--r-- | markdown/preprocessors.py | 15 | ||||
-rw-r--r-- | markdown/treeprocessors.py | 6 | ||||
-rw-r--r-- | markdown/util.py | 4 |
15 files changed, 141 insertions, 68 deletions
diff --git a/markdown/__init__.py b/markdown/__init__.py index 81404e0..aceaf60 100644 --- a/markdown/__init__.py +++ b/markdown/__init__.py @@ -30,14 +30,11 @@ Copyright 2004 Manfred Stienstra (the original version) License: BSD (see LICENSE for details). """ -version = "2.2.0" -version_info = (2,2,0, "final") - +from __version__ import version, version_info import re import codecs import sys import logging -import warnings import util from preprocessors import build_preprocessors from blockprocessors import build_block_parser @@ -135,9 +132,9 @@ class Markdown: self.references = {} self.htmlStash = util.HtmlStash() + self.set_output_format(kwargs.get('output_format', 'xhtml1')) self.registerExtensions(extensions=kwargs.get('extensions', []), configs=kwargs.get('extension_configs', {})) - self.set_output_format(kwargs.get('output_format', 'xhtml1')) self.reset() def build_parser(self): @@ -284,11 +281,6 @@ class Markdown: e.reason += '. -- Note: Markdown only accepts unicode input!' raise - source = source.replace(util.STX, "").replace(util.ETX, "") - source = source.replace("\r\n", "\n").replace("\r", "\n") + "\n\n" - source = re.sub(r'\n\s+\n', '\n\n', source) - source = source.expandtabs(self.tab_length) - # Split into lines and run the line preprocessors. self.lines = source.split("\n") for prep in self.preprocessors.values(): @@ -379,15 +371,14 @@ class Markdown: output_file.write(html) # Don't close here. User may want to write more. else: - if sys.stdout.encoding: - # If we are in Python 3 or if we are not piping output: + # Encode manually and write bytes to stdout. + html = html.encode(encoding, "xmlcharrefreplace") + try: + # Write bytes directly to buffer (Python 3). + sys.stdout.buffer.write(html) + except AttributeError: + # Probably Python 2, which works with bytes by default. sys.stdout.write(html) - else: - # In python 2.x if you pipe output on command line, - # sys.stdout.encoding is None. So lets set it: - writer = codecs.getwriter(encoding) - stdout = writer(sys.stdout, errors="xmlcharrefreplace") - stdout.write(html) return self diff --git a/markdown/__version__.py b/markdown/__version__.py new file mode 100644 index 0000000..bbe1b3f --- /dev/null +++ b/markdown/__version__.py @@ -0,0 +1,28 @@ +# +# markdown/__version__.py +# +# version_info should conform to PEP 386 +# (major, minor, micro, alpha/beta/rc/final, #) +# (1, 1, 2, 'alpha', 0) => "1.1.2.dev" +# (1, 2, 0, 'beta', 2) => "1.2b2" +version_info = (2, 3, 0, 'alpha', 0) + +def _get_version(): + " Returns a PEP 386-compliant version number from version_info. " + assert len(version_info) == 5 + assert version_info[3] in ('alpha', 'beta', 'rc', 'final') + + parts = 2 if version_info[2] == 0 else 3 + main = '.'.join(map(str, version_info[:parts])) + + sub = '' + if version_info[3] == 'alpha' and version_info[4] == 0: + # TODO: maybe append some sort of git info here?? + sub = '.dev' + elif version_info[3] != 'final': + mapping = {'alpha': 'a', 'beta': 'b', 'rc': 'c'} + sub = mapping[version_info[3]] + str(version_info[4]) + + return str(main + sub) + +version = _get_version() diff --git a/markdown/blockprocessors.py b/markdown/blockprocessors.py index 7b14a85..b41df6a 100644 --- a/markdown/blockprocessors.py +++ b/markdown/blockprocessors.py @@ -485,7 +485,7 @@ class HRProcessor(BlockProcessor): # Recursively parse lines before hr so they get parsed first. self.parser.parseBlocks(parent, [prelines]) # create hr - hr = util.etree.SubElement(parent, 'hr') + util.etree.SubElement(parent, 'hr') # check for lines in block after hr. postlines = block[self.match.end():].lstrip('\n') if postlines: @@ -499,7 +499,7 @@ class EmptyBlockProcessor(BlockProcessor): # Detect a block that only contains whitespace # or only whitespace on the first line. - RE = re.compile(r'^\s*\n') + RE = re.compile(r'^ *(\n|$)') def test(self, parent, block): return bool(self.RE.match(block)) @@ -508,13 +508,14 @@ class EmptyBlockProcessor(BlockProcessor): block = blocks.pop(0) m = self.RE.match(block) if m: - # Add remaining line to master blocks for later. - blocks.insert(0, block[m.end():]) + theRest = block[m.end():] + if theRest: + # Add remaining lines to master blocks for later. + blocks.insert(0, theRest) sibling = self.lastChild(parent) - if sibling and sibling.tag == 'pre' and sibling[0] and \ - sibling[0].tag == 'code': + if sibling and sibling.tag == 'pre' and len(sibling) and sibling[0].tag == 'code': # Last block is a codeblock. Append to preserve whitespace. - sibling[0].text = util.AtomicString('%s/n/n/n' % sibling[0].text ) + sibling[0].text = util.AtomicString('%s\n' % sibling[0].text ) class ParagraphProcessor(BlockProcessor): diff --git a/markdown/extensions/attr_list.py b/markdown/extensions/attr_list.py index 0aa18e0..3a79d85 100644 --- a/markdown/extensions/attr_list.py +++ b/markdown/extensions/attr_list.py @@ -67,10 +67,13 @@ class AttrListTreeprocessor(markdown.treeprocessors.Treeprocessor): HEADER_RE = re.compile(r'[ ]*%s[ ]*$' % BASE_RE) BLOCK_RE = re.compile(r'\n[ ]*%s[ ]*$' % BASE_RE) INLINE_RE = re.compile(r'^%s' % BASE_RE) + NAME_RE = re.compile(r'[^A-Z_a-z\u00c0-\u00d6\u00d8-\u00f6\u00f8-\u02ff\u0370-\u037d' + r'\u037f-\u1fff\u200c-\u200d\u2070-\u218f\u2c00-\u2fef' + r'\u3001-\ud7ff\uf900-\ufdcf\ufdf0-\ufffd\u10000-\ueffff' + r'\-\.0-9\u00b7\u0300-\u036f\u203f-\u2040]+') def run(self, doc): for elem in doc.getiterator(): - #import pdb; pdb.set_trace() if isBlockLevel(elem.tag): # Block level: check for attrs on last line of text RE = self.BLOCK_RE @@ -114,18 +117,20 @@ class AttrListTreeprocessor(markdown.treeprocessors.Treeprocessor): else: elem.set('class', v) else: - # assing attr k with v - elem.set(k, v) + # assign attr k with v + elem.set(self.sanitize_name(k), v) + + def sanitize_name(self, name): + """ + Sanitize name as 'an XML Name, minus the ":"'. + See http://www.w3.org/TR/REC-xml-names/#NT-NCName + """ + return self.NAME_RE.sub('_', name) class AttrListExtension(markdown.extensions.Extension): def extendMarkdown(self, md, md_globals): - if 'headerid' in md.treeprocessors.keys(): - # insert after 'headerid' treeprocessor - md.treeprocessors.add('attr_list', AttrListTreeprocessor(md), '>headerid') - else: - # insert after 'inline' treeprocessor - md.treeprocessors.add('attr_list', AttrListTreeprocessor(md), '>inline') + md.treeprocessors.add('attr_list', AttrListTreeprocessor(md), '>prettify') def makeExtension(configs={}): diff --git a/markdown/extensions/def_list.py b/markdown/extensions/def_list.py index da1726a..382445c 100644 --- a/markdown/extensions/def_list.py +++ b/markdown/extensions/def_list.py @@ -34,10 +34,11 @@ class DefListProcessor(markdown.blockprocessors.BlockProcessor): return bool(self.RE.search(block)) def run(self, parent, blocks): - block = blocks.pop(0) - m = self.RE.search(block) - terms = [l.strip() for l in block[:m.start()].split('\n') if l.strip()] - block = block[m.end():] + + raw_block = blocks.pop(0) + m = self.RE.search(raw_block) + terms = [l.strip() for l in raw_block[:m.start()].split('\n') if l.strip()] + block = raw_block[m.end():] no_indent = self.NO_INDENT_RE.match(block) if no_indent: d, theRest = (block, None) @@ -48,6 +49,11 @@ class DefListProcessor(markdown.blockprocessors.BlockProcessor): else: d = m.group(2) sibling = self.lastChild(parent) + if not terms and sibling is None: + # This is not a definition item. Most likely a paragraph that + # starts with a colon at the begining of a document or list. + blocks.insert(0, raw_block) + return False if not terms and sibling.tag == 'p': # The previous paragraph contains the terms state = 'looselist' diff --git a/markdown/extensions/extra.py b/markdown/extensions/extra.py index 2c7915e..ba646f5 100644 --- a/markdown/extensions/extra.py +++ b/markdown/extensions/extra.py @@ -45,8 +45,9 @@ class ExtraExtension(markdown.Extension): def extendMarkdown(self, md, md_globals): """ Register extension instances. """ md.registerExtensions(extensions, self.config) - # Turn on processing of markdown text within raw html - md.preprocessors['html_block'].markdown_in_raw = True + if not md.safeMode: + # Turn on processing of markdown text within raw html + md.preprocessors['html_block'].markdown_in_raw = True def makeExtension(configs={}): return ExtraExtension(configs=dict(configs)) diff --git a/markdown/extensions/fenced_code.py b/markdown/extensions/fenced_code.py index 9a1284f..76d644f 100755..100644 --- a/markdown/extensions/fenced_code.py +++ b/markdown/extensions/fenced_code.py @@ -95,7 +95,7 @@ class FencedCodeExtension(markdown.Extension): md.preprocessors.add('fenced_code_block', FencedBlockPreprocessor(md), - "_begin") + ">normalize_whitespace") class FencedBlockPreprocessor(markdown.preprocessors.Preprocessor): diff --git a/markdown/extensions/footnotes.py b/markdown/extensions/footnotes.py index cfe41ed..0a0ddea 100644 --- a/markdown/extensions/footnotes.py +++ b/markdown/extensions/footnotes.py @@ -62,6 +62,9 @@ class FootnoteExtension(markdown.Extension): md.registerExtension(self) self.parser = md.parser self.md = md + self.sep = ':' + if self.md.output_format in ['html5', 'xhtml5']: + self.sep = '-' # Insert a preprocessor before ReferencePreprocessor md.preprocessors.add("footnote", FootnotePreprocessor(self), "<reference") @@ -106,16 +109,16 @@ class FootnoteExtension(markdown.Extension): def makeFootnoteId(self, id): """ Return footnote link id. """ if self.getConfig("UNIQUE_IDS"): - return 'fn:%d-%s' % (self.unique_prefix, id) + return 'fn%s%d-%s' % (self.sep, self.unique_prefix, id) else: - return 'fn:%s' % id + return 'fn%s%s' % (self.sep, id) def makeFootnoteRefId(self, id): """ Return footnote back-link id. """ if self.getConfig("UNIQUE_IDS"): - return 'fnref:%d-%s' % (self.unique_prefix, id) + return 'fnref%s%d-%s' % (self.sep, self.unique_prefix, id) else: - return 'fnref:%s' % id + return 'fnref%s%s' % (self.sep, id) def makeFootnotesDiv(self, root): """ Return div of footnotes as et Element. """ @@ -125,7 +128,7 @@ class FootnoteExtension(markdown.Extension): div = etree.Element("div") div.set('class', 'footnote') - hr = etree.SubElement(div, "hr") + etree.SubElement(div, "hr") ol = etree.SubElement(div, "ol") for id in self.footnotes.keys(): @@ -171,7 +174,6 @@ class FootnotePreprocessor(markdown.preprocessors.Preprocessor): """ newlines = [] i = 0 - #import pdb; pdb.set_trace() #for i, line in enumerate(lines): while True: m = DEF_RE.match(lines[i]) if m: diff --git a/markdown/extensions/headerid.py b/markdown/extensions/headerid.py index e86ab15..b6a12e8 100644 --- a/markdown/extensions/headerid.py +++ b/markdown/extensions/headerid.py @@ -77,9 +77,7 @@ Dependencies: """ import markdown -from markdown.util import etree import re -from string import ascii_lowercase, digits, punctuation import logging import unicodedata @@ -135,7 +133,7 @@ class HeaderIdTreeprocessor(markdown.treeprocessors.Treeprocessor): if elem.tag in ['h1', 'h2', 'h3', 'h4', 'h5', 'h6']: if force_id: if "id" in elem.attrib: - id = elem.id + id = elem.get('id') else: id = slugify(u''.join(itertext(elem)), sep) elem.set('id', unique(id, self.IDs)) @@ -185,8 +183,12 @@ class HeaderIdExtension (markdown.Extension): self.processor = HeaderIdTreeprocessor() self.processor.md = md self.processor.config = self.getConfigs() - # Replace existing hasheader in place. - md.treeprocessors.add('headerid', self.processor, '>inline') + if 'attr_list' in md.treeprocessors.keys(): + # insert after attr_list treeprocessor + md.treeprocessors.add('headerid', self.processor, '>attr_list') + else: + # insert after 'prettify' treeprocessor. + md.treeprocessors.add('headerid', self.processor, '>prettify') def reset(self): self.processor.IDs = [] diff --git a/markdown/extensions/smart_strong.py b/markdown/extensions/smart_strong.py index 3ed3560..7166989 100644 --- a/markdown/extensions/smart_strong.py +++ b/markdown/extensions/smart_strong.py @@ -22,7 +22,6 @@ Copyright 2011 ''' -import re import markdown from markdown.inlinepatterns import SimpleTagPattern diff --git a/markdown/inlinepatterns.py b/markdown/inlinepatterns.py index d3ef4e0..f64aa58 100644 --- a/markdown/inlinepatterns.py +++ b/markdown/inlinepatterns.py @@ -45,7 +45,6 @@ import util import odict import re from urlparse import urlparse, urlunparse -import sys # If you see an ImportError for htmlentitydefs after using 2to3 to convert for # use by Python3, then you are probably using the buggy version from Python 3.0. # We recomend using the tool from Python 3.1 even if you will be running the @@ -69,7 +68,6 @@ def build_inlinepatterns(md_instance, **kwargs): ReferencePattern(SHORT_REF_RE, md_instance) inlinePatterns["autolink"] = AutolinkPattern(AUTOLINK_RE, md_instance) inlinePatterns["automail"] = AutomailPattern(AUTOMAIL_RE, md_instance) - inlinePatterns["linebreak2"] = SubstituteTagPattern(LINE_BREAK_2_RE, 'br') inlinePatterns["linebreak"] = SubstituteTagPattern(LINE_BREAK_RE, 'br') if md_instance.safeMode != 'escape': inlinePatterns["html"] = HtmlPattern(HTML_RE, md_instance) @@ -119,7 +117,6 @@ AUTOMAIL_RE = r'<([^> \!]*@[^> ]*)>' # <me@example.com> HTML_RE = r'(\<([a-zA-Z/][^\>]*?|\!--.*?--)\>)' # <...> ENTITY_RE = r'(&[\#a-zA-Z0-9]*;)' # & LINE_BREAK_RE = r' \n' # two spaces at end of line -LINE_BREAK_2_RE = r' $' # two spaces at end of text def dequote(string): @@ -191,10 +188,27 @@ class Pattern: stash = self.markdown.treeprocessors['inline'].stashed_nodes except KeyError: return text + def itertext(el): + ' Reimplement Element.itertext for older python versions ' + tag = el.tag + if not isinstance(tag, basestring) and tag is not None: + return + if el.text: + yield el.text + for e in el: + for s in itertext(e): + yield s + if e.tail: + yield e.tail def get_stash(m): id = m.group(1) if id in stash: - return stash.get(id) + value = stash.get(id) + if isinstance(value, basestring): + return value + else: + # An etree Element - return text content only + return ''.join(itertext(value)) return util.INLINE_PLACEHOLDER_RE.sub(get_stash, text) @@ -328,6 +342,7 @@ class LinkPattern(Pattern): `username:password@host:port`. """ + url = url.replace(' ', '%20') if not self.markdown.safeMode: # Return immediately bipassing parsing. return url @@ -339,14 +354,18 @@ class LinkPattern(Pattern): return '' locless_schemes = ['', 'mailto', 'news'] + allowed_schemes = locless_schemes + ['http', 'https', 'ftp', 'ftps'] + if scheme not in allowed_schemes: + # Not a known (allowed) scheme. Not safe. + return '' + if netloc == '' and scheme not in locless_schemes: - # This fails regardless of anything else. - # Return immediately to save additional proccessing + # This should not happen. Treat as suspect. return '' for part in url[2:]: if ":" in part: - # Not a safe url + # A colon in "path", "parameters", "query" or "fragment" is suspect. return '' # Url passes all tests. Return url as-is. @@ -372,7 +391,7 @@ class ImagePattern(LinkPattern): else: truealt = m.group(2) - el.set('alt', truealt) + el.set('alt', self.unescape(truealt)) return el class ReferencePattern(LinkPattern): @@ -417,7 +436,11 @@ class ImageReferencePattern(ReferencePattern): el.set("src", self.sanitize_url(href)) if title: el.set("title", title) - el.set("alt", text) + + if self.markdown.enable_attributes: + text = handleAttributes(text, el) + + el.set("alt", self.unescape(text)) return el diff --git a/markdown/odict.py b/markdown/odict.py index d77d701..02864bf 100644 --- a/markdown/odict.py +++ b/markdown/odict.py @@ -119,7 +119,7 @@ class OrderedDict(dict): """ Return the index of a given key. """ try: return self.keyOrder.index(key) - except ValueError, e: + except ValueError: raise ValueError("Element '%s' was not found in OrderedDict" % key) def index_for_location(self, location): diff --git a/markdown/preprocessors.py b/markdown/preprocessors.py index e7743fb..3751264 100644 --- a/markdown/preprocessors.py +++ b/markdown/preprocessors.py @@ -14,6 +14,7 @@ import odict def build_preprocessors(md_instance, **kwargs): """ Build the default set of preprocessors used by Markdown. """ preprocessors = odict.OrderedDict() + preprocessors['normalize_whitespace'] = NormalizeWhitespace(md_instance) if md_instance.safeMode != 'escape': preprocessors["html_block"] = HtmlBlockPreprocessor(md_instance) preprocessors["reference"] = ReferencePreprocessor(md_instance) @@ -41,6 +42,18 @@ class Preprocessor(util.Processor): pass +class NormalizeWhitespace(Preprocessor): + """ Normalize whitespace for consistant parsing. """ + + def run(self, lines): + source = '\n'.join(lines) + source = source.replace(util.STX, "").replace(util.ETX, "") + source = source.replace("\r\n", "\n").replace("\r", "\n") + "\n\n" + source = source.expandtabs(self.markdown.tab_length) + source = re.sub(r'\n +\n', '\n\n', source) + return source.split('\n') + + class HtmlBlockPreprocessor(Preprocessor): """Remove html blocks from the text and store them for later retrieval.""" @@ -127,7 +140,7 @@ class HtmlBlockPreprocessor(Preprocessor): def run(self, lines): text = "\n".join(lines) new_blocks = [] - text = text.split("\n\n") + text = text.rsplit("\n\n") items = [] left_tag = '' right_tag = '' diff --git a/markdown/treeprocessors.py b/markdown/treeprocessors.py index 841fe0a..b5eedbd 100644 --- a/markdown/treeprocessors.py +++ b/markdown/treeprocessors.py @@ -1,4 +1,3 @@ -import re import inlinepatterns import util import odict @@ -358,3 +357,8 @@ class PrettifyTreeprocessor(Treeprocessor): br.tail = '\n' else: br.tail = '\n%s' % br.tail + # Clean up extra empty lines at end of code blocks. + pres = root.getiterator('pre') + for pre in pres: + if len(pre) and pre[0].tag == 'code': + pre[0].text = pre[0].text.rstrip() + '\n' diff --git a/markdown/util.py b/markdown/util.py index 13cbff2..12dcbd5 100644 --- a/markdown/util.py +++ b/markdown/util.py @@ -1,7 +1,5 @@ # -*- coding: utf-8 -*- import re -from logging import CRITICAL - import etree_loader @@ -20,7 +18,7 @@ BLOCK_LEVEL_ELEMENTS = re.compile("^(p|div|h[1-6]|blockquote|pre|table|dl|ol|ul" "|hr|hr/|style|li|dt|dd|thead|tbody" "|tr|th|td|section|footer|header|group|figure" "|figcaption|aside|article|canvas|output" - "|progress|video)$") + "|progress|video)$", re.IGNORECASE) # Placeholders STX = u'\u0002' # Use STX ("Start of text") for start-of-placeholder ETX = u'\u0003' # Use ETX ("End of text") for end-of-placeholder |