about summary refs log tree commit diff stats
path: root/markdown
diff options
context:
space:
mode:
author Tiago Serafim <tserafim@gmail.com> 2013-02-09 16:51:17 -0200
committer Tiago Serafim <tserafim@gmail.com> 2013-02-09 17:39:12 -0200
commit 14f43d5a4c91ac13528f4c9eb8c6247fdafb0ea1 (patch)
tree edcb6d7462d31d66dcc792ed5e84373187b1de14 /markdown
parent 6c15c64df40b9ded3f5dea2e694905222791c1b6 (diff)
parent 41cc055580d63ffb7eb2bbb6c88e121727d91d06 (diff)
download markdown-14f43d5a4c91ac13528f4c9eb8c6247fdafb0ea1.tar.gz
markdown-14f43d5a4c91ac13528f4c9eb8c6247fdafb0ea1.tar.bz2
markdown-14f43d5a4c91ac13528f4c9eb8c6247fdafb0ea1.zip
Merge branch 'master' into admonition
Conflicts: docs/extensions/index.txt tests/extensions/test.cfg
Diffstat (limited to 'markdown')
-rw-r--r-- markdown/__init__.py 27
-rw-r--r-- markdown/__version__.py 28
-rw-r--r-- markdown/blockprocessors.py 15
-rw-r--r-- markdown/extensions/attr_list.py 23
-rw-r--r-- markdown/extensions/def_list.py 14
-rw-r--r-- markdown/extensions/extra.py 5
-rw-r--r--[-rwxr-xr-x] markdown/extensions/fenced_code.py 2
-rw-r--r-- markdown/extensions/footnotes.py 14
-rw-r--r-- markdown/extensions/headerid.py 12
-rw-r--r-- markdown/extensions/smart_strong.py 1
-rw-r--r-- markdown/inlinepatterns.py 41
-rw-r--r-- markdown/odict.py 2
-rw-r--r-- markdown/preprocessors.py 15
-rw-r--r-- markdown/treeprocessors.py 6
-rw-r--r-- markdown/util.py 4
15 files changed, 141 insertions, 68 deletions
diff --git a/markdown/__init__.py b/markdown/__init__.py
index 81404e0..aceaf60 100644
--- a/markdown/__init__.py
+++ b/markdown/__init__.py
@@ -30,14 +30,11 @@ Copyright 2004 Manfred Stienstra (the original version)
License: BSD (see LICENSE for details).
"""
-version = "2.2.0"
-version_info = (2,2,0, "final")
-
+from __version__ import version, version_info
import re
import codecs
import sys
import logging
-import warnings
import util
from preprocessors import build_preprocessors
from blockprocessors import build_block_parser
@@ -135,9 +132,9 @@ class Markdown:
self.references = {}
self.htmlStash = util.HtmlStash()
+ self.set_output_format(kwargs.get('output_format', 'xhtml1'))
self.registerExtensions(extensions=kwargs.get('extensions', []),
configs=kwargs.get('extension_configs', {}))
- self.set_output_format(kwargs.get('output_format', 'xhtml1'))
self.reset()
def build_parser(self):
@@ -284,11 +281,6 @@ class Markdown:
e.reason += '. -- Note: Markdown only accepts unicode input!'
raise
- source = source.replace(util.STX, "").replace(util.ETX, "")
- source = source.replace("\r\n", "\n").replace("\r", "\n") + "\n\n"
- source = re.sub(r'\n\s+\n', '\n\n', source)
- source = source.expandtabs(self.tab_length)
-
# Split into lines and run the line preprocessors.
self.lines = source.split("\n")
for prep in self.preprocessors.values():
@@ -379,15 +371,14 @@ class Markdown:
output_file.write(html)
# Don't close here. User may want to write more.
else:
- if sys.stdout.encoding:
- # If we are in Python 3 or if we are not piping output:
+ # Encode manually and write bytes to stdout.
+ html = html.encode(encoding, "xmlcharrefreplace")
+ try:
+ # Write bytes directly to buffer (Python 3).
+ sys.stdout.buffer.write(html)
+ except AttributeError:
+ # Probably Python 2, which works with bytes by default.
sys.stdout.write(html)
- else:
- # In python 2.x if you pipe output on command line,
- # sys.stdout.encoding is None. So lets set it:
- writer = codecs.getwriter(encoding)
- stdout = writer(sys.stdout, errors="xmlcharrefreplace")
- stdout.write(html)
return self
diff --git a/markdown/__version__.py b/markdown/__version__.py
new file mode 100644
index 0000000..bbe1b3f
--- /dev/null
+++ b/markdown/__version__.py
@@ -0,0 +1,28 @@
+#
+# markdown/__version__.py
+#
+# version_info should conform to PEP 386
+# (major, minor, micro, alpha/beta/rc/final, #)
+# (1, 1, 2, 'alpha', 0) => "1.1.2.dev"
+# (1, 2, 0, 'beta', 2) => "1.2b2"
+version_info = (2, 3, 0, 'alpha', 0)
+
+def _get_version():
+ " Returns a PEP 386-compliant version number from version_info. "
+ assert len(version_info) == 5
+ assert version_info[3] in ('alpha', 'beta', 'rc', 'final')
+
+ parts = 2 if version_info[2] == 0 else 3
+ main = '.'.join(map(str, version_info[:parts]))
+
+ sub = ''
+ if version_info[3] == 'alpha' and version_info[4] == 0:
+ # TODO: maybe append some sort of git info here??
+ sub = '.dev'
+ elif version_info[3] != 'final':
+ mapping = {'alpha': 'a', 'beta': 'b', 'rc': 'c'}
+ sub = mapping[version_info[3]] + str(version_info[4])
+
+ return str(main + sub)
+
+version = _get_version()
diff --git a/markdown/blockprocessors.py b/markdown/blockprocessors.py
index 7b14a85..b41df6a 100644
--- a/markdown/blockprocessors.py
+++ b/markdown/blockprocessors.py
@@ -485,7 +485,7 @@ class HRProcessor(BlockProcessor):
# Recursively parse lines before hr so they get parsed first.
self.parser.parseBlocks(parent, [prelines])
# create hr
- hr = util.etree.SubElement(parent, 'hr')
+ util.etree.SubElement(parent, 'hr')
# check for lines in block after hr.
postlines = block[self.match.end():].lstrip('\n')
if postlines:
@@ -499,7 +499,7 @@ class EmptyBlockProcessor(BlockProcessor):
# Detect a block that only contains whitespace
# or only whitespace on the first line.
- RE = re.compile(r'^\s*\n')
+ RE = re.compile(r'^ *(\n|$)')
def test(self, parent, block):
return bool(self.RE.match(block))
@@ -508,13 +508,14 @@ class EmptyBlockProcessor(BlockProcessor):
block = blocks.pop(0)
m = self.RE.match(block)
if m:
- # Add remaining line to master blocks for later.
- blocks.insert(0, block[m.end():])
+ theRest = block[m.end():]
+ if theRest:
+ # Add remaining lines to master blocks for later.
+ blocks.insert(0, theRest)
sibling = self.lastChild(parent)
- if sibling and sibling.tag == 'pre' and sibling[0] and \
- sibling[0].tag == 'code':
+ if sibling and sibling.tag == 'pre' and len(sibling) and sibling[0].tag == 'code':
# Last block is a codeblock. Append to preserve whitespace.
- sibling[0].text = util.AtomicString('%s/n/n/n' % sibling[0].text )
+ sibling[0].text = util.AtomicString('%s\n' % sibling[0].text )
class ParagraphProcessor(BlockProcessor):
diff --git a/markdown/extensions/attr_list.py b/markdown/extensions/attr_list.py
index 0aa18e0..3a79d85 100644
--- a/markdown/extensions/attr_list.py
+++ b/markdown/extensions/attr_list.py
@@ -67,10 +67,13 @@ class AttrListTreeprocessor(markdown.treeprocessors.Treeprocessor):
HEADER_RE = re.compile(r'[ ]*%s[ ]*$' % BASE_RE)
BLOCK_RE = re.compile(r'\n[ ]*%s[ ]*$' % BASE_RE)
INLINE_RE = re.compile(r'^%s' % BASE_RE)
+ NAME_RE = re.compile(r'[^A-Z_a-z\u00c0-\u00d6\u00d8-\u00f6\u00f8-\u02ff\u0370-\u037d'
+ r'\u037f-\u1fff\u200c-\u200d\u2070-\u218f\u2c00-\u2fef'
+ r'\u3001-\ud7ff\uf900-\ufdcf\ufdf0-\ufffd\u10000-\ueffff'
+ r'\-\.0-9\u00b7\u0300-\u036f\u203f-\u2040]+')
def run(self, doc):
for elem in doc.getiterator():
- #import pdb; pdb.set_trace()
if isBlockLevel(elem.tag):
# Block level: check for attrs on last line of text
RE = self.BLOCK_RE
@@ -114,18 +117,20 @@ class AttrListTreeprocessor(markdown.treeprocessors.Treeprocessor):
else:
elem.set('class', v)
else:
- # assing attr k with v
- elem.set(k, v)
+ # assign attr k with v
+ elem.set(self.sanitize_name(k), v)
+
+ def sanitize_name(self, name):
+ """
+ Sanitize name as 'an XML Name, minus the ":"'.
+ See http://www.w3.org/TR/REC-xml-names/#NT-NCName
+ """
+ return self.NAME_RE.sub('_', name)
class AttrListExtension(markdown.extensions.Extension):
def extendMarkdown(self, md, md_globals):
- if 'headerid' in md.treeprocessors.keys():
- # insert after 'headerid' treeprocessor
- md.treeprocessors.add('attr_list', AttrListTreeprocessor(md), '>headerid')
- else:
- # insert after 'inline' treeprocessor
- md.treeprocessors.add('attr_list', AttrListTreeprocessor(md), '>inline')
+ md.treeprocessors.add('attr_list', AttrListTreeprocessor(md), '>prettify')
def makeExtension(configs={}):
diff --git a/markdown/extensions/def_list.py b/markdown/extensions/def_list.py
index da1726a..382445c 100644
--- a/markdown/extensions/def_list.py
+++ b/markdown/extensions/def_list.py
@@ -34,10 +34,11 @@ class DefListProcessor(markdown.blockprocessors.BlockProcessor):
return bool(self.RE.search(block))
def run(self, parent, blocks):
- block = blocks.pop(0)
- m = self.RE.search(block)
- terms = [l.strip() for l in block[:m.start()].split('\n') if l.strip()]
- block = block[m.end():]
+
+ raw_block = blocks.pop(0)
+ m = self.RE.search(raw_block)
+ terms = [l.strip() for l in raw_block[:m.start()].split('\n') if l.strip()]
+ block = raw_block[m.end():]
no_indent = self.NO_INDENT_RE.match(block)
if no_indent:
d, theRest = (block, None)
@@ -48,6 +49,11 @@ class DefListProcessor(markdown.blockprocessors.BlockProcessor):
else:
d = m.group(2)
sibling = self.lastChild(parent)
+ if not terms and sibling is None:
+ # This is not a definition item. Most likely a paragraph that
+ # starts with a colon at the beginning of a document or list.
+ blocks.insert(0, raw_block)
+ return False
if not terms and sibling.tag == 'p':
# The previous paragraph contains the terms
state = 'looselist'
diff --git a/markdown/extensions/extra.py b/markdown/extensions/extra.py
index 2c7915e..ba646f5 100644
--- a/markdown/extensions/extra.py
+++ b/markdown/extensions/extra.py
@@ -45,8 +45,9 @@ class ExtraExtension(markdown.Extension):
def extendMarkdown(self, md, md_globals):
""" Register extension instances. """
md.registerExtensions(extensions, self.config)
- # Turn on processing of markdown text within raw html
- md.preprocessors['html_block'].markdown_in_raw = True
+ if not md.safeMode:
+ # Turn on processing of markdown text within raw html
+ md.preprocessors['html_block'].markdown_in_raw = True
def makeExtension(configs={}):
return ExtraExtension(configs=dict(configs))
diff --git a/markdown/extensions/fenced_code.py b/markdown/extensions/fenced_code.py
index 9a1284f..76d644f 100755..100644
--- a/markdown/extensions/fenced_code.py
+++ b/markdown/extensions/fenced_code.py
@@ -95,7 +95,7 @@ class FencedCodeExtension(markdown.Extension):
md.preprocessors.add('fenced_code_block',
FencedBlockPreprocessor(md),
- "_begin")
+ ">normalize_whitespace")
class FencedBlockPreprocessor(markdown.preprocessors.Preprocessor):
diff --git a/markdown/extensions/footnotes.py b/markdown/extensions/footnotes.py
index cfe41ed..0a0ddea 100644
--- a/markdown/extensions/footnotes.py
+++ b/markdown/extensions/footnotes.py
@@ -62,6 +62,9 @@ class FootnoteExtension(markdown.Extension):
md.registerExtension(self)
self.parser = md.parser
self.md = md
+ self.sep = ':'
+ if self.md.output_format in ['html5', 'xhtml5']:
+ self.sep = '-'
# Insert a preprocessor before ReferencePreprocessor
md.preprocessors.add("footnote", FootnotePreprocessor(self),
"<reference")
@@ -106,16 +109,16 @@ class FootnoteExtension(markdown.Extension):
def makeFootnoteId(self, id):
""" Return footnote link id. """
if self.getConfig("UNIQUE_IDS"):
- return 'fn:%d-%s' % (self.unique_prefix, id)
+ return 'fn%s%d-%s' % (self.sep, self.unique_prefix, id)
else:
- return 'fn:%s' % id
+ return 'fn%s%s' % (self.sep, id)
def makeFootnoteRefId(self, id):
""" Return footnote back-link id. """
if self.getConfig("UNIQUE_IDS"):
- return 'fnref:%d-%s' % (self.unique_prefix, id)
+ return 'fnref%s%d-%s' % (self.sep, self.unique_prefix, id)
else:
- return 'fnref:%s' % id
+ return 'fnref%s%s' % (self.sep, id)
def makeFootnotesDiv(self, root):
""" Return div of footnotes as et Element. """
@@ -125,7 +128,7 @@ class FootnoteExtension(markdown.Extension):
div = etree.Element("div")
div.set('class', 'footnote')
- hr = etree.SubElement(div, "hr")
+ etree.SubElement(div, "hr")
ol = etree.SubElement(div, "ol")
for id in self.footnotes.keys():
@@ -171,7 +174,6 @@ class FootnotePreprocessor(markdown.preprocessors.Preprocessor):
"""
newlines = []
i = 0
- #import pdb; pdb.set_trace() #for i, line in enumerate(lines):
while True:
m = DEF_RE.match(lines[i])
if m:
diff --git a/markdown/extensions/headerid.py b/markdown/extensions/headerid.py
index e86ab15..b6a12e8 100644
--- a/markdown/extensions/headerid.py
+++ b/markdown/extensions/headerid.py
@@ -77,9 +77,7 @@ Dependencies:
"""
import markdown
-from markdown.util import etree
import re
-from string import ascii_lowercase, digits, punctuation
import logging
import unicodedata
@@ -135,7 +133,7 @@ class HeaderIdTreeprocessor(markdown.treeprocessors.Treeprocessor):
if elem.tag in ['h1', 'h2', 'h3', 'h4', 'h5', 'h6']:
if force_id:
if "id" in elem.attrib:
- id = elem.id
+ id = elem.get('id')
else:
id = slugify(u''.join(itertext(elem)), sep)
elem.set('id', unique(id, self.IDs))
@@ -185,8 +183,12 @@ class HeaderIdExtension (markdown.Extension):
self.processor = HeaderIdTreeprocessor()
self.processor.md = md
self.processor.config = self.getConfigs()
- # Replace existing hasheader in place.
- md.treeprocessors.add('headerid', self.processor, '>inline')
+ if 'attr_list' in md.treeprocessors.keys():
+ # insert after attr_list treeprocessor
+ md.treeprocessors.add('headerid', self.processor, '>attr_list')
+ else:
+ # insert after 'prettify' treeprocessor.
+ md.treeprocessors.add('headerid', self.processor, '>prettify')
def reset(self):
self.processor.IDs = []
diff --git a/markdown/extensions/smart_strong.py b/markdown/extensions/smart_strong.py
index 3ed3560..7166989 100644
--- a/markdown/extensions/smart_strong.py
+++ b/markdown/extensions/smart_strong.py
@@ -22,7 +22,6 @@ Copyright 2011
'''
-import re
import markdown
from markdown.inlinepatterns import SimpleTagPattern
diff --git a/markdown/inlinepatterns.py b/markdown/inlinepatterns.py
index d3ef4e0..f64aa58 100644
--- a/markdown/inlinepatterns.py
+++ b/markdown/inlinepatterns.py
@@ -45,7 +45,6 @@ import util
import odict
import re
from urlparse import urlparse, urlunparse
-import sys
# If you see an ImportError for htmlentitydefs after using 2to3 to convert for
# use by Python3, then you are probably using the buggy version from Python 3.0.
# We recomend using the tool from Python 3.1 even if you will be running the
@@ -69,7 +68,6 @@ def build_inlinepatterns(md_instance, **kwargs):
ReferencePattern(SHORT_REF_RE, md_instance)
inlinePatterns["autolink"] = AutolinkPattern(AUTOLINK_RE, md_instance)
inlinePatterns["automail"] = AutomailPattern(AUTOMAIL_RE, md_instance)
- inlinePatterns["linebreak2"] = SubstituteTagPattern(LINE_BREAK_2_RE, 'br')
inlinePatterns["linebreak"] = SubstituteTagPattern(LINE_BREAK_RE, 'br')
if md_instance.safeMode != 'escape':
inlinePatterns["html"] = HtmlPattern(HTML_RE, md_instance)
@@ -119,7 +117,6 @@ AUTOMAIL_RE = r'<([^> \!]*@[^> ]*)>' # <me@example.com>
HTML_RE = r'(\<([a-zA-Z/][^\>]*?|\!--.*?--)\>)' # <...>
ENTITY_RE = r'(&[\#a-zA-Z0-9]*;)' # &amp;
LINE_BREAK_RE = r' \n' # two spaces at end of line
-LINE_BREAK_2_RE = r' $' # two spaces at end of text
def dequote(string):
@@ -191,10 +188,27 @@ class Pattern:
stash = self.markdown.treeprocessors['inline'].stashed_nodes
except KeyError:
return text
+ def itertext(el):
+ ' Reimplement Element.itertext for older python versions '
+ tag = el.tag
+ if not isinstance(tag, basestring) and tag is not None:
+ return
+ if el.text:
+ yield el.text
+ for e in el:
+ for s in itertext(e):
+ yield s
+ if e.tail:
+ yield e.tail
def get_stash(m):
id = m.group(1)
if id in stash:
- return stash.get(id)
+ value = stash.get(id)
+ if isinstance(value, basestring):
+ return value
+ else:
+ # An etree Element - return text content only
+ return ''.join(itertext(value))
return util.INLINE_PLACEHOLDER_RE.sub(get_stash, text)
@@ -328,6 +342,7 @@ class LinkPattern(Pattern):
`username:password@host:port`.
"""
+ url = url.replace(' ', '%20')
if not self.markdown.safeMode:
# Return immediately bipassing parsing.
return url
@@ -339,14 +354,18 @@ class LinkPattern(Pattern):
return ''
locless_schemes = ['', 'mailto', 'news']
+ allowed_schemes = locless_schemes + ['http', 'https', 'ftp', 'ftps']
+ if scheme not in allowed_schemes:
+ # Not a known (allowed) scheme. Not safe.
+ return ''
+
if netloc == '' and scheme not in locless_schemes:
- # This fails regardless of anything else.
- # Return immediately to save additional proccessing
+ # This should not happen. Treat as suspect.
return ''
for part in url[2:]:
if ":" in part:
- # Not a safe url
+ # A colon in "path", "parameters", "query" or "fragment" is suspect.
return ''
# Url passes all tests. Return url as-is.
@@ -372,7 +391,7 @@ class ImagePattern(LinkPattern):
else:
truealt = m.group(2)
- el.set('alt', truealt)
+ el.set('alt', self.unescape(truealt))
return el
class ReferencePattern(LinkPattern):
@@ -417,7 +436,11 @@ class ImageReferencePattern(ReferencePattern):
el.set("src", self.sanitize_url(href))
if title:
el.set("title", title)
- el.set("alt", text)
+
+ if self.markdown.enable_attributes:
+ text = handleAttributes(text, el)
+
+ el.set("alt", self.unescape(text))
return el
diff --git a/markdown/odict.py b/markdown/odict.py
index d77d701..02864bf 100644
--- a/markdown/odict.py
+++ b/markdown/odict.py
@@ -119,7 +119,7 @@ class OrderedDict(dict):
""" Return the index of a given key. """
try:
return self.keyOrder.index(key)
- except ValueError, e:
+ except ValueError:
raise ValueError("Element '%s' was not found in OrderedDict" % key)
def index_for_location(self, location):
diff --git a/markdown/preprocessors.py b/markdown/preprocessors.py
index e7743fb..3751264 100644
--- a/markdown/preprocessors.py
+++ b/markdown/preprocessors.py
@@ -14,6 +14,7 @@ import odict
def build_preprocessors(md_instance, **kwargs):
""" Build the default set of preprocessors used by Markdown. """
preprocessors = odict.OrderedDict()
+ preprocessors['normalize_whitespace'] = NormalizeWhitespace(md_instance)
if md_instance.safeMode != 'escape':
preprocessors["html_block"] = HtmlBlockPreprocessor(md_instance)
preprocessors["reference"] = ReferencePreprocessor(md_instance)
@@ -41,6 +42,18 @@ class Preprocessor(util.Processor):
pass
+class NormalizeWhitespace(Preprocessor):
+ """ Normalize whitespace for consistent parsing. """
+
+ def run(self, lines):
+ source = '\n'.join(lines)
+ source = source.replace(util.STX, "").replace(util.ETX, "")
+ source = source.replace("\r\n", "\n").replace("\r", "\n") + "\n\n"
+ source = source.expandtabs(self.markdown.tab_length)
+ source = re.sub(r'\n +\n', '\n\n', source)
+ return source.split('\n')
+
+
class HtmlBlockPreprocessor(Preprocessor):
"""Remove html blocks from the text and store them for later retrieval."""
@@ -127,7 +140,7 @@ class HtmlBlockPreprocessor(Preprocessor):
def run(self, lines):
text = "\n".join(lines)
new_blocks = []
- text = text.split("\n\n")
+ text = text.rsplit("\n\n")
items = []
left_tag = ''
right_tag = ''
diff --git a/markdown/treeprocessors.py b/markdown/treeprocessors.py
index 841fe0a..b5eedbd 100644
--- a/markdown/treeprocessors.py
+++ b/markdown/treeprocessors.py
@@ -1,4 +1,3 @@
-import re
import inlinepatterns
import util
import odict
@@ -358,3 +357,8 @@ class PrettifyTreeprocessor(Treeprocessor):
br.tail = '\n'
else:
br.tail = '\n%s' % br.tail
+ # Clean up extra empty lines at end of code blocks.
+ pres = root.getiterator('pre')
+ for pre in pres:
+ if len(pre) and pre[0].tag == 'code':
+ pre[0].text = pre[0].text.rstrip() + '\n'
diff --git a/markdown/util.py b/markdown/util.py
index 13cbff2..12dcbd5 100644
--- a/markdown/util.py
+++ b/markdown/util.py
@@ -1,7 +1,5 @@
# -*- coding: utf-8 -*-
import re
-from logging import CRITICAL
-
import etree_loader
@@ -20,7 +18,7 @@ BLOCK_LEVEL_ELEMENTS = re.compile("^(p|div|h[1-6]|blockquote|pre|table|dl|ol|ul"
"|hr|hr/|style|li|dt|dd|thead|tbody"
"|tr|th|td|section|footer|header|group|figure"
"|figcaption|aside|article|canvas|output"
- "|progress|video)$")
+ "|progress|video)$", re.IGNORECASE)
# Placeholders
STX = u'\u0002' # Use STX ("Start of text") for start-of-placeholder
ETX = u'\u0003' # Use ETX ("End of text") for end-of-placeholder