about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- markdown/__init__.py 35
-rw-r--r-- markdown/blockparser.py 5
-rw-r--r-- markdown/blockprocessors.py 26
-rw-r--r-- markdown/inlinepatterns.py 10
-rw-r--r-- markdown/postprocessors.py 2
-rw-r--r-- markdown/treeprocessors.py 2
-rw-r--r-- markdown/util.py 8
7 files changed, 48 insertions, 40 deletions
diff --git a/markdown/__init__.py b/markdown/__init__.py
index c7988b8..6d16b0e 100644
--- a/markdown/__init__.py
+++ b/markdown/__init__.py
@@ -90,11 +90,18 @@ import html4
class Markdown:
"""Convert Markdown to HTML."""
+ TAB_LENGTH = 4 # expand tabs to this many spaces
+ ENABLE_ATTRIBUTES = True # @id = xyz -> <... id="xyz">
+ SMART_EMPHASIS = True # this_or_that does not become this<i>or</i>that
+ DEFAULT_OUTPUT_FORMAT = 'xhtml1' # xhtml or html4 output
+ HTML_REMOVED_TEXT = "[HTML_REMOVED]" # text used instead of HTML in safe mode
+ DOC_TAG = "div" # Element used to wrap document - later removed
+
def __init__(self,
extensions=[],
extension_configs={},
safe_mode = False,
- output_format=util.DEFAULT_OUTPUT_FORMAT):
+ output_format=None):
"""
Creates a new Markdown instance.
@@ -131,7 +138,7 @@ class Markdown:
# footnote preprocessor will be inserted with "<reference"
# Block processors - ran by the parser
- self.parser = blockparser.BlockParser()
+ self.parser = blockparser.BlockParser(self)
self.parser.blockprocessors['empty'] = \
blockprocessors.EmptyBlockProcessor(self.parser)
self.parser.blockprocessors['indent'] = \
@@ -190,8 +197,14 @@ class Markdown:
inlinepatterns.SimpleTagPattern(inlinepatterns.STRONG_RE, 'strong')
self.inlinePatterns["emphasis"] = \
inlinepatterns.SimpleTagPattern(inlinepatterns.EMPHASIS_RE, 'em')
- self.inlinePatterns["emphasis2"] = \
- inlinepatterns.SimpleTagPattern(inlinepatterns.EMPHASIS_2_RE, 'em')
+ if self.SMART_EMPHASIS:
+ self.inlinePatterns["emphasis2"] = \
+ inlinepatterns.SimpleTagPattern( \
+ inlinepatterns.SMART_EMPHASIS_RE, 'em')
+ else:
+ self.inlinePatterns["emphasis2"] = \
+ inlinepatterns.SimpleTagPattern( \
+ inlinepatterns.EMPHASIS_2_RE, 'em')
# The order of the handlers matters!!!
@@ -265,6 +278,8 @@ class Markdown:
def set_output_format(self, format):
""" Set the output format for the class instance. """
+ if format is None:
+ format = self.DEFAULT_OUTPUT_FORMAT
try:
self.serializer = self.output_formats[format.lower()]
except KeyError:
@@ -295,7 +310,7 @@ class Markdown:
source = source.replace(util.STX, "").replace(util.ETX, "")
source = source.replace("\r\n", "\n").replace("\r", "\n") + "\n\n"
source = re.sub(r'\n\s+\n', '\n\n', source)
- source = source.expandtabs(util.TAB_LENGTH)
+ source = source.expandtabs(self.TAB_LENGTH)
# Split into lines and run the line preprocessors.
self.lines = source.split("\n")
@@ -315,11 +330,11 @@ class Markdown:
output, length = codecs.utf_8_decode(self.serializer(root, encoding="utf-8"))
if self.stripTopLevelTags:
try:
- start = output.index('<%s>'%util.DOC_TAG)+len(util.DOC_TAG)+2
- end = output.rindex('</%s>'%util.DOC_TAG)
+ start = output.index('<%s>'%self.DOC_TAG)+len(self.DOC_TAG)+2
+ end = output.rindex('</%s>'%self.DOC_TAG)
output = output[start:end].strip()
except ValueError:
- if output.strip().endswith('<%s />'%util.DOC_TAG):
+ if output.strip().endswith('<%s />'%self.DOC_TAG):
# We have an empty document
output = ''
else:
@@ -386,7 +401,7 @@ markdownFromFile().
def markdown(text,
extensions = [],
safe_mode = False,
- output_format = util.DEFAULT_OUTPUT_FORMAT):
+ output_format = None):
"""Convert a markdown string to HTML and return HTML as a unicode string.
This is a shortcut function for `Markdown` class to cover the most
@@ -421,7 +436,7 @@ def markdownFromFile(input = None,
extensions = [],
encoding = None,
safe_mode = False,
- output_format = util.DEFAULT_OUTPUT_FORMAT):
+ output_format = None):
"""Read markdown code from a file and write it to a file or a stream."""
md = Markdown(extensions=load_extensions(extensions),
safe_mode=safe_mode,
diff --git a/markdown/blockparser.py b/markdown/blockparser.py
index afbdabd..533b695 100644
--- a/markdown/blockparser.py
+++ b/markdown/blockparser.py
@@ -42,9 +42,10 @@ class BlockParser:
looping through them and creating an ElementTree object.
"""
- def __init__(self):
+ def __init__(self, markdown):
self.blockprocessors = odict.OrderedDict()
self.state = State()
+ self.markdown = markdown
def parseDocument(self, lines):
""" Parse a markdown document into an ElementTree.
@@ -57,7 +58,7 @@ class BlockParser:
"""
# Create a ElementTree from the lines
- self.root = util.etree.Element(util.DOC_TAG)
+ self.root = util.etree.Element(self.markdown.DOC_TAG)
self.parseChunk(self.root, '\n'.join(lines))
return util.etree.ElementTree(self.root)
diff --git a/markdown/blockprocessors.py b/markdown/blockprocessors.py
index a64be2a..4e39200 100644
--- a/markdown/blockprocessors.py
+++ b/markdown/blockprocessors.py
@@ -28,8 +28,9 @@ class BlockProcessor:
"""
- def __init__(self, parser=None):
+ def __init__(self, parser):
self.parser = parser
+ self.TAB_LENGTH = parser.markdown.TAB_LENGTH
def lastChild(self, parent):
""" Return the last child of an etree element. """
@@ -43,8 +44,8 @@ class BlockProcessor:
newtext = []
lines = text.split('\n')
for line in lines:
- if line.startswith(' '*util.TAB_LENGTH):
- newtext.append(line[util.TAB_LENGTH:])
+ if line.startswith(' '*self.TAB_LENGTH):
+ newtext.append(line[self.TAB_LENGTH:])
elif not line.strip():
newtext.append('')
else:
@@ -55,8 +56,8 @@ class BlockProcessor:
""" Remove a tab from front of lines but allowing dedented lines. """
lines = text.split('\n')
for i in range(len(lines)):
- if lines[i].startswith(' '*util.TAB_LENGTH*level):
- lines[i] = lines[i][util.TAB_LENGTH*level:]
+ if lines[i].startswith(' '*self.TAB_LENGTH*level):
+ lines[i] = lines[i][self.TAB_LENGTH*level:]
return '\n'.join(lines)
def test(self, parent, block):
@@ -115,12 +116,15 @@ class ListIndentProcessor(BlockProcessor):
"""
- INDENT_RE = re.compile(r'^(([ ]{%s})+)'% util.TAB_LENGTH)
ITEM_TYPES = ['li']
LIST_TYPES = ['ul', 'ol']
+ def __init__(self, *args):
+ BlockProcessor.__init__(self, *args)
+ self.INDENT_RE = re.compile(r'^(([ ]{%s})+)'% self.TAB_LENGTH)
+
def test(self, parent, block):
- return block.startswith(' '*util.TAB_LENGTH) and \
+ return block.startswith(' '*self.TAB_LENGTH) and \
not self.parser.state.isstate('detabbed') and \
(parent.tag in self.ITEM_TYPES or \
(len(parent) and parent[-1] and \
@@ -174,7 +178,7 @@ class ListIndentProcessor(BlockProcessor):
# Get indent level
m = self.INDENT_RE.match(block)
if m:
- indent_level = len(m.group(1))/util.TAB_LENGTH
+ indent_level = len(m.group(1))/self.TAB_LENGTH
else:
indent_level = 0
if self.parser.state.isstate('list'):
@@ -201,7 +205,7 @@ class CodeBlockProcessor(BlockProcessor):
""" Process code blocks. """
def test(self, parent, block):
- return block.startswith(' '*util.TAB_LENGTH)
+ return block.startswith(' '*self.TAB_LENGTH)
def run(self, parent, blocks):
sibling = self.lastChild(parent)
@@ -321,7 +325,7 @@ class OListProcessor(BlockProcessor):
# Loop through items in block, recursively parsing each with the
# appropriate parent.
for item in items:
- if item.startswith(' '*util.TAB_LENGTH):
+ if item.startswith(' '*self.TAB_LENGTH):
# Item is indented. Parse with last item as parent
self.parser.parseBlocks(lst[-1], [item])
else:
@@ -340,7 +344,7 @@ class OListProcessor(BlockProcessor):
items.append(m.group(3))
elif self.INDENT_RE.match(line):
# This is an indented (possibly nested) item.
- if items[-1].startswith(' '*util.TAB_LENGTH):
+ if items[-1].startswith(' '*self.TAB_LENGTH):
# Previous item was indented. Append to that item.
items[-1] = '%s\n%s' % (items[-1], line)
else:
diff --git a/markdown/inlinepatterns.py b/markdown/inlinepatterns.py
index ceeef0a..a718b7e 100644
--- a/markdown/inlinepatterns.py
+++ b/markdown/inlinepatterns.py
@@ -67,12 +67,8 @@ ESCAPE_RE = r'\\(.)' # \<
EMPHASIS_RE = r'(\*)([^\*]+)\2' # *emphasis*
STRONG_RE = r'(\*{2}|_{2})(.+?)\2' # **strong**
STRONG_EM_RE = r'(\*{3}|_{3})(.+?)\2' # ***strong***
-
-if util.SMART_EMPHASIS:
- EMPHASIS_2_RE = r'(?<!\w)(_)(\S.+?)\2(?!\w)' # _emphasis_
-else:
- EMPHASIS_2_RE = r'(_)(.+?)\2' # _emphasis_
-
+SMART_EMPHASIS_RE = r'(?<!\w)(_)(\S.+?)\2(?!\w)' # _smart_emphasis_
+EMPHASIS_2_RE = r'(_)(.+?)\2' # _emphasis_
LINK_RE = NOIMG + BRK + \
r'''\(\s*(<.*?>|((?:(?:\(.*?\))|[^\(\)]))*?)\s*((['"])(.*?)\12)?\)'''
# [text](url) or [text](<url>)
@@ -287,7 +283,7 @@ class ImagePattern(LinkPattern):
if len(src_parts) > 1:
el.set('title', dequote(" ".join(src_parts[1:])))
- if util.ENABLE_ATTRIBUTES:
+ if self.markdown.ENABLE_ATTRIBUTES:
truealt = handleAttributes(m.group(2), el)
else:
truealt = m.group(2)
diff --git a/markdown/postprocessors.py b/markdown/postprocessors.py
index 2093289..41c34fc 100644
--- a/markdown/postprocessors.py
+++ b/markdown/postprocessors.py
@@ -45,7 +45,7 @@ class RawHtmlPostprocessor(Postprocessor):
elif str(self.markdown.safeMode).lower() == 'remove':
html = ''
else:
- html = util.HTML_REMOVED_TEXT
+ html = self.markdown.HTML_REMOVED_TEXT
if safe or not self.markdown.safeMode:
text = text.replace("<p>%s</p>" %
(self.markdown.htmlStash.get_placeholder(i)),
diff --git a/markdown/treeprocessors.py b/markdown/treeprocessors.py
index b33472e..6aeb142 100644
--- a/markdown/treeprocessors.py
+++ b/markdown/treeprocessors.py
@@ -277,7 +277,7 @@ class InlineProcessor(Treeprocessor):
if child.getchildren():
stack.append(child)
- if util.ENABLE_ATTRIBUTES:
+ if self.markdown.ENABLE_ATTRIBUTES:
for element, lst in insertQueue:
if element.text:
element.text = \
diff --git a/markdown/util.py b/markdown/util.py
index 9d1531a..f41469b 100644
--- a/markdown/util.py
+++ b/markdown/util.py
@@ -15,18 +15,10 @@ Constants you might want to modify
-----------------------------------------------------------------------------
"""
-
-TAB_LENGTH = 4 # expand tabs to this many spaces
-ENABLE_ATTRIBUTES = True # @id = xyz -> <... id="xyz">
-SMART_EMPHASIS = True # this_or_that does not become this<i>or</i>that
-DEFAULT_OUTPUT_FORMAT = 'xhtml1' # xhtml or html4 output
-HTML_REMOVED_TEXT = "[HTML_REMOVED]" # text used instead of HTML in safe mode
BLOCK_LEVEL_ELEMENTS = re.compile("p|div|h[1-6]|blockquote|pre|table|dl|ol|ul"
"|script|noscript|form|fieldset|iframe|math"
"|ins|del|hr|hr/|style|li|dt|dd|thead|tbody"
"|tr|th|td")
-DOC_TAG = "div" # Element used to wrap document - later removed
-
# Placeholders
STX = u'\u0002' # Use STX ("Start of text") for start-of-placeholder
ETX = u'\u0003' # Use ETX ("End of text") for end-of-placeholder