diff options
-rw-r--r-- | markdown/__init__.py | 35 | ||||
-rw-r--r-- | markdown/blockparser.py | 5 | ||||
-rw-r--r-- | markdown/blockprocessors.py | 26 | ||||
-rw-r--r-- | markdown/inlinepatterns.py | 10 | ||||
-rw-r--r-- | markdown/postprocessors.py | 2 | ||||
-rw-r--r-- | markdown/treeprocessors.py | 2 | ||||
-rw-r--r-- | markdown/util.py | 8 |
7 files changed, 48 insertions, 40 deletions
diff --git a/markdown/__init__.py b/markdown/__init__.py index c7988b8..6d16b0e 100644 --- a/markdown/__init__.py +++ b/markdown/__init__.py @@ -90,11 +90,18 @@ import html4 class Markdown: """Convert Markdown to HTML.""" + TAB_LENGTH = 4 # expand tabs to this many spaces + ENABLE_ATTRIBUTES = True # @id = xyz -> <... id="xyz"> + SMART_EMPHASIS = True # this_or_that does not become this<i>or</i>that + DEFAULT_OUTPUT_FORMAT = 'xhtml1' # xhtml or html4 output + HTML_REMOVED_TEXT = "[HTML_REMOVED]" # text used instead of HTML in safe mode + DOC_TAG = "div" # Element used to wrap document - later removed + def __init__(self, extensions=[], extension_configs={}, safe_mode = False, - output_format=util.DEFAULT_OUTPUT_FORMAT): + output_format=None): """ Creates a new Markdown instance. @@ -131,7 +138,7 @@ class Markdown: # footnote preprocessor will be inserted with "<reference" # Block processors - ran by the parser - self.parser = blockparser.BlockParser() + self.parser = blockparser.BlockParser(self) self.parser.blockprocessors['empty'] = \ blockprocessors.EmptyBlockProcessor(self.parser) self.parser.blockprocessors['indent'] = \ @@ -190,8 +197,14 @@ class Markdown: inlinepatterns.SimpleTagPattern(inlinepatterns.STRONG_RE, 'strong') self.inlinePatterns["emphasis"] = \ inlinepatterns.SimpleTagPattern(inlinepatterns.EMPHASIS_RE, 'em') - self.inlinePatterns["emphasis2"] = \ - inlinepatterns.SimpleTagPattern(inlinepatterns.EMPHASIS_2_RE, 'em') + if self.SMART_EMPHASIS: + self.inlinePatterns["emphasis2"] = \ + inlinepatterns.SimpleTagPattern( \ + inlinepatterns.SMART_EMPHASIS_RE, 'em') + else: + self.inlinePatterns["emphasis2"] = \ + inlinepatterns.SimpleTagPattern( \ + inlinepatterns.EMPHASIS_2_RE, 'em') # The order of the handlers matters!!! @@ -265,6 +278,8 @@ class Markdown: def set_output_format(self, format): """ Set the output format for the class instance. """ + if format is None: + format = self.DEFAULT_OUTPUT_FORMAT try: self.serializer = self.output_formats[format.lower()] except KeyError: @@ -295,7 +310,7 @@ class Markdown: source = source.replace(util.STX, "").replace(util.ETX, "") source = source.replace("\r\n", "\n").replace("\r", "\n") + "\n\n" source = re.sub(r'\n\s+\n', '\n\n', source) - source = source.expandtabs(util.TAB_LENGTH) + source = source.expandtabs(self.TAB_LENGTH) # Split into lines and run the line preprocessors. self.lines = source.split("\n") @@ -315,11 +330,11 @@ class Markdown: output, length = codecs.utf_8_decode(self.serializer(root, encoding="utf-8")) if self.stripTopLevelTags: try: - start = output.index('<%s>'%util.DOC_TAG)+len(util.DOC_TAG)+2 - end = output.rindex('</%s>'%util.DOC_TAG) + start = output.index('<%s>'%self.DOC_TAG)+len(self.DOC_TAG)+2 + end = output.rindex('</%s>'%self.DOC_TAG) output = output[start:end].strip() except ValueError: - if output.strip().endswith('<%s />'%util.DOC_TAG): + if output.strip().endswith('<%s />'%self.DOC_TAG): # We have an empty document output = '' else: @@ -386,7 +401,7 @@ markdownFromFile(). def markdown(text, extensions = [], safe_mode = False, - output_format = util.DEFAULT_OUTPUT_FORMAT): + output_format = None): """Convert a markdown string to HTML and return HTML as a unicode string. This is a shortcut function for `Markdown` class to cover the most @@ -421,7 +436,7 @@ def markdownFromFile(input = None, extensions = [], encoding = None, safe_mode = False, - output_format = util.DEFAULT_OUTPUT_FORMAT): + output_format = None): """Read markdown code from a file and write it to a file or a stream.""" md = Markdown(extensions=load_extensions(extensions), safe_mode=safe_mode, diff --git a/markdown/blockparser.py b/markdown/blockparser.py index afbdabd..533b695 100644 --- a/markdown/blockparser.py +++ b/markdown/blockparser.py @@ -42,9 +42,10 @@ class BlockParser: looping through them and creating an ElementTree object. """ - def __init__(self): + def __init__(self, markdown): self.blockprocessors = odict.OrderedDict() self.state = State() + self.markdown = markdown def parseDocument(self, lines): """ Parse a markdown document into an ElementTree. @@ -57,7 +58,7 @@ class BlockParser: """ # Create a ElementTree from the lines - self.root = util.etree.Element(util.DOC_TAG) + self.root = util.etree.Element(self.markdown.DOC_TAG) self.parseChunk(self.root, '\n'.join(lines)) return util.etree.ElementTree(self.root) diff --git a/markdown/blockprocessors.py b/markdown/blockprocessors.py index a64be2a..4e39200 100644 --- a/markdown/blockprocessors.py +++ b/markdown/blockprocessors.py @@ -28,8 +28,9 @@ class BlockProcessor: """ - def __init__(self, parser=None): + def __init__(self, parser): self.parser = parser + self.TAB_LENGTH = parser.markdown.TAB_LENGTH def lastChild(self, parent): """ Return the last child of an etree element. """ @@ -43,8 +44,8 @@ class BlockProcessor: newtext = [] lines = text.split('\n') for line in lines: - if line.startswith(' '*util.TAB_LENGTH): - newtext.append(line[util.TAB_LENGTH:]) + if line.startswith(' '*self.TAB_LENGTH): + newtext.append(line[self.TAB_LENGTH:]) elif not line.strip(): newtext.append('') else: @@ -55,8 +56,8 @@ class BlockProcessor: """ Remove a tab from front of lines but allowing dedented lines. """ lines = text.split('\n') for i in range(len(lines)): - if lines[i].startswith(' '*util.TAB_LENGTH*level): - lines[i] = lines[i][util.TAB_LENGTH*level:] + if lines[i].startswith(' '*self.TAB_LENGTH*level): + lines[i] = lines[i][self.TAB_LENGTH*level:] return '\n'.join(lines) def test(self, parent, block): @@ -115,12 +116,15 @@ class ListIndentProcessor(BlockProcessor): """ - INDENT_RE = re.compile(r'^(([ ]{%s})+)'% util.TAB_LENGTH) ITEM_TYPES = ['li'] LIST_TYPES = ['ul', 'ol'] + def __init__(self, *args): + BlockProcessor.__init__(self, *args) + self.INDENT_RE = re.compile(r'^(([ ]{%s})+)'% self.TAB_LENGTH) + def test(self, parent, block): - return block.startswith(' '*util.TAB_LENGTH) and \ + return block.startswith(' '*self.TAB_LENGTH) and \ not self.parser.state.isstate('detabbed') and \ (parent.tag in self.ITEM_TYPES or \ (len(parent) and parent[-1] and \ @@ -174,7 +178,7 @@ class ListIndentProcessor(BlockProcessor): # Get indent level m = self.INDENT_RE.match(block) if m: - indent_level = len(m.group(1))/util.TAB_LENGTH + indent_level = len(m.group(1))/self.TAB_LENGTH else: indent_level = 0 if self.parser.state.isstate('list'): @@ -201,7 +205,7 @@ class CodeBlockProcessor(BlockProcessor): """ Process code blocks. """ def test(self, parent, block): - return block.startswith(' '*util.TAB_LENGTH) + return block.startswith(' '*self.TAB_LENGTH) def run(self, parent, blocks): sibling = self.lastChild(parent) @@ -321,7 +325,7 @@ class OListProcessor(BlockProcessor): # Loop through items in block, recursively parsing each with the # appropriate parent. for item in items: - if item.startswith(' '*util.TAB_LENGTH): + if item.startswith(' '*self.TAB_LENGTH): # Item is indented. Parse with last item as parent self.parser.parseBlocks(lst[-1], [item]) else: @@ -340,7 +344,7 @@ class OListProcessor(BlockProcessor): items.append(m.group(3)) elif self.INDENT_RE.match(line): # This is an indented (possibly nested) item. - if items[-1].startswith(' '*util.TAB_LENGTH): + if items[-1].startswith(' '*self.TAB_LENGTH): # Previous item was indented. Append to that item. items[-1] = '%s\n%s' % (items[-1], line) else: diff --git a/markdown/inlinepatterns.py b/markdown/inlinepatterns.py index ceeef0a..a718b7e 100644 --- a/markdown/inlinepatterns.py +++ b/markdown/inlinepatterns.py @@ -67,12 +67,8 @@ ESCAPE_RE = r'\\(.)' # \< EMPHASIS_RE = r'(\*)([^\*]+)\2' # *emphasis* STRONG_RE = r'(\*{2}|_{2})(.+?)\2' # **strong** STRONG_EM_RE = r'(\*{3}|_{3})(.+?)\2' # ***strong*** - -if util.SMART_EMPHASIS: - EMPHASIS_2_RE = r'(?<!\w)(_)(\S.+?)\2(?!\w)' # _emphasis_ -else: - EMPHASIS_2_RE = r'(_)(.+?)\2' # _emphasis_ - +SMART_EMPHASIS_RE = r'(?<!\w)(_)(\S.+?)\2(?!\w)' # _smart_emphasis_ +EMPHASIS_2_RE = r'(_)(.+?)\2' # _emphasis_ LINK_RE = NOIMG + BRK + \ r'''\(\s*(<.*?>|((?:(?:\(.*?\))|[^\(\)]))*?)\s*((['"])(.*?)\12)?\)''' # [text](url) or [text](<url>) @@ -287,7 +283,7 @@ class ImagePattern(LinkPattern): if len(src_parts) > 1: el.set('title', dequote(" ".join(src_parts[1:]))) - if util.ENABLE_ATTRIBUTES: + if self.markdown.ENABLE_ATTRIBUTES: truealt = handleAttributes(m.group(2), el) else: truealt = m.group(2) diff --git a/markdown/postprocessors.py b/markdown/postprocessors.py index 2093289..41c34fc 100644 --- a/markdown/postprocessors.py +++ b/markdown/postprocessors.py @@ -45,7 +45,7 @@ class RawHtmlPostprocessor(Postprocessor): elif str(self.markdown.safeMode).lower() == 'remove': html = '' else: - html = util.HTML_REMOVED_TEXT + html = self.markdown.HTML_REMOVED_TEXT if safe or not self.markdown.safeMode: text = text.replace("<p>%s</p>" % (self.markdown.htmlStash.get_placeholder(i)), diff --git a/markdown/treeprocessors.py b/markdown/treeprocessors.py index b33472e..6aeb142 100644 --- a/markdown/treeprocessors.py +++ b/markdown/treeprocessors.py @@ -277,7 +277,7 @@ class InlineProcessor(Treeprocessor): if child.getchildren(): stack.append(child) - if util.ENABLE_ATTRIBUTES: + if self.markdown.ENABLE_ATTRIBUTES: for element, lst in insertQueue: if element.text: element.text = \ diff --git a/markdown/util.py b/markdown/util.py index 9d1531a..f41469b 100644 --- a/markdown/util.py +++ b/markdown/util.py @@ -15,18 +15,10 @@ Constants you might want to modify ----------------------------------------------------------------------------- """ - -TAB_LENGTH = 4 # expand tabs to this many spaces -ENABLE_ATTRIBUTES = True # @id = xyz -> <... id="xyz"> -SMART_EMPHASIS = True # this_or_that does not become this<i>or</i>that -DEFAULT_OUTPUT_FORMAT = 'xhtml1' # xhtml or html4 output -HTML_REMOVED_TEXT = "[HTML_REMOVED]" # text used instead of HTML in safe mode BLOCK_LEVEL_ELEMENTS = re.compile("p|div|h[1-6]|blockquote|pre|table|dl|ol|ul" "|script|noscript|form|fieldset|iframe|math" "|ins|del|hr|hr/|style|li|dt|dd|thead|tbody" "|tr|th|td") -DOC_TAG = "div" # Element used to wrap document - later removed - # Placeholders STX = u'\u0002' # Use STX ("Start of text") for start-of-placeholder ETX = u'\u0003' # Use ETX ("End of text") for end-of-placeholder |