From 9b1de64b9e4a049f3fd5c9efc343f0e37f7ce457 Mon Sep 17 00:00:00 2001 From: Waylan Limberg Date: Wed, 7 Jul 2010 11:50:41 -0400 Subject: A better implementation of globals as attributes on the Markdown class. This should be more future proof. --- markdown/__init__.py | 73 +++++++++++++++++++++++---------------------- markdown/blockparser.py | 2 +- markdown/blockprocessors.py | 22 +++++++------- markdown/inlinepatterns.py | 4 +-- markdown/postprocessors.py | 2 +- markdown/treeprocessors.py | 2 +- 6 files changed, 53 insertions(+), 52 deletions(-) diff --git a/markdown/__init__.py b/markdown/__init__.py index ef6603e..4881587 100644 --- a/markdown/__init__.py +++ b/markdown/__init__.py @@ -75,30 +75,34 @@ from treeprocessors import build_treeprocessors from inlinepatterns import build_inlinepatterns from postprocessors import build_postprocessors from extensions import Extension, load_extension, load_extensions +import html4 # For backwards compatibility in the 2.0.x series # The things defined in these modules started off in __init__.py so third # party code might need to access them here. from util import * -# Adds the ability to output html4 -import html4 - class Markdown: """Convert Markdown to HTML.""" - TAB_LENGTH = 4 # expand tabs to this many spaces - ENABLE_ATTRIBUTES = True # @id = xyz -> <... 
id="xyz"> - SMART_EMPHASIS = True # this_or_that does not become thisorthat - HTML_REMOVED_TEXT = "[HTML_REMOVED]" # text used instead of HTML in safe mode - DOC_TAG = "div" # Element used to wrap document - later removed - - def __init__(self, - extensions=[], - extension_configs={}, - safe_mode = False, - output_format=None): + doc_tag = "div" # Element used to wrap document - later removed + + option_defaults = { + 'html_replacement_text' : '[HTML_REMOVED]', + 'tab_length' : 4, + 'enable_attributes' : True, + 'smart_emphasis' : True, + } + + output_formats = { + 'html' : html4.to_html_string, + 'html4' : html4.to_html_string, + 'xhtml' : util.etree.tostring, + 'xhtml1': util.etree.tostring, + } + + def __init__(self, extensions=[], **kwargs): """ Creates a new Markdown instance. @@ -108,8 +112,7 @@ class Markdown: If they are of type string, the module mdx_name.py will be loaded. If they are a subclass of markdown.Extension, they will be used as-is. - * extension-configs: Configuration setting for extensions. - * safe_mode: Disallow raw html. One of "remove", "replace" or "escape". + * extension-configs: Configuration settings for extensions. * output_format: Format of output. Supported formats are: * "xhtml1": Outputs XHTML 1.x. Default. * "xhtml": Outputs latest supported version of XHTML (currently XHTML 1.1). @@ -118,29 +121,29 @@ class Markdown: Note that it is suggested that the more specific formats ("xhtml1" and "html4") be used as "xhtml" or "html" may change in the future if it makes sense at that time. + * safe_mode: Disallow raw html. One of "remove", "replace" or "escape". + * html_replacement_text: Text used when safe_mode is set to "replace". + * tab_length: Length of tabs in the source. Default: 4 + * enable_attributes: Enable the conversion of attributes.
Default: True + * smart_emphasis: Treat `_connected_words_` intelligently. Default: True """ - self.safeMode = safe_mode + for option, default in self.option_defaults.items(): + setattr(self, option, kwargs.get(option, default)) + + self.safeMode = kwargs.get('safe_mode', False) self.registeredExtensions = [] self.docType = "" self.stripTopLevelTags = True self.build_parser() - # Map format keys to serializers - self.output_formats = { - 'html' : html4.to_html_string, - 'html4' : html4.to_html_string, - 'xhtml' : util.etree.tostring, - 'xhtml1': util.etree.tostring, - } - self.references = {} self.htmlStash = util.HtmlStash() self.registerExtensions(extensions = extensions, - configs = extension_configs) - self.set_output_format(output_format) + configs = kwargs.get('extension_configs', {})) + self.set_output_format(kwargs.get('output_format', 'xhtml1')) self.reset() def build_parser(self): @@ -190,10 +193,8 @@ class Markdown: if hasattr(extension, 'reset'): extension.reset() - def set_output_format(self, format=None): + def set_output_format(self, format): """ Set the output format for the class instance. """ - if format is None: - format = 'xhtml1' #DEFAULT try: self.serializer = self.output_formats[format.lower()] except KeyError: @@ -224,7 +225,7 @@ class Markdown: source = source.replace(util.STX, "").replace(util.ETX, "") source = source.replace("\r\n", "\n").replace("\r", "\n") + "\n\n" source = re.sub(r'\n\s+\n', '\n\n', source) - source = source.expandtabs(self.TAB_LENGTH) + source = source.expandtabs(self.tab_length) # Split into lines and run the line preprocessors.
self.lines = source.split("\n") @@ -244,11 +245,11 @@ class Markdown: output, length = codecs.utf_8_decode(self.serializer(root, encoding="utf-8")) if self.stripTopLevelTags: try: - start = output.index('<%s>'%self.DOC_TAG)+len(self.DOC_TAG)+2 - end = output.rindex('</%s>'%self.DOC_TAG) + start = output.index('<%s>'%self.doc_tag)+len(self.doc_tag)+2 + end = output.rindex('</%s>'%self.doc_tag) output = output[start:end].strip() except ValueError: - if output.strip().endswith('<%s />'%self.DOC_TAG): + if output.strip().endswith('<%s />'%self.doc_tag): # We have an empty document output = '' else: @@ -315,7 +316,7 @@ markdownFromFile(). def markdown(text, extensions = [], safe_mode = False, - output_format = None): + output_format = 'xhtml1'): """Convert a markdown string to HTML and return HTML as a unicode string. This is a shortcut function for `Markdown` class to cover the most @@ -350,7 +351,7 @@ def markdownFromFile(input = None, extensions = [], encoding = None, safe_mode = False, - output_format = None): + output_format = 'xhtml1'): """Read markdown code from a file and write it to a file or a stream.""" md = Markdown(extensions=load_extensions(extensions), safe_mode=safe_mode, diff --git a/markdown/blockparser.py b/markdown/blockparser.py index 533b695..fae136c 100644 --- a/markdown/blockparser.py +++ b/markdown/blockparser.py @@ -58,7 +58,7 @@ class BlockParser: """ # Create a ElementTree from the lines - self.root = util.etree.Element(self.markdown.DOC_TAG) + self.root = util.etree.Element(self.markdown.doc_tag) self.parseChunk(self.root, '\n'.join(lines)) return util.etree.ElementTree(self.root) diff --git a/markdown/blockprocessors.py b/markdown/blockprocessors.py index 399a523..77fbc71 100644 --- a/markdown/blockprocessors.py +++ b/markdown/blockprocessors.py @@ -48,7 +48,7 @@ class BlockProcessor: def __init__(self, parser): self.parser = parser - self.TAB_LENGTH = parser.markdown.TAB_LENGTH + self.tab_length = parser.markdown.tab_length def lastChild(self,
parent): """ Return the last child of an etree element. """ @@ -62,8 +62,8 @@ class BlockProcessor: newtext = [] lines = text.split('\n') for line in lines: - if line.startswith(' '*self.TAB_LENGTH): - newtext.append(line[self.TAB_LENGTH:]) + if line.startswith(' '*self.tab_length): + newtext.append(line[self.tab_length:]) elif not line.strip(): newtext.append('') else: @@ -74,8 +74,8 @@ class BlockProcessor: """ Remove a tab from front of lines but allowing dedented lines. """ lines = text.split('\n') for i in range(len(lines)): - if lines[i].startswith(' '*self.TAB_LENGTH*level): - lines[i] = lines[i][self.TAB_LENGTH*level:] + if lines[i].startswith(' '*self.tab_length*level): + lines[i] = lines[i][self.tab_length*level:] return '\n'.join(lines) def test(self, parent, block): @@ -139,10 +139,10 @@ class ListIndentProcessor(BlockProcessor): def __init__(self, *args): BlockProcessor.__init__(self, *args) - self.INDENT_RE = re.compile(r'^(([ ]{%s})+)'% self.TAB_LENGTH) + self.INDENT_RE = re.compile(r'^(([ ]{%s})+)'% self.tab_length) def test(self, parent, block): - return block.startswith(' '*self.TAB_LENGTH) and \ + return block.startswith(' '*self.tab_length) and \ not self.parser.state.isstate('detabbed') and \ (parent.tag in self.ITEM_TYPES or \ (len(parent) and parent[-1] and \ @@ -196,7 +196,7 @@ class ListIndentProcessor(BlockProcessor): # Get indent level m = self.INDENT_RE.match(block) if m: - indent_level = len(m.group(1))/self.TAB_LENGTH + indent_level = len(m.group(1))/self.tab_length else: indent_level = 0 if self.parser.state.isstate('list'): @@ -223,7 +223,7 @@ class CodeBlockProcessor(BlockProcessor): """ Process code blocks. 
""" def test(self, parent, block): - return block.startswith(' '*self.TAB_LENGTH) + return block.startswith(' '*self.tab_length) def run(self, parent, blocks): sibling = self.lastChild(parent) @@ -343,7 +343,7 @@ class OListProcessor(BlockProcessor): # Loop through items in block, recursively parsing each with the # appropriate parent. for item in items: - if item.startswith(' '*self.TAB_LENGTH): + if item.startswith(' '*self.tab_length): # Item is indented. Parse with last item as parent self.parser.parseBlocks(lst[-1], [item]) else: @@ -362,7 +362,7 @@ class OListProcessor(BlockProcessor): items.append(m.group(3)) elif self.INDENT_RE.match(line): # This is an indented (possibly nested) item. - if items[-1].startswith(' '*self.TAB_LENGTH): + if items[-1].startswith(' '*self.tab_length): # Previous item was indented. Append to that item. items[-1] = '%s\n%s' % (items[-1], line) else: diff --git a/markdown/inlinepatterns.py b/markdown/inlinepatterns.py index 7a78b5e..e4a9dd7 100644 --- a/markdown/inlinepatterns.py +++ b/markdown/inlinepatterns.py @@ -72,7 +72,7 @@ def build_inlinepatterns(md_instance, **kwargs): inlinePatterns["strong_em"] = DoubleTagPattern(STRONG_EM_RE, 'strong,em') inlinePatterns["strong"] = SimpleTagPattern(STRONG_RE, 'strong') inlinePatterns["emphasis"] = SimpleTagPattern(EMPHASIS_RE, 'em') - if md_instance.SMART_EMPHASIS: + if md_instance.smart_emphasis: inlinePatterns["emphasis2"] = SimpleTagPattern(SMART_EMPHASIS_RE, 'em') else: inlinePatterns["emphasis2"] = SimpleTagPattern(EMPHASIS_2_RE, 'em') @@ -311,7 +311,7 @@ class ImagePattern(LinkPattern): if len(src_parts) > 1: el.set('title', dequote(" ".join(src_parts[1:]))) - if self.markdown.ENABLE_ATTRIBUTES: + if self.markdown.enable_attributes: truealt = handleAttributes(m.group(2), el) else: truealt = m.group(2) diff --git a/markdown/postprocessors.py b/markdown/postprocessors.py index 4d93082..cd90eb8 100644 --- a/markdown/postprocessors.py +++ b/markdown/postprocessors.py @@ -54,7 +54,7 @@ 
class RawHtmlPostprocessor(Postprocessor): elif str(self.markdown.safeMode).lower() == 'remove': html = '' else: - html = self.markdown.HTML_REMOVED_TEXT + html = self.markdown.html_replacement_text if safe or not self.markdown.safeMode: text = text.replace("<p>%s</p>" % (self.markdown.htmlStash.get_placeholder(i)), diff --git a/markdown/treeprocessors.py b/markdown/treeprocessors.py index 50df486..9178afa 100644 --- a/markdown/treeprocessors.py +++ b/markdown/treeprocessors.py @@ -287,7 +287,7 @@ class InlineProcessor(Treeprocessor): if child.getchildren(): stack.append(child) - if self.markdown.ENABLE_ATTRIBUTES: + if self.markdown.enable_attributes: for element, lst in insertQueue: if element.text: element.text = \ -- cgit v1.2.3