From 2eb00c816c6dc31dbbec6c47f8c7ff70ed0785d2 Mon Sep 17 00:00:00 2001 From: Toshio Kuratomi Date: Mon, 5 Jul 2010 22:53:31 -0400 Subject: Rename misc.py to util.py at the request of upstream --- markdown/__init__.py | 24 ++++++------- markdown/blockparser.py | 6 ++-- markdown/blockprocessors.py | 54 ++++++++++++++--------------- markdown/extensions/abbr.py | 2 +- markdown/extensions/def_list.py | 2 +- markdown/extensions/footnotes.py | 2 +- markdown/extensions/headerid.py | 2 +- markdown/extensions/rss.py | 2 +- markdown/extensions/tables.py | 2 +- markdown/extensions/toc.py | 2 +- markdown/extensions/wikilinks.py | 2 +- markdown/html4.py | 12 +++---- markdown/inlinepatterns.py | 42 +++++++++++------------ markdown/misc.py | 73 ---------------------------------------- markdown/postprocessors.py | 8 ++--- markdown/preprocessors.py | 12 +++---- markdown/treeprocessors.py | 24 ++++++------- markdown/util.py | 73 ++++++++++++++++++++++++++++++++++++++++ 18 files changed, 172 insertions(+), 172 deletions(-) delete mode 100644 markdown/misc.py create mode 100644 markdown/util.py diff --git a/markdown/__init__.py b/markdown/__init__.py index ac11f1d..d7ec4f2 100644 --- a/markdown/__init__.py +++ b/markdown/__init__.py @@ -64,7 +64,7 @@ Markdown processing takes place in four steps: Those steps are put together by the Markdown() class. """ -import misc +import util import misc_logging import preprocessors import blockprocessors @@ -77,7 +77,7 @@ import odict # For backwards compatibility in the 2.0.x series # The things defined in these modules started off in __init__.py so third # party code might need to access them here. -from misc import * +from util import * from misc_logging import * # Adds the ability to output html4 @@ -91,7 +91,7 @@ class Markdown: extensions=[], extension_configs={}, safe_mode = False, - output_format=misc.DEFAULT_OUTPUT_FORMAT): + output_format=util.DEFAULT_OUTPUT_FORMAT): """ Creates a new Markdown instance. 
@@ -210,8 +210,8 @@ class Markdown: self.output_formats = { 'html' : html4.to_html_string, 'html4' : html4.to_html_string, - 'xhtml' : misc.etree.tostring, - 'xhtml1': misc.etree.tostring, + 'xhtml' : util.etree.tostring, + 'xhtml1': util.etree.tostring, } self.references = {} @@ -289,10 +289,10 @@ class Markdown: 'UnicodeDecodeError: Markdown only accepts unicode or ascii input.') return u"" - source = source.replace(misc.STX, "").replace(misc.ETX, "") + source = source.replace(util.STX, "").replace(util.ETX, "") source = source.replace("\r\n", "\n").replace("\r", "\n") + "\n\n" source = re.sub(r'\n\s+\n', '\n\n', source) - source = source.expandtabs(misc.TAB_LENGTH) + source = source.expandtabs(util.TAB_LENGTH) # Split into lines and run the line preprocessors. self.lines = source.split("\n") @@ -312,11 +312,11 @@ class Markdown: output, length = codecs.utf_8_decode(self.serializer(root, encoding="utf-8")) if self.stripTopLevelTags: try: - start = output.index('<%s>'%misc.DOC_TAG)+len(misc.DOC_TAG)+2 - end = output.rindex('</%s>'%misc.DOC_TAG) + start = output.index('<%s>'%util.DOC_TAG)+len(util.DOC_TAG)+2 + end = output.rindex('</%s>'%util.DOC_TAG) output = output[start:end].strip() except ValueError: - if output.strip().endswith('<%s />'%misc.DOC_TAG): + if output.strip().endswith('<%s />'%util.DOC_TAG): # We have an empty document output = '' else: @@ -483,7 +483,7 @@ markdownFromFile(). def markdown(text, extensions = [], safe_mode = False, - output_format = misc.DEFAULT_OUTPUT_FORMAT): + output_format = util.DEFAULT_OUTPUT_FORMAT): """Convert a markdown string to HTML and return HTML as a unicode string.
This is a shortcut function for `Markdown` class to cover the most @@ -518,7 +518,7 @@ def markdownFromFile(input = None, extensions = [], encoding = None, safe_mode = False, - output_format = misc.DEFAULT_OUTPUT_FORMAT): + output_format = util.DEFAULT_OUTPUT_FORMAT): """Read markdown code from a file and write it to a file or a stream.""" md = Markdown(extensions=load_extensions(extensions), safe_mode=safe_mode, diff --git a/markdown/blockparser.py b/markdown/blockparser.py index 915d6c0..afbdabd 100644 --- a/markdown/blockparser.py +++ b/markdown/blockparser.py @@ -1,5 +1,5 @@ -import misc +import util import odict class State(list): @@ -57,9 +57,9 @@ class BlockParser: """ # Create a ElementTree from the lines - self.root = misc.etree.Element(misc.DOC_TAG) + self.root = util.etree.Element(util.DOC_TAG) self.parseChunk(self.root, '\n'.join(lines)) - return misc.etree.ElementTree(self.root) + return util.etree.ElementTree(self.root) def parseChunk(self, parent, text): """ Parse a chunk of markdown text and attach to given etree node. diff --git a/markdown/blockprocessors.py b/markdown/blockprocessors.py index 3aeef34..6e1f146 100644 --- a/markdown/blockprocessors.py +++ b/markdown/blockprocessors.py @@ -13,7 +13,7 @@ as they need to alter how markdown blocks are parsed. """ import re -import misc +import util from misc_logging import CRITICAL, message class BlockProcessor: @@ -42,8 +42,8 @@ class BlockProcessor: newtext = [] lines = text.split('\n') for line in lines: - if line.startswith(' '*misc.TAB_LENGTH): - newtext.append(line[misc.TAB_LENGTH:]) + if line.startswith(' '*util.TAB_LENGTH): + newtext.append(line[util.TAB_LENGTH:]) elif not line.strip(): newtext.append('') else: @@ -54,8 +54,8 @@ class BlockProcessor: """ Remove a tab from front of lines but allowing dedented lines. 
""" lines = text.split('\n') for i in range(len(lines)): - if lines[i].startswith(' '*misc.TAB_LENGTH*level): - lines[i] = lines[i][misc.TAB_LENGTH*level:] + if lines[i].startswith(' '*util.TAB_LENGTH*level): + lines[i] = lines[i][util.TAB_LENGTH*level:] return '\n'.join(lines) def test(self, parent, block): @@ -114,12 +114,12 @@ class ListIndentProcessor(BlockProcessor): """ - INDENT_RE = re.compile(r'^(([ ]{%s})+)'% misc.TAB_LENGTH) + INDENT_RE = re.compile(r'^(([ ]{%s})+)'% util.TAB_LENGTH) ITEM_TYPES = ['li'] LIST_TYPES = ['ul', 'ol'] def test(self, parent, block): - return block.startswith(' '*misc.TAB_LENGTH) and \ + return block.startswith(' '*util.TAB_LENGTH) and \ not self.parser.state.isstate('detabbed') and \ (parent.tag in self.ITEM_TYPES or \ (len(parent) and parent[-1] and \ @@ -154,7 +154,7 @@ class ListIndentProcessor(BlockProcessor): # If the parent li has text, that text needs to be moved to a p # The p must be 'inserted' at beginning of list in the event # that other children already exist i.e.; a nested sublist. - p = misc.etree.Element('p') + p = util.etree.Element('p') p.text = sibling[-1].text sibling[-1].text = '' sibling[-1].insert(0, p) @@ -165,7 +165,7 @@ class ListIndentProcessor(BlockProcessor): def create_item(self, parent, block): """ Create a new li and parse the block with it as the parent. """ - li = misc.etree.SubElement(parent, 'li') + li = util.etree.SubElement(parent, 'li') self.parser.parseBlocks(li, [block]) def get_level(self, parent, block): @@ -173,7 +173,7 @@ class ListIndentProcessor(BlockProcessor): # Get indent level m = self.INDENT_RE.match(block) if m: - indent_level = len(m.group(1))/misc.TAB_LENGTH + indent_level = len(m.group(1))/util.TAB_LENGTH else: indent_level = 0 if self.parser.state.isstate('list'): @@ -200,7 +200,7 @@ class CodeBlockProcessor(BlockProcessor): """ Process code blocks. 
""" def test(self, parent, block): - return block.startswith(' '*misc.TAB_LENGTH) + return block.startswith(' '*util.TAB_LENGTH) def run(self, parent, blocks): sibling = self.lastChild(parent) @@ -213,13 +213,13 @@ class CodeBlockProcessor(BlockProcessor): # linebreaks removed from the split into a list. code = sibling[0] block, theRest = self.detab(block) - code.text = misc.AtomicString('%s\n%s\n' % (code.text, block.rstrip())) + code.text = util.AtomicString('%s\n%s\n' % (code.text, block.rstrip())) else: # This is a new codeblock. Create the elements and insert text. - pre = misc.etree.SubElement(parent, 'pre') - code = misc.etree.SubElement(pre, 'code') + pre = util.etree.SubElement(parent, 'pre') + code = util.etree.SubElement(pre, 'code') block, theRest = self.detab(block) - code.text = misc.AtomicString('%s\n' % block.rstrip()) + code.text = util.AtomicString('%s\n' % block.rstrip()) if theRest: # This block contained unindented line(s) after the first indented # line. Insert these lines as the first block of the master blocks @@ -250,7 +250,7 @@ class BlockQuoteProcessor(BlockProcessor): quote = sibling else: # This is a new blockquote. Create a new parent element. - quote = misc.etree.SubElement(parent, 'blockquote') + quote = util.etree.SubElement(parent, 'blockquote') # Recursively parse block with blockquote as parent. # change parser state so blockquotes embedded in lists use p tags self.parser.state.set('blockquote') @@ -295,13 +295,13 @@ class OListProcessor(BlockProcessor): # since it's possible there are other children for this sibling, # we can't just SubElement the p, we need to insert it as the # first item - p = misc.etree.Element('p') + p = util.etree.Element('p') p.text = lst[-1].text lst[-1].text = '' lst[-1].insert(0, p) # parse first block differently as it gets wrapped in a p. 
- li = misc.etree.SubElement(lst, 'li') + li = util.etree.SubElement(lst, 'li') self.parser.state.set('looselist') firstitem = items.pop(0) self.parser.parseBlocks(li, [firstitem]) @@ -315,17 +315,17 @@ class OListProcessor(BlockProcessor): lst = parent else: # This is a new list so create parent with appropriate tag. - lst = misc.etree.SubElement(parent, self.TAG) + lst = util.etree.SubElement(parent, self.TAG) self.parser.state.set('list') # Loop through items in block, recursively parsing each with the # appropriate parent. for item in items: - if item.startswith(' '*misc.TAB_LENGTH): + if item.startswith(' '*util.TAB_LENGTH): # Item is indented. Parse with last item as parent self.parser.parseBlocks(lst[-1], [item]) else: # New item. Create li and parse with it as parent - li = misc.etree.SubElement(lst, 'li') + li = util.etree.SubElement(lst, 'li') self.parser.parseBlocks(li, [item]) self.parser.state.reset() @@ -339,7 +339,7 @@ class OListProcessor(BlockProcessor): items.append(m.group(3)) elif self.INDENT_RE.match(line): # This is an indented (possibly nested) item. - if items[-1].startswith(' '*misc.TAB_LENGTH): + if items[-1].startswith(' '*util.TAB_LENGTH): # Previous item was indented. Append to that item. items[-1] = '%s\n%s' % (items[-1], line) else: @@ -378,7 +378,7 @@ class HashHeaderProcessor(BlockProcessor): # recursively parse this lines as a block. self.parser.parseBlocks(parent, [before]) # Create header using named groups from RE - h = misc.etree.SubElement(parent, 'h%d' % len(m.group('level'))) + h = util.etree.SubElement(parent, 'h%d' % len(m.group('level'))) h.text = m.group('header').strip() if after: # Insert remaining lines as first block for future parsing. @@ -404,7 +404,7 @@ class SetextHeaderProcessor(BlockProcessor): level = 1 else: level = 2 - h = misc.etree.SubElement(parent, 'h%d' % level) + h = util.etree.SubElement(parent, 'h%d' % level) h.text = lines[0].strip() if len(lines) > 2: # Block contains additional lines. 
Add to master blocks for later. @@ -437,7 +437,7 @@ class HRProcessor(BlockProcessor): # Recursively parse lines before hr so they get parsed first. self.parser.parseBlocks(parent, ['\n'.join(prelines)]) # create hr - hr = misc.etree.SubElement(parent, 'hr') + hr = util.etree.SubElement(parent, 'hr') # check for lines in block after hr. lines = lines[len(prelines)+1:] if len(lines): @@ -465,7 +465,7 @@ class EmptyBlockProcessor(BlockProcessor): if sibling and sibling.tag == 'pre' and sibling[0] and \ sibling[0].tag == 'code': # Last block is a codeblock. Append to preserve whitespace. - sibling[0].text = misc.AtomicString('%s/n/n/n' % sibling[0].text ) + sibling[0].text = util.AtomicString('%s/n/n/n' % sibling[0].text ) class ParagraphProcessor(BlockProcessor): @@ -486,5 +486,5 @@ class ParagraphProcessor(BlockProcessor): parent.text = block.lstrip() else: # Create a regular paragraph - p = misc.etree.SubElement(parent, 'p') + p = util.etree.SubElement(parent, 'p') p.text = block.lstrip() diff --git a/markdown/extensions/abbr.py b/markdown/extensions/abbr.py index e2443a7..bc346cc 100644 --- a/markdown/extensions/abbr.py +++ b/markdown/extensions/abbr.py @@ -25,7 +25,7 @@ Copyright 2007-2008 import re import markdown -from markdown.misc import etree +from markdown.util import etree # Global Vars ABBR_REF_RE = re.compile(r'[*]\[(?P<abbr>[^\]]*)\][ ]?:\s*(?P<title>.*)') diff --git a/markdown/extensions/def_list.py b/markdown/extensions/def_list.py index d16196e..b5ba92f 100644 --- a/markdown/extensions/def_list.py +++ b/markdown/extensions/def_list.py @@ -21,7 +21,7 @@ Copyright 2008 - [Waylan Limberg](http://achinghead.com) import re import markdown -from markdown.misc import etree +from markdown.util import etree class DefListProcessor(markdown.blockprocessors.BlockProcessor): diff --git a/markdown/extensions/footnotes.py b/markdown/extensions/footnotes.py index a3d540f..f9e5101 100644 --- a/markdown/extensions/footnotes.py +++ b/markdown/extensions/footnotes.py @@ -25,7 +25,7
@@ Example: import re import markdown -from markdown.misc import etree +from markdown.util import etree FN_BACKLINK_TEXT = "zz1337820767766393qq" NBSP_PLACEHOLDER = "qq3936677670287331zz" diff --git a/markdown/extensions/headerid.py b/markdown/extensions/headerid.py index de6bcf1..762301c 100644 --- a/markdown/extensions/headerid.py +++ b/markdown/extensions/headerid.py @@ -66,7 +66,7 @@ Dependencies: """ import markdown -from markdown.misc import etree +from markdown.util import etree from markdown.misc_logging import CRITICAL, message import re from string import ascii_lowercase, digits, punctuation diff --git a/markdown/extensions/rss.py b/markdown/extensions/rss.py index 64f7be1..ae43220 100644 --- a/markdown/extensions/rss.py +++ b/markdown/extensions/rss.py @@ -1,5 +1,5 @@ import markdown -from markdown.misc import etree +from markdown.util import etree DEFAULT_URL = "http://www.freewisdom.org/projects/python-markdown/" DEFAULT_CREATOR = "Yuri Takhteyev" diff --git a/markdown/extensions/tables.py b/markdown/extensions/tables.py index 009f161..952fd21 100644 --- a/markdown/extensions/tables.py +++ b/markdown/extensions/tables.py @@ -15,7 +15,7 @@ A simple example: Copyright 2009 - [Waylan Limberg](http://achinghead.com) """ import markdown -from markdown.misc import etree +from markdown.util import etree class TableProcessor(markdown.blockprocessors.BlockProcessor): diff --git a/markdown/extensions/toc.py b/markdown/extensions/toc.py index 46798f5..7a93b0f 100644 --- a/markdown/extensions/toc.py +++ b/markdown/extensions/toc.py @@ -9,7 +9,7 @@ Dependencies: """ import markdown -from markdown.misc import etree +from markdown.util import etree import re class TocTreeprocessor(markdown.treeprocessors.Treeprocessor): diff --git a/markdown/extensions/wikilinks.py b/markdown/extensions/wikilinks.py index 2da207f..f68e4e5 100644 --- a/markdown/extensions/wikilinks.py +++ b/markdown/extensions/wikilinks.py @@ -121,7 +121,7 @@ class 
WikiLinks(markdown.inlinepatterns.Pattern): base_url, end_url, html_class = self._getMeta() label = m.group(2).strip() url = self.config['build_url'][0](label, base_url, end_url) - a = markdown.misc.etree.Element('a') + a = markdown.util.etree.Element('a') a.text = label a.set('href', url) if html_class: diff --git a/markdown/html4.py b/markdown/html4.py index e47cb78..63bcc97 100644 --- a/markdown/html4.py +++ b/markdown/html4.py @@ -37,12 +37,12 @@ # -------------------------------------------------------------------- -import misc -ElementTree = misc.etree.ElementTree -QName = misc.etree.QName -Comment = misc.etree.Comment -PI = misc.etree.PI -ProcessingInstruction = misc.etree.ProcessingInstruction +import util +ElementTree = util.etree.ElementTree +QName = util.etree.QName +Comment = util.etree.Comment +PI = util.etree.PI +ProcessingInstruction = util.etree.ProcessingInstruction HTML_EMPTY = ("area", "base", "basefont", "br", "col", "frame", "hr", "img", "input", "isindex", "link", "meta" "param") diff --git a/markdown/inlinepatterns.py b/markdown/inlinepatterns.py index c277395..ceeef0a 100644 --- a/markdown/inlinepatterns.py +++ b/markdown/inlinepatterns.py @@ -41,7 +41,7 @@ So, we apply the expressions in the following order: * finally we apply strong and emphasis """ -import misc +import util import re from urlparse import urlparse, urlunparse import sys @@ -68,7 +68,7 @@ EMPHASIS_RE = r'(\*)([^\*]+)\2' # *emphasis* STRONG_RE = r'(\*{2}|_{2})(.+?)\2' # **strong** STRONG_EM_RE = r'(\*{3}|_{3})(.+?)\2' # ***strong*** -if misc.SMART_EMPHASIS: +if util.SMART_EMPHASIS: EMPHASIS_2_RE = r'(?<!\w)(_)(\S.+?)\2(?!\w)' # _emphasis_ else: EMPHASIS_2_RE = r'(_)(.+?)\2' # _emphasis_ @@ -159,7 +159,7 @@ class SimpleTextPattern (Pattern): """ Return a simple text of group(2) of a Pattern. 
""" def handleMatch(self, m): text = m.group(2) - if text == misc.INLINE_PLACEHOLDER_PREFIX: + if text == util.INLINE_PLACEHOLDER_PREFIX: return None return text @@ -174,7 +174,7 @@ class SimpleTagPattern (Pattern): self.tag = tag def handleMatch(self, m): - el = misc.etree.Element(self.tag) + el = util.etree.Element(self.tag) el.text = m.group(3) return el @@ -182,7 +182,7 @@ class SimpleTagPattern (Pattern): class SubstituteTagPattern (SimpleTagPattern): """ Return a eLement of type `tag` with no children. """ def handleMatch (self, m): - return misc.etree.Element(self.tag) + return util.etree.Element(self.tag) class BacktickPattern (Pattern): @@ -192,8 +192,8 @@ class BacktickPattern (Pattern): self.tag = "code" def handleMatch(self, m): - el = misc.etree.Element(self.tag) - el.text = misc.AtomicString(m.group(3).strip()) + el = util.etree.Element(self.tag) + el.text = util.AtomicString(m.group(3).strip()) return el @@ -205,8 +205,8 @@ class DoubleTagPattern (SimpleTagPattern): """ def handleMatch(self, m): tag1, tag2 = self.tag.split(",") - el1 = misc.etree.Element(tag1) - el2 = misc.etree.SubElement(el1, tag2) + el1 = util.etree.Element(tag1) + el2 = util.etree.SubElement(el1, tag2) el2.text = m.group(3) return el1 @@ -223,7 +223,7 @@ class HtmlPattern (Pattern): class LinkPattern (Pattern): """ Return a link element from the given match. """ def handleMatch(self, m): - el = misc.etree.Element("a") + el = util.etree.Element("a") el.text = m.group(2) title = m.group(11) href = m.group(9) @@ -275,7 +275,7 @@ class LinkPattern (Pattern): class ImagePattern(LinkPattern): """ Return a img element from the given match. 
""" def handleMatch(self, m): - el = misc.etree.Element("img") + el = util.etree.Element("img") src_parts = m.group(9).split() if src_parts: src = src_parts[0] @@ -287,7 +287,7 @@ class ImagePattern(LinkPattern): if len(src_parts) > 1: el.set('title', dequote(" ".join(src_parts[1:]))) - if misc.ENABLE_ATTRIBUTES: + if util.ENABLE_ATTRIBUTES: truealt = handleAttributes(m.group(2), el) else: truealt = m.group(2) @@ -313,7 +313,7 @@ class ReferencePattern(LinkPattern): return self.makeTag(href, title, text) def makeTag(self, href, title, text): - el = misc.etree.Element('a') + el = util.etree.Element('a') el.set('href', self.sanitize_url(href)) if title: @@ -326,7 +326,7 @@ class ReferencePattern(LinkPattern): class ImageReferencePattern (ReferencePattern): """ Match to a stored reference and return img element. """ def makeTag(self, href, title, text): - el = misc.etree.Element("img") + el = util.etree.Element("img") el.set("src", self.sanitize_url(href)) if title: el.set("title", title) @@ -337,9 +337,9 @@ class ImageReferencePattern (ReferencePattern): class AutolinkPattern (Pattern): """ Return a link Element given an autolink (`<http://example/com>`). """ def handleMatch(self, m): - el = misc.etree.Element("a") + el = util.etree.Element("a") el.set('href', m.group(2)) - el.text = misc.AtomicString(m.group(2)) + el.text = util.AtomicString(m.group(2)) return el class AutomailPattern (Pattern): @@ -347,7 +347,7 @@ class AutomailPattern (Pattern): Return a mailto link Element given an automail link (`<foo@example.com>`). 
""" def handleMatch(self, m): - el = misc.etree.Element('a') + el = util.etree.Element('a') email = m.group(2) if email.startswith("mailto:"): email = email[len("mailto:"):] @@ -356,15 +356,15 @@ class AutomailPattern (Pattern): """Return entity definition by code, or the code if not defined.""" entity = htmlentitydefs.codepoint2name.get(code) if entity: - return "%s%s;" % (misc.AMP_SUBSTITUTE, entity) + return "%s%s;" % (util.AMP_SUBSTITUTE, entity) else: - return "%s#%d;" % (misc.AMP_SUBSTITUTE, code) + return "%s#%d;" % (util.AMP_SUBSTITUTE, code) letters = [codepoint2name(ord(letter)) for letter in email] - el.text = misc.AtomicString(''.join(letters)) + el.text = util.AtomicString(''.join(letters)) mailto = "mailto:" + email - mailto = "".join([misc.AMP_SUBSTITUTE + '#%d;' % + mailto = "".join([util.AMP_SUBSTITUTE + '#%d;' % ord(letter) for letter in mailto]) el.set('href', mailto) return el diff --git a/markdown/misc.py b/markdown/misc.py deleted file mode 100644 index f9c7894..0000000 --- a/markdown/misc.py +++ /dev/null @@ -1,73 +0,0 @@ -# -*- coding: utf-8 -*- -import re -from misc_logging import CRITICAL - -import etree_loader - - -""" -CONSTANTS -============================================================================= -""" - -""" -Constants you might want to modify ------------------------------------------------------------------------------ -""" - -# default logging level for command-line use -COMMAND_LINE_LOGGING_LEVEL = CRITICAL -TAB_LENGTH = 4 # expand tabs to this many spaces -ENABLE_ATTRIBUTES = True # @id = xyz -> <... 
id="xyz"> -SMART_EMPHASIS = True # this_or_that does not become this<i>or</i>that -DEFAULT_OUTPUT_FORMAT = 'xhtml1' # xhtml or html4 output -HTML_REMOVED_TEXT = "[HTML_REMOVED]" # text used instead of HTML in safe mode -BLOCK_LEVEL_ELEMENTS = re.compile("p|div|h[1-6]|blockquote|pre|table|dl|ol|ul" - "|script|noscript|form|fieldset|iframe|math" - "|ins|del|hr|hr/|style|li|dt|dd|thead|tbody" - "|tr|th|td") -DOC_TAG = "div" # Element used to wrap document - later removed - -# Placeholders -STX = u'\u0002' # Use STX ("Start of text") for start-of-placeholder -ETX = u'\u0003' # Use ETX ("End of text") for end-of-placeholder -INLINE_PLACEHOLDER_PREFIX = STX+"klzzwxh:" -INLINE_PLACEHOLDER = INLINE_PLACEHOLDER_PREFIX + "%s" + ETX -AMP_SUBSTITUTE = STX+"amp"+ETX - - -""" -Constants you probably do not need to change ------------------------------------------------------------------------------ -""" - -RTL_BIDI_RANGES = ( (u'\u0590', u'\u07FF'), - # Hebrew (0590-05FF), Arabic (0600-06FF), - # Syriac (0700-074F), Arabic supplement (0750-077F), - # Thaana (0780-07BF), Nko (07C0-07FF). - (u'\u2D30', u'\u2D7F'), # Tifinagh - ) - -# Extensions should use "markdown.misc.etree" instead of "etree" (or do `from -# markdown.misc import etree`). Do not import it by yourself. - -etree = etree_loader.importETree() - -""" -AUXILIARY GLOBAL FUNCTIONS -============================================================================= -""" - - -def isBlockLevel(tag): - """Check if the tag is a block level HTML tag.""" - return BLOCK_LEVEL_ELEMENTS.match(tag) - -""" -MISC AUXILIARY CLASSES -============================================================================= -""" - -class AtomicString(unicode): - """A string which should not be further processed.""" - pass diff --git a/markdown/postprocessors.py b/markdown/postprocessors.py index ceb897f..4ce5324 100644 --- a/markdown/postprocessors.py +++ b/markdown/postprocessors.py @@ -9,7 +9,7 @@ processing. 
""" -import misc +import util import preprocessors class Processor: @@ -52,7 +52,7 @@ class RawHtmlPostprocessor(Postprocessor): elif str(self.markdown.safeMode).lower() == 'remove': html = '' else: - html = misc.HTML_REMOVED_TEXT + html = util.HTML_REMOVED_TEXT if safe or not self.markdown.safeMode: text = text.replace("<p>%s</p>" % (preprocessors.HTML_PLACEHOLDER % i), @@ -64,7 +64,7 @@ class RawHtmlPostprocessor(Postprocessor): def unescape(self, html): """ Unescape any markdown escaped text within inline html. """ for k, v in self.markdown.treeprocessors['inline'].stashed_nodes.items(): - ph = misc.INLINE_PLACEHOLDER % k + ph = util.INLINE_PLACEHOLDER % k html = html.replace(ph, '\%s' % v) return html @@ -82,5 +82,5 @@ class AndSubstitutePostprocessor(Postprocessor): pass def run(self, text): - text = text.replace(misc.AMP_SUBSTITUTE, "&") + text = text.replace(util.AMP_SUBSTITUTE, "&") return text diff --git a/markdown/preprocessors.py b/markdown/preprocessors.py index 248e940..3b19953 100644 --- a/markdown/preprocessors.py +++ b/markdown/preprocessors.py @@ -9,10 +9,10 @@ complicated. 
import re -import misc +import util -HTML_PLACEHOLDER_PREFIX = misc.STX+"wzxhzdk:" -HTML_PLACEHOLDER = HTML_PLACEHOLDER_PREFIX + "%d" + misc.ETX +HTML_PLACEHOLDER_PREFIX = util.STX+"wzxhzdk:" +HTML_PLACEHOLDER = HTML_PLACEHOLDER_PREFIX + "%d" + util.ETX class Processor: def __init__(self, markdown_instance=None): @@ -173,11 +173,11 @@ class HtmlBlockPreprocessor(Preprocessor): # keep checking conditions below and maybe just append if data_index < len(block) \ - and misc.isBlockLevel(left_tag): + and util.isBlockLevel(left_tag): text.insert(0, block[data_index:]) block = block[:data_index] - if not (misc.isBlockLevel(left_tag) \ + if not (util.isBlockLevel(left_tag) \ or block[1] in ["!", "?", "@", "%"]): new_blocks.append(block) continue @@ -205,7 +205,7 @@ class HtmlBlockPreprocessor(Preprocessor): else: # if is block level tag and is not complete - if misc.isBlockLevel(left_tag) or left_tag == "--" \ + if util.isBlockLevel(left_tag) or left_tag == "--" \ and not block.rstrip().endswith(">"): items.append(block.strip()) in_tag = True diff --git a/markdown/treeprocessors.py b/markdown/treeprocessors.py index 79c3999..b33472e 100644 --- a/markdown/treeprocessors.py +++ b/markdown/treeprocessors.py @@ -1,7 +1,7 @@ import re import inlinepatterns -import misc +import util def isString(s): """ Check if it's string """ @@ -39,17 +39,17 @@ class InlineProcessor(Treeprocessor): """ def __init__ (self, md): - self.__placeholder_prefix = misc.INLINE_PLACEHOLDER_PREFIX - self.__placeholder_suffix = misc.ETX + self.__placeholder_prefix = util.INLINE_PLACEHOLDER_PREFIX + self.__placeholder_suffix = util.ETX self.__placeholder_length = 4 + len(self.__placeholder_prefix) \ + len(self.__placeholder_suffix) - self.__placeholder_re = re.compile(misc.INLINE_PLACEHOLDER % r'([0-9]{4})') + self.__placeholder_re = re.compile(util.INLINE_PLACEHOLDER % r'([0-9]{4})') self.markdown = md def __makePlaceholder(self, type): """ Generate a placeholder """ id = "%04d" % len(self.stashed_nodes) 
- hash = misc.INLINE_PLACEHOLDER % id + hash = util.INLINE_PLACEHOLDER % id return hash, id def __findPlaceholder(self, data, index): @@ -89,7 +89,7 @@ class InlineProcessor(Treeprocessor): Returns: String with placeholders. """ - if not isinstance(data, misc.AtomicString): + if not isinstance(data, util.AtomicString): startIndex = 0 while patternIndex < len(self.markdown.inlinePatterns): data, matched, startIndex = self.__applyPattern( @@ -224,7 +224,7 @@ class InlineProcessor(Treeprocessor): return data, True, len(leftData) + match.span(len(match.groups()))[0] if not isString(node): - if not isinstance(node.text, misc.AtomicString): + if not isinstance(node.text, util.AtomicString): # We need to process current node too for child in [node] + node.getchildren(): if not isString(node): @@ -266,7 +266,7 @@ class InlineProcessor(Treeprocessor): currElement = stack.pop() insertQueue = [] for child in currElement.getchildren(): - if child.text and not isinstance(child.text, misc.AtomicString): + if child.text and not isinstance(child.text, util.AtomicString): text = child.text child.text = None lst = self.__processPlaceholders(self.__handleInline( @@ -277,7 +277,7 @@ class InlineProcessor(Treeprocessor): if child.getchildren(): stack.append(child) - if misc.ENABLE_ATTRIBUTES: + if util.ENABLE_ATTRIBUTES: for element, lst in insertQueue: if element.text: element.text = \ @@ -306,12 +306,12 @@ class PrettifyTreeprocessor(Treeprocessor): """ Recursively add linebreaks to ElementTree children. 
""" i = "\n" - if misc.isBlockLevel(elem.tag) and elem.tag not in ['code', 'pre']: + if util.isBlockLevel(elem.tag) and elem.tag not in ['code', 'pre']: if (not elem.text or not elem.text.strip()) \ - and len(elem) and misc.isBlockLevel(elem[0].tag): + and len(elem) and util.isBlockLevel(elem[0].tag): elem.text = i for e in elem: - if misc.isBlockLevel(e.tag): + if util.isBlockLevel(e.tag): self._prettifyETree(e) if not elem.tail or not elem.tail.strip(): elem.tail = i diff --git a/markdown/util.py b/markdown/util.py new file mode 100644 index 0000000..9cdd234 --- /dev/null +++ b/markdown/util.py @@ -0,0 +1,73 @@ +# -*- coding: utf-8 -*- +import re +from misc_logging import CRITICAL + +import etree_loader + + +""" +CONSTANTS +============================================================================= +""" + +""" +Constants you might want to modify +----------------------------------------------------------------------------- +""" + +# default logging level for command-line use +COMMAND_LINE_LOGGING_LEVEL = CRITICAL +TAB_LENGTH = 4 # expand tabs to this many spaces +ENABLE_ATTRIBUTES = True # @id = xyz -> <... 
id="xyz"> +SMART_EMPHASIS = True # this_or_that does not become this<i>or</i>that +DEFAULT_OUTPUT_FORMAT = 'xhtml1' # xhtml or html4 output +HTML_REMOVED_TEXT = "[HTML_REMOVED]" # text used instead of HTML in safe mode +BLOCK_LEVEL_ELEMENTS = re.compile("p|div|h[1-6]|blockquote|pre|table|dl|ol|ul" + "|script|noscript|form|fieldset|iframe|math" + "|ins|del|hr|hr/|style|li|dt|dd|thead|tbody" + "|tr|th|td") +DOC_TAG = "div" # Element used to wrap document - later removed + +# Placeholders +STX = u'\u0002' # Use STX ("Start of text") for start-of-placeholder +ETX = u'\u0003' # Use ETX ("End of text") for end-of-placeholder +INLINE_PLACEHOLDER_PREFIX = STX+"klzzwxh:" +INLINE_PLACEHOLDER = INLINE_PLACEHOLDER_PREFIX + "%s" + ETX +AMP_SUBSTITUTE = STX+"amp"+ETX + + +""" +Constants you probably do not need to change +----------------------------------------------------------------------------- +""" + +RTL_BIDI_RANGES = ( (u'\u0590', u'\u07FF'), + # Hebrew (0590-05FF), Arabic (0600-06FF), + # Syriac (0700-074F), Arabic supplement (0750-077F), + # Thaana (0780-07BF), Nko (07C0-07FF). + (u'\u2D30', u'\u2D7F'), # Tifinagh + ) + +# Extensions should use "markdown.util.etree" instead of "etree" (or do `from +# markdown.util import etree`). Do not import it by yourself. + +etree = etree_loader.importETree() + +""" +AUXILIARY GLOBAL FUNCTIONS +============================================================================= +""" + + +def isBlockLevel(tag): + """Check if the tag is a block level HTML tag.""" + return BLOCK_LEVEL_ELEMENTS.match(tag) + +""" +MISC AUXILIARY CLASSES +============================================================================= +""" + +class AtomicString(unicode): + """A string which should not be further processed.""" + pass -- cgit v1.2.3