# -*- coding: utf-8 -*- from __future__ import unicode_literals import re import sys """ Python 3 Stuff ============================================================================= """ PY3 = sys.version_info[0] == 3 if PY3: # pragma: no cover string_type = str text_type = str int2str = chr iterrange = range else: # pragma: no cover string_type = basestring # noqa text_type = unicode # noqa int2str = unichr # noqa iterrange = xrange # noqa """ Constants you might want to modify ----------------------------------------------------------------------------- """ BLOCK_LEVEL_ELEMENTS = re.compile( r"^(p|div|h[1-6]|blockquote|pre|table|dl|ol|ul" r"|script|noscript|form|fieldset|iframe|math" r"|hr|hr/|style|li|dt|dd|thead|tbody" r"|tr|th|td|section|footer|header|group|figure" r"|figcaption|aside|article|canvas|output" r"|progress|video|nav|main)$", re.IGNORECASE ) # Placeholders STX = '\u0002' # Use STX ("Start of text") for start-of-placeholder ETX = '\u0003' # Use ETX ("End of text") for end-of-placeholder INLINE_PLACEHOLDER_PREFIX = STX+"klzzwxh:" INLINE_PLACEHOLDER = INLINE_PLACEHOLDER_PREFIX + "%s" + ETX INLINE_PLACEHOLDER_RE = re.compile(INLINE_PLACEHOLDER % r'([0-9]+)') AMP_SUBSTITUTE = STX+"amp"+ETX HTML_PLACEHOLDER = STX + "wzxhzdk:%s" + ETX HTML_PLACEHOLDER_RE = re.compile(HTML_PLACEHOLDER % r'([0-9]+)') TAG_PLACEHOLDER = STX + "hzzhzkh:%s" + ETX """ Constants you probably do not need to change ----------------------------------------------------------------------------- """ RTL_BIDI_RANGES = ( ('\u0590', '\u07FF'), # Hebrew (0590-05FF), Arabic (0600-06FF), # Syriac (0700-074F), Arabic supplement (0750-077F), # Thaana (0780-07BF), Nko (07C0-07FF). ('\u2D30', '\u2D7F') # Tifinagh ) # Extensions should use "markdown.util.etree" instead of "etree" (or do `from # markdown.util import etree`). Do not import it by yourself. try: # pragma: no cover # Is the C implementation of ElementTree available? import xml.etree.cElementTree as etree from xml.etree.ElementTree import Comment # Serializers (including ours) test with non-c Comment etree.test_comment = Comment if etree.VERSION < "1.0.5": raise RuntimeError("cElementTree version 1.0.5 or higher is required.") except (ImportError, RuntimeError): # pragma: no cover # Use the Python implementation of ElementTree? import xml.etree.ElementTree as etree if etree.VERSION < "1.1": raise RuntimeError("ElementTree version 1.1 or higher is required") """ AUXILIARY GLOBAL FUNCTIONS ============================================================================= """ def isBlockLevel(tag): """Check if the tag is a block level HTML tag.""" if isinstance(tag, string_type): return BLOCK_LEVEL_ELEMENTS.match(tag) # Some ElementTree tags are not strings, so return False. return False def parseBoolValue(value, fail_on_errors=True, preserve_none=False): """Parses a string representing bool value. If parsing was successful, returns True or False. If preserve_none=True, returns True, False, or None. If parsing was not successful, raises ValueError, or, if fail_on_errors=False, returns None.""" if not isinstance(value, string_type): if preserve_none and value is None: return value return bool(value) elif preserve_none and value.lower() == 'none': return None elif value.lower() in ('true', 'yes', 'y', 'on', '1'): return True elif value.lower() in ('false', 'no', 'n', 'off', '0', 'none'): return False elif fail_on_errors: raise ValueError('Cannot parse bool value: %r' % value) """ MISC AUXILIARY CLASSES ============================================================================= """ class AtomicString(text_type): """A string which should not be further processed.""" pass class Processor(object): def __init__(self, markdown_instance=None): if markdown_instance: self.markdown = markdown_instance class HtmlStash(object): """ This class is used for stashing HTML objects that we extract in the beginning and replace with place-holders. """ def __init__(self): """ Create a HtmlStash. """ self.html_counter = 0 # for counting inline html segments self.rawHtmlBlocks = [] self.tag_counter = 0 self.tag_data = [] # list of dictionaries in the order tags appear def store(self, html): """ Saves an HTML segment for later reinsertion. Returns a placeholder string that needs to be inserted into the document. Keyword arguments: * html: an html segment Returns : a placeholder string """ self.rawHtmlBlocks.append(html) placeholder = self.get_placeholder(self.html_counter) self.html_counter += 1 return placeholder def reset(self): self.html_counter = 0 self.rawHtmlBlocks = [] def get_placeholder(self, key): return HTML_PLACEHOLDER % key def store_tag(self, tag, attrs, left_index, right_index): """Store tag data and return a placeholder.""" self.tag_data.append({'tag': tag, 'attrs': attrs, 'left_index': left_index, 'right_index': right_index}) placeholder = TAG_PLACEHOLDER % str(self.tag_counter) self.tag_counter += 1 # equal to the tag's index in self.tag_data return placeholder