"""
POST-PROCESSORS
=============================================================================

Markdown also allows post-processors, which are similar to preprocessors in
that they need to implement a "run" method. However, they are run after core
processing.

"""

import re
import util
import odict


def build_postprocessors(md_instance, **kwargs):
    """ Build the default postprocessors for Markdown.

    Returns an ordered dict holding the "raw_html", "amp_substitute" and
    "unescape" postprocessors, registered in that order. Only the raw html
    postprocessor receives `md_instance`; `kwargs` is accepted but unused.

    """
    postprocessors = odict.OrderedDict()
    postprocessors["raw_html"] = RawHtmlPostprocessor(md_instance)
    postprocessors["amp_substitute"] = AndSubstitutePostprocessor()
    postprocessors["unescape"] = UnescapePostprocessor()
    return postprocessors


class Postprocessor(util.Processor):
    """
    Postprocessors are run after the ElementTree is converted back into text.

    Each Postprocessor implements a "run" method that takes a pointer to a
    text string, modifies it as necessary and returns a text string.

    Postprocessors must extend markdown.Postprocessor.

    """

    def run(self, text):
        """
        Subclasses of Postprocessor should implement a `run` method, which
        takes the html document as a single text string and returns a
        (possibly modified) string.

        """
        pass


class RawHtmlPostprocessor(Postprocessor):
    """ Restore raw html to the document. """

    def run(self, text):
        """ Iterate over html stash and restore "safe" html.

        Every stashed block has its placeholder in `text` replaced by the
        block's html. In safe mode, blocks not flagged as safe are first
        escaped, removed, or swapped for the configured replacement text.

        """
        md = self.markdown
        stash = md.htmlStash
        for i in range(stash.html_counter):
            html, safe = stash.rawHtmlBlocks[i]
            placeholder = stash.get_placeholder(i)
            if md.safeMode and not safe:
                mode = str(md.safeMode).lower()
                if mode == 'escape':
                    html = self.escape(html)
                elif mode == 'remove':
                    html = ''
                else:
                    html = md.html_replacement_text
            if self.isblocklevel(html) and (safe or not md.safeMode):
                # Block level html must not stay wrapped in a paragraph, so
                # the whole wrapped placeholder is swapped out first.
                # NOTE(review): the wrapper literal was unreadable in the
                # reviewed copy; "<p>%s</p>" is the restored form - confirm.
                text = text.replace("<p>%s</p>" % placeholder, html + "\n")
            html = self.unescape(html)
            text = text.replace(placeholder, html)
        return text

    def unescape(self, html):
        """ Unescape any markdown escaped text within inline html.

        Each inline placeholder found in `html` is replaced by the
        serialized form of the node stashed under the same key.

        """
        stashed = self.markdown.treeprocessors['inline'].stashed_nodes
        for k, v in stashed.items():
            ph = util.INLINE_PLACEHOLDER % k
            try:
                html = html.replace(ph, '%s' % util.etree.tostring(v))
            except Exception:
                # Serialization failed (presumably `v` is not an element,
                # e.g. a plain string): fall back to the value itself,
                # prefixed with a backslash.
                html = html.replace(ph, '\\%s' % v)
        return html

    def escape(self, html):
        """ Basic html escaping.

        The ampersand is handled first so that the entities produced by the
        later replacements are not escaped a second time.

        """
        html = html.replace('&', '&amp;')
        html = html.replace('<', '&lt;')
        html = html.replace('>', '&gt;')
        return html.replace('"', '&quot;')

    def isblocklevel(self, html):
        """ Return True if `html` starts with a block level tag.

        Tags whose name begins with '!', '?', '@' or '%' are always treated
        as block level; any other tag name is passed to util.isBlockLevel.
        Text that does not start with a tag returns False.

        """
        m = re.match(r'^\<\/?([^ ]+)', html)
        if m:
            if m.group(1)[0] in ('!', '?', '@', '%'):
                # Comment, php etc...
                return True
            return util.isBlockLevel(m.group(1))
        return False


class AndSubstitutePostprocessor(Postprocessor):
    """ Restore valid entities """

    def run(self, text):
        """ Replace every util.AMP_SUBSTITUTE marker with a literal "&". """
        text = text.replace(util.AMP_SUBSTITUTE, "&")
        return text


class UnescapePostprocessor(Postprocessor):
    """ Restore escaped chars """

    # Matches a decimal code point wrapped between util.STX and util.ETX.
    RE = re.compile(r'%s(\d+)%s' % (util.STX, util.ETX))

    def unescape(self, m):
        """ Return the character whose code point was captured by `m`. """
        return unichr(int(m.group(1)))

    def run(self, text):
        """ Replace every STX<digits>ETX sequence with its character. """
        return self.RE.sub(self.unescape, text)