diff options
Diffstat (limited to 'mdx_codehilite.py')
-rw-r--r-- | mdx_codehilite.py | 270 |
1 files changed, 270 insertions, 0 deletions
diff --git a/mdx_codehilite.py b/mdx_codehilite.py new file mode 100644 index 0000000..ea83e9a --- /dev/null +++ b/mdx_codehilite.py @@ -0,0 +1,270 @@ +#!/usr/bin/python +import markdown + +# --------------- CONSTANTS YOU MIGHT WANT TO MODIFY ----------------- + +DEFAULT_HILITER = 'pygments' # one of 'enscript', 'dp', or 'pygments' +try: + TAB_LENGTH = markdown.TAB_LENGTH +except AttributeError: + TAB_LENGTH = 4 + +# --------------- THE CODE ------------------------------------------- +# --------------- hiliter utility functions -------------------------- +def escape(txt) : + '''basic html escaping''' + txt = txt.replace('&', '&') + txt = txt.replace('<', '<') + txt = txt.replace('>', '>') + txt = txt.replace('"', '"') + return txt + +def number(txt): + '''use <ol> for line numbering''' + # Fix Whitespace + txt = txt.replace('\t', ' '*TAB_LENGTH) + txt = txt.replace(" "*4, " ") + txt = txt.replace(" "*3, " ") + txt = txt.replace(" "*2, " ") + + # Add line numbers + lines = txt.splitlines() + txt = '<div class="codehilite"><pre><ol>\n' + for line in lines: + txt += '\t<li>%s</li>\n'% line + txt += '</ol></pre></div>\n' + return txt + +# ---------------- The hiliters --------------------------------------- +def enscript(src, lang=None, num=True): + ''' +Pass source code on to [enscript] (http://www.codento.com/people/mtr/genscript/) +command line utility for hiliting. + +Usage: + >>> enscript(src [, lang [, num ]] ) + + @param src: Can be a string or any object with a .readline attribute. + + @param lang: The language of code. Basic escaping only, if None. + + @param num: (Boolen) Turns line numbering 'on' or 'off' (on by default). + + @returns : A string of html. + ''' + if lang: + cmd = 'enscript --highlight=%s --color --language=html --tabsize=%d --output=-'% (lang, TAB_LENGTH) + from os import popen3 + (i, out, err) = popen3(cmd) + i.write(src) + i.close() + # check for errors + e = err.read() + if e != 'output left in -\n' : + # error - just escape + txt = escape(src) + else : + import re + pattern = re.compile(r'<PRE>(?P<code>.*?)</PRE>', re.DOTALL) + txt = pattern.search(out.read()).group('code') + # fix enscripts output + txt = txt.replace('\n</FONT></I>', '</FONT></I>\n').strip() + html_map = {'<I>' : '<em>', + '</I>' : '</em>', + '<B>' : '<strong>', + '</B>' : '</strong>', + '<FONT COLOR="#' : '<span style="color:#', + '</FONT>' : '</span>' + } + for k, v in html_map.items() : + txt = txt.replace(k, v) + else: + txt = escape(src) + if num : + txt = number(txt) + else : + txt = '<div class="codehilite"><pre>%s</pre></div>\n'% txt + return txt + + +def dp(src, lang=None, num=True): + ''' +Pass source code to a textarea for the [dp.SyntaxHighlighter] (http://www.dreamprojections.com/syntaxhighlighter/Default.aspx) + +Usage: + >>> dp(src [, lang [, num ]] ) + + @param src: A string. + + @param lang: The language of code. Undefined if None. + + @param num: (Boolen) Turns line numbering 'on' or 'off' (on by default). + + @returns : A string of html. + ''' + gutter = '' + if not num: + gutter = ':nogutter' + if not lang: + lang = '' + + return '<div class="codehilite"><textarea name="code" class="%s%s" cols="60" rows="10">\n%s\n</textarea></div>\n'% (lang, gutter, src) + +def pygment(src, lang = None, num = True): + ''' +Pass code to the [Pygments](http://pygments.pocoo.org/) highliter with +optional line numbers. The output should then be styled with css to your liking. +No styles are applied by default - only styling hooks (i.e.: <span class="k">). + +Usage: + >>> pygment(src [, lang [, num ]] ) + + @param src: Can be a string or any object with a .readline attribute. + + @param lang: The language of code. Pygments will try to guess language if None. + + @param num: (Boolen) Turns line numbering 'on' or 'off' (on by default). + + @returns : A string of html. + ''' + try: + from pygments import highlight + from pygments.lexers import get_lexer_by_name, guess_lexer, TextLexer + from pygments.formatters import HtmlFormatter + except ImportError: + # just escape and pass through + txt = escape(src) + if num: + txt = number(txt) + else : + txt = '<div class="codehilite"><pre>%s</pre></div>\n'% txt + return txt + else: + try: + lexer = get_lexer_by_name(lang) + except ValueError: + try: + lexer = guess_lexer(src) + except ValueError: + lexer = TextLexer() + formatter = HtmlFormatter(linenos=num, cssclass="codehilite") + return highlight(src, lexer, formatter) + + +# ------------------ The Main CodeHilite Class ---------------------- +class CodeHilite: + ''' +A wrapper class providing a single API for various hilighting engines. Takes source code, determines which language it containes (if not provided), and passes it into the hiliter specified. + +Basic Usage: + >>> code = CodeHilite(src = text) + >>> html = code.hilite() + + @param src: Can be a string or any object with a .readline attribute. + + @param lang: A string. Accepted values determined by hiliter used. Overrides _getLang() + + @param linenos: (Boolen) Turns line numbering 'on' or 'off' (off by default). + + @param hiliter: A string. One of 'enscript', 'dp', or 'pygments'. + +Low Level Usage: + >>> code = CodeHilite() + >>> code.src = text # Can be a string or any object with a .readline attribute. + >>> code.lang = 'python' # Setting this will override _getLang() + >>> code.linenos = True # True or False; Turns line numbering on or off. + >>> code.hiliter = MyCustomHiliter # Where MyCustomHiliter is callable, takes three arguments (src, lang, linenos) and returns a string. + >>> html = code.hilite() + ''' + def __init__(self, src=None, lang=None, linenos = False, hiliter=DEFAULT_HILITER): + self.src = src + self.lang = lang + self.linenos = linenos + # map of highlighters + hl_map = { 'enscript' : enscript, 'dp' : dp, 'pygments' : pygment } + try : + self.hiliter = hl_map[hiliter] + except KeyError: + raise "Please provide a valid hiliter as a string. One of 'enscript', 'dp', or 'pygments'" + + + def _getLang(self): + ''' +Determines language of a code block from shebang lines and whether said line should be removed or left in place. If the sheband line contains a path (even a single /) then it is assumed to be a real shebang lines and left alone. However, if no path is given (e.i.: #!python or :::python) then it is assumed to be a mock shebang for language identifitation of a code fragment and removed from the code block prior to processing for code highlighting. When a mock shebang (e.i: #!python) is found, line numbering is turned on. When colons are found in place of a shebang (e.i.: :::python), line numbering is left in the current state - off by default. + ''' + import re + + #split text into lines + lines = self.src.split("\n") + #pull first line to examine + fl = lines.pop(0) + + c = re.compile(r''' + (?:(?:::+)|(?P<shebang>[#]!)) #shebang or 2 or more colons + (?P<path>(?:/\w+)*[/ ])? # zero or 1 path ending in either a / or a single space + (?P<lang>\w*) # the language (a single / or space before lang is a path) + ''', re.VERBOSE) + # search first line for shebang + m = c.search(fl) + if m: + # we have a match + try: + self.lang = m.group('lang').lower() + except IndexError: + self.lang = None + if m.group('path'): + # path exists - restore first line + lines.insert(0, fl) + if m.group('shebang'): + # shebang exists - use line numbers + self.linenos = True + else: + # No match + lines.insert(0, fl) + + self.src = "\n".join(lines).strip("\n") + + def hilite(self): + '''The wrapper function which brings it all togeather''' + self.src = self.src.strip('\n') + + if not self.lang : self._getLang() + + return self.hiliter(self.src, self.lang, self.linenos) + + +# ------------------ The Markdown Extention ------------------------------- +class CodeHiliteExtention (markdown.Extension) : + def __init__(self, configs): + # define default configs + self.config = {'hiliter' : [DEFAULT_HILITER, "one of 'enscript', 'dp', or 'pygments'"], + 'force_linenos' : [False, "Force line numbers - Default: False"] } + + # Override defaults with user settings + for key, value in configs : + # self.config[key][0] = value + self.setConfig(key, value) + + def extendMarkdown(self, md, md_globals) : + + def _hiliteCodeBlock(parent_elem, lines, inList): + """Overrides function of same name in standard Markdown class and + sends code blocks to a code highlighting proccessor. The result + is then stored in the HtmlStash, a placeholder is inserted into + the dom and the remainder of the text file is processed recursively. + + @param parent_elem: DOM element to which the content will be added + @param lines: a list of lines + @param inList: a level + @returns: None""" + detabbed, theRest = md.blockGuru.detectTabbed(lines) + text = "\n".join(detabbed).rstrip()+"\n" + code = CodeHilite(text, hiliter=self.config['hiliter'][0], linenos=self.config['force_linenos'][0]) + placeholder = md.htmlStash.store(code.hilite()) + parent_elem.appendChild(md.doc.createTextNode(placeholder)) + md._processSection(parent_elem, theRest, inList) + + md._processCodeBlock = _hiliteCodeBlock + +def makeExtension(configs=None) : + return CodeHiliteExtention(configs=configs) |