#!/usr/bin/python
import markdown
# --------------- CONSTANTS YOU MIGHT WANT TO MODIFY -----------------
# Highlighter used when the caller does not name one:
# one of 'enscript', 'dp', or 'pygments'.
DEFAULT_HILITER = 'pygments'
# Reuse markdown's tab width when the module exposes one, else fall back to 4.
TAB_LENGTH = getattr(markdown, 'TAB_LENGTH', 4)
# --------------- THE CODE -------------------------------------------
# --------------- hiliter utility functions --------------------------
def escape(txt):
    '''
    Basic HTML escaping.

    Replaces the characters that are significant in HTML text and
    double-quoted attribute values with their entity references.

    @param txt: A string.
    @returns : The escaped string.
    '''
    # '&' has to be handled first; otherwise the ampersands introduced by
    # the following replacements would be escaped a second time.
    txt = txt.replace('&', '&amp;')
    txt = txt.replace('<', '&lt;')
    txt = txt.replace('>', '&gt;')
    txt = txt.replace('"', '&quot;')
    return txt
def number(txt):
    '''
    Use an ordered list (<ol>) for line numbering.

    Every line of txt becomes one list item, so the browser supplies the
    line numbers.

    @param txt: A string of (already escaped or highlighted) html.
    @returns : A string of html.
    '''
    # NOTE(review): the HTML in this function's string literals was missing
    # from the source (tags and entities had been stripped, leaving broken
    # literals). The markup below is a reconstruction - confirm it against
    # the stylesheet / upstream before relying on the exact tags.

    # Fix whitespace: a list item is not preformatted, so runs of spaces
    # would collapse. Alternate non-breaking and ordinary spaces to keep
    # the visual width of the indentation.
    txt = txt.replace('\t', ' ' * TAB_LENGTH)
    txt = txt.replace(' ' * 4, '&nbsp; &nbsp; ')
    txt = txt.replace(' ' * 3, '&nbsp; &nbsp;')
    txt = txt.replace(' ' * 2, '&nbsp; ')

    # Add line numbers: one list item per source line.
    items = ['\t<li>%s</li>\n' % line for line in txt.splitlines()]
    return '<ol class="codehilite">\n' + ''.join(items) + '</ol>\n'
# ---------------- The hiliters ---------------------------------------
def enscript(src, lang=None, num=True):
    '''
    Pass source code on to the [enscript]
    (http://www.codento.com/people/mtr/genscript/) command line utility
    for hiliting.

    If no language is given, or enscript cannot be run or reports an
    error, the source is only html-escaped.

    Usage:
        >>> enscript(src [, lang [, num ]] )

    @param src: A string (it is written to enscript's standard input).
    @param lang: The language of code. Basic escaping only, if None.
    @param num: (Boolean) Turns line numbering 'on' or 'off' (on by default).
    @returns : A string of html.
    '''
    import re
    from subprocess import Popen, PIPE

    txt = None
    if lang:
        # An argument list (no shell) keeps `lang` from being interpreted
        # by a shell.
        cmd = ['enscript', '--highlight=%s' % lang, '--color',
               '--language=html', '--tabsize=%d' % TAB_LENGTH, '--output=-']
        try:
            proc = Popen(cmd, stdin=PIPE, stdout=PIPE, stderr=PIPE,
                         universal_newlines=True)
            # communicate() drains stdout and stderr together, so a large
            # output cannot block the child while we wait on stderr.
            out, err = proc.communicate(src)
        except OSError:
            # enscript is not installed or could not be started
            out, err = '', ''
        # Anything other than this exact message on stderr is an error.
        if err == 'output left in -\n':
            # NOTE(review): the tags around the named group were missing
            # from the source; <pre>...</pre> is assumed to be the element
            # enscript wraps the code in - confirm against real output.
            found = re.search(r'<pre>(?P<code>.*?)</pre>', out,
                              re.DOTALL | re.IGNORECASE)
            if found:
                txt = found.group('code').strip()
                # fix enscripts output
                # NOTE(review): the tag-to-tag replacement pairs that lived
                # here were lost (every key and value in the source is an
                # empty string). Restore the pairs before relying on the
                # exact markup; until then enscript's tags pass through.
                html_map = {}
                for k, v in html_map.items():
                    txt = txt.replace(k, v)
    if txt is None:
        # no language, or an error - just escape
        txt = escape(src)
    if num:
        txt = number(txt)
    else:
        # NOTE(review): wrapper markup reconstructed (the source literal had
        # no '%s' and raised TypeError); it mirrors the "codehilite" css
        # class used for the Pygments formatter in this file.
        txt = '<div class="codehilite"><pre>%s</pre></div>\n' % txt
    return txt
def dp(src, lang=None, num=True):
    '''
    Pass source code to a textarea for the [dp.SyntaxHighlighter]
    (http://www.dreamprojections.com/syntaxhighlighter/Default.aspx)

    Usage:
        >>> dp(src [, lang [, num ]] )

    @param src: A string. It is inserted as is (not html-escaped).
    @param lang: The language of code. Undefined if None.
    @param num: (Boolean) Turns line numbering 'on' or 'off' (on by default).
    @returns : A string of html.
    '''
    # The ':nogutter' option appended to the class turns line numbers off.
    gutter = '' if num else ':nogutter'
    if not lang:
        lang = ''
    # NOTE(review): the markup was missing from the source (the literal had
    # no conversion specifiers, so formatting raised TypeError). This
    # textarea form is a reconstruction - confirm the attributes against the
    # dp.SyntaxHighlighter version in use.
    return '<textarea name="code" class="%s%s">%s</textarea>\n' % (lang, gutter, src)
def pygment(src, lang=None, num=True):
    '''
    Pass code to the [Pygments](http://pygments.pocoo.org/) highliter with
    optional line numbers. The output should then be styled with css to
    your liking; the formatter is created with the css class "codehilite".

    If Pygments cannot be imported, the source is html-escaped and passed
    through instead.

    Usage:
        >>> pygment(src [, lang [, num ]] )

    @param src: A string.
    @param lang: The language of code. Pygments will try to guess language if None.
    @param num: (Boolean) Turns line numbering 'on' or 'off' (on by default).
    @returns : A string of html.
    '''
    try:
        from pygments import highlight
        from pygments.lexers import get_lexer_by_name, guess_lexer, TextLexer
        from pygments.formatters import HtmlFormatter
    except ImportError:
        # just escape and pass through
        txt = escape(src)
        if num:
            return number(txt)
        # NOTE(review): wrapper markup reconstructed (the source literal had
        # no '%s' and raised TypeError); it reuses the "codehilite" css
        # class given to the formatter below - confirm the exact tags.
        return '<div class="codehilite"><pre>%s</pre></div>\n' % txt

    try:
        lexer = get_lexer_by_name(lang)
    except ValueError:
        # unknown (or no) language - let Pygments guess, else plain text
        try:
            lexer = guess_lexer(src)
        except ValueError:
            lexer = TextLexer()
    formatter = HtmlFormatter(linenos=num, cssclass="codehilite")
    return highlight(src, lexer, formatter)
# ------------------ The Main CodeHilite Class ----------------------
class CodeHilite:
    '''
    A wrapper class providing a single API for various highlighting
    engines. Takes source code, determines which language it contains (if
    not provided), and passes it into the hiliter specified.

    Basic Usage:
        >>> code = CodeHilite(src = text)
        >>> html = code.hilite()

    @param src: A string of source code.
    @param lang: A string. Accepted values determined by hiliter used. Overrides _getLang()
    @param linenos: (Boolean) Turns line numbering 'on' or 'off' (off by default).
    @param hiliter: A string. One of 'enscript', 'dp', or 'pygments'.

    Low Level Usage:
        >>> code = CodeHilite()
        >>> code.src = text      # A string of source code.
        >>> code.lang = 'python' # Setting this will override _getLang()
        >>> code.linenos = True  # True or False; Turns line numbering on or off.
        >>> code.hiliter = MyCustomHiliter # Where MyCustomHiliter is callable, takes three arguments (src, lang, linenos) and returns a string.
        >>> html = code.hilite()
    '''

    def __init__(self, src=None, lang=None, linenos=False, hiliter=DEFAULT_HILITER):
        '''
        Store the settings and look up the hiliter function by name.

        Raises ValueError if hiliter is not one of the known names.
        '''
        self.src = src
        self.lang = lang
        self.linenos = linenos
        # map of highlighters
        hl_map = {'enscript': enscript, 'dp': dp, 'pygments': pygment}
        try:
            self.hiliter = hl_map[hiliter]
        except KeyError:
            # A plain string cannot be raised as an exception.
            raise ValueError("Please provide a valid hiliter as a string. One of 'enscript', 'dp', or 'pygments'")

    def _getLang(self):
        '''
        Determines language of a code block from shebang lines and whether
        said line should be removed or left in place. If the shebang line
        contains a path (even a single /) then it is assumed to be a real
        shebang line and left alone. However, if no path is given
        (i.e.: #!python or :::python) then it is assumed to be a mock
        shebang for language identification of a code fragment and removed
        from the code block prior to processing for code highlighting. When
        a mock shebang (i.e.: #!python) is found, line numbering is turned
        on. When colons are found in place of a shebang (i.e.: :::python),
        line numbering is left in the current state - off by default.

        Updates self.lang, self.linenos and self.src in place.
        '''
        import re

        # split text into lines
        lines = self.src.split("\n")
        # pull first line to examine
        fl = lines.pop(0)

        # The group names are the ones read below via m.group(...).
        c = re.compile(r'''
            (?:(?:::+)|(?P<shebang>[#]!))  # shebang or 2 or more colons
            (?P<path>(?:/\w+)*[/ ])?       # zero or 1 path ending in either a / or a single space
            (?P<lang>\w*)                  # the language (a single / or space before lang is a path)
            ''', re.VERBOSE)
        # Only a marker at the very start of the first line counts; searching
        # the whole line would treat code such as x[::2] as a language marker
        # and drop that line from the block.
        m = c.match(fl)
        if m:
            # we have a match; the 'lang' group always takes part in a match
            # (it may be the empty string)
            self.lang = m.group('lang').lower()
            if m.group('path'):
                # path exists - restore first line
                lines.insert(0, fl)
            if m.group('shebang'):
                # shebang exists - use line numbers
                self.linenos = True
        else:
            # No match
            lines.insert(0, fl)

        self.src = "\n".join(lines).strip("\n")

    def hilite(self):
        '''
        The wrapper function which brings it all together.

        Strips surrounding newlines from the source, detects the language
        from the first line when none was set, and returns the html string
        produced by the hiliter.
        '''
        self.src = self.src.strip('\n')
        if not self.lang:
            self._getLang()
        return self.hiliter(self.src, self.lang, self.linenos)
# ------------------ The Markdown Extension -------------------------------
class CodeHiliteExtention (markdown.Extension):
    '''
    Markdown extension which sends code blocks through CodeHilite.

    Config options:
        hiliter: one of 'enscript', 'dp', or 'pygments'.
        force_linenos: force line numbers (False by default).
    '''

    def __init__(self, configs):
        '''
        @param configs: An iterable of (key, value) pairs or a mapping that
            overrides the defaults. None means "no overrides".
        '''
        # define default configs
        self.config = {
            'hiliter': [DEFAULT_HILITER, "one of 'enscript', 'dp', or 'pygments'"],
            'force_linenos': [False, "Force line numbers - Default: False"],
        }

        # Override defaults with user settings
        if configs is None:
            # None cannot be iterated; treat it as no overrides
            configs = []
        elif hasattr(configs, 'items'):
            # accept a mapping as well as a list of pairs
            configs = configs.items()
        for key, value in configs:
            self.setConfig(key, value)

    def extendMarkdown(self, md, md_globals):
        '''
        Replace md._processCodeBlock with a version that highlights the
        code block using the configured hiliter.
        '''

        def _hiliteCodeBlock(parent_elem, lines, inList):
            """Overrides function of same name in standard Markdown class and
            sends code blocks to a code highlighting processor. The result
            is then stored in the HtmlStash, a placeholder is inserted into
            the dom and the remainder of the text file is processed recursively.

            @param parent_elem: DOM element to which the content will be added
            @param lines: a list of lines
            @param inList: a level
            @returns: None"""
            detabbed, theRest = md.blockGuru.detectTabbed(lines)
            text = "\n".join(detabbed).rstrip() + "\n"
            code = CodeHilite(text,
                              hiliter=self.config['hiliter'][0],
                              linenos=self.config['force_linenos'][0])
            # stash the finished html and leave only a placeholder in the dom
            placeholder = md.htmlStash.store(code.hilite())
            parent_elem.appendChild(md.doc.createTextNode(placeholder))
            md._processSection(parent_elem, theRest, inList)

        md._processCodeBlock = _hiliteCodeBlock
def makeExtension(configs=None):
    '''
    Create the extension instance.

    @param configs: Optional config overrides handed to CodeHiliteExtention.
    @returns : A CodeHiliteExtention instance.
    '''
    # The extension's constructor iterates over configs, so the default of
    # None is replaced with an empty list of overrides.
    return CodeHiliteExtention(configs=configs or [])