aboutsummaryrefslogtreecommitdiffstats
path: root/markdown.py
diff options
context:
space:
mode:
authorArtem Yunusov <nedrlab@gmail.com>2008-07-14 16:17:33 +0500
committerArtem Yunusov <nedrlab@gmail.com>2008-07-14 16:17:33 +0500
commita316ac49a6934221b24ec58a6a7dc4c3b30ae1e8 (patch)
treedfbbdef295252fe9fe07ed888ed0a3b79c83b8d3 /markdown.py
parent54a6d381a1a92210f3e619f7b2910b38a0568f9c (diff)
downloadmarkdown-a316ac49a6934221b24ec58a6a7dc4c3b30ae1e8.tar.gz
markdown-a316ac49a6934221b24ec58a6a7dc4c3b30ae1e8.tar.bz2
markdown-a316ac49a6934221b24ec58a6a7dc4c3b30ae1e8.zip
A lot of bug fixes. Attribute handling added, new hr processing. Reformatted test suite for ElementTree output.
Diffstat (limited to 'markdown.py')
-rw-r--r--markdown.py181
1 files changed, 100 insertions, 81 deletions
diff --git a/markdown.py b/markdown.py
index f3f2481..032e084 100644
--- a/markdown.py
+++ b/markdown.py
@@ -62,31 +62,25 @@ def message(level, text):
def isstr(s):
return isinstance(s, unicode) or isinstance(s, str)
-'''try:
- # running with lxml.etree
- from lxml import etree
+try:
+ # Python 2.5+
+ import xml.etree.cElementTree as etree
except ImportError:
try:
# Python 2.5+
- import xml.etree.cElementTree as etree
+ import xml.etree.ElementTree as etree
except ImportError:
try:
- # Python 2.5+
- import xml.etree.ElementTree as etree
+ # normal cElementTree install
+ import cElementTree as etree
except ImportError:
try:
- # normal cElementTree install
- import cElementTree as etree
+ # normal ElementTree install
+ import elementtree.ElementTree as etree
except ImportError:
- try:
- # normal ElementTree install
- import elementtree.ElementTree as etree
- except ImportError:
- message(CRITICAL,
- "Failed to import ElementTree from any known place")
- sys.exit(1)'''
-
-import xml.etree.cElementTree as etree
+ message(CRITICAL,
+ "Failed to import ElementTree from any known place")
+ sys.exit(1)
def indentETree(elem, level=0):
@@ -110,7 +104,7 @@ def indentETree(elem, level=0):
TAB_LENGTH = 4 # expand tabs to this many spaces
ENABLE_ATTRIBUTES = True # @id = xyz -> <... id="xyz">
-SMART_EMPHASIS = 1 # this_or_that does not become this<i>or</i>that
+SMART_EMPHASIS = True # this_or_that does not become this<i>or</i>that
HTML_REMOVED_TEXT = "[HTML_REMOVED]" # text used instead of HTML in safe mode
RTL_BIDI_RANGES = ( (u'\u0590', u'\u07FF'),
@@ -184,6 +178,13 @@ def codepoint2name(code):
else:
return "%s#%d;" % (AND_SUBSTITUTE, code)
+def handleAttributes(text, parent):
+
+ def attributeCallback(match):
+ parent.set(match.group(1), match.group(2))
+
+ return RE.regExp['attr'].sub(attributeCallback, text)
+
"""
======================================================================
@@ -395,24 +396,27 @@ class LinePreprocessor(Preprocessor):
for i in range(len(lines)):
prefix = ''
m = self.blockquote_re.search(lines[i])
- if m : prefix = m.group(0)
+ if m:
+ prefix = m.group(0)
if self._isLine(lines[i][len(prefix):]):
- lines[i] = prefix + self.stash.store("<hr />", safe=True)
+ #lines[i] = prefix + self.stash.store("<hr />", safe=True)
+ lines[i] = prefix + "___"
return lines
def _isLine(self, block):
"""Determine if a block should be replaced with an <HR>"""
- if block.startswith(" "): return 0 # a code block
+ if block.startswith(" "):
+ return False # a code block
text = "".join([x for x in block if not x.isspace()])
if len(text) <= 2:
- return 0
+ return False
for pattern in ['isline1', 'isline2', 'isline3']:
m = RE.regExp[pattern].match(text)
if (m and m.group(1)):
- return 1
+ return True
else:
- return 0
+ return False
LINE_PREPROCESSOR = LinePreprocessor()
@@ -510,7 +514,7 @@ STRONG_EM_RE = r'\*\*\*(.*?|[^***]+?)\*\*\*' # ***strong***
if SMART_EMPHASIS:
- EMPHASIS_2_RE = r'(?<!\S)_(\S[^_]*)_' # _emphasis_
+ EMPHASIS_2_RE = r'(?<!\S)_(\S[^_]+)_' # _emphasis_
else:
EMPHASIS_2_RE = r'_([^_]*)_' # _emphasis_
@@ -644,14 +648,13 @@ class LinkPattern (Pattern):
parts = m.group(9).split('"')
# We should now have [], [href], or [href, title]
if parts:
-
el.set("href", self.sanatize_url(parts[0].strip()))
else:
el.set("href", "")
if len(parts) > 1:
# we also got a title
- title = '"' + '"'.join(parts[1:]).strip()
+ title = ('"' + '"'.join(parts[1:]).strip())[1:-1]
#title = dequote(title) #.replace('"', "&quot;")
el.set("title", title)
return el
@@ -690,6 +693,7 @@ class LinkPattern (Pattern):
class ImagePattern(LinkPattern):
""" Return a NanoDom img Element from the given match. """
+
def handleMatch(self, m):
el = etree.Element("img")
src_parts = m.group(9).split()
@@ -699,27 +703,12 @@ class ImagePattern(LinkPattern):
el.set('src', "")
if len(src_parts) > 1:
el.set('title', dequote(" ".join(src_parts[1:])))
-
- # Need to be reimplemented
- '''if ENABLE_ATTRIBUTES:
- el.text = m.group(2)
- truealt = text.value
- el.childNodes.remove(text)
-
- self.attrRegExp.sub(self.attributeCallback, self.value)
-
-
- text = doc.createTextNode(m.group(2))
- el.appendChild(text)
- text.handleAttributes()
- truealt = text.value
- el.childNodes.remove(text)
-
-
+
+ if ENABLE_ATTRIBUTES:
+ truealt = handleAttributes(m.group(2), el)
else:
- truealt = m.group(2)'''
+ truealt = m.group(2)
- truealt = m.group(2)
el.set('alt', truealt)
return el
@@ -737,6 +726,7 @@ class ReferencePattern(LinkPattern):
if not self.references.has_key(id): # ignore undefined refs
return None
href, title = self.references[id]
+
text = m.group(2)
return self.makeTag(href, title, text)
@@ -746,6 +736,7 @@ class ReferencePattern(LinkPattern):
el.set('href', self.sanatize_url(href))
if title:
el.set('title', title)
+
el.text = text
return el
@@ -1130,6 +1121,7 @@ class CorePatterns:
re.DOTALL)
self.regExp['containsline'] = re.compile(r'^([-]*)$|^([=]*)$', re.M)
+ self.regExp['attr'] = re.compile("\{@([^\}]*)=([^\}]*)}") # {@id=123}
RE = CorePatterns()
@@ -1389,7 +1381,13 @@ class Markdown:
if len(paragraph) and paragraph[0].startswith('#'):
self._processHeader(parent_elem, paragraph)
+
+ elif len(paragraph) and \
+ RE.regExp["isline3"].match(paragraph[0]):
+ self._processHR(parent_elem)
+ lines = paragraph[1:] + lines
+
elif paragraph:
self._processParagraph(parent_elem, paragraph,
inList, looseList)
@@ -1397,7 +1395,9 @@ class Markdown:
if lines and not lines[0].strip():
lines = lines[1:] # skip the first (blank) line
-
+ def _processHR(self, parent_elem):
+ hr = etree.SubElement(parent_elem, "hr")
+
def _processHeader(self, parent_elem, paragraph):
m = RE.regExp['header'].match(paragraph[0])
if m:
@@ -1410,10 +1410,6 @@ class Markdown:
def _processParagraph(self, parent_elem, paragraph, inList, looseList):
- #list = self._handleInline("\n".join(paragraph))
-
-
-
if ( parent_elem.tag == 'li'
and not (looseList or parent_elem.getchildren())):
@@ -1426,13 +1422,21 @@ class Markdown:
# Otherwise make a "p" element
el = etree.SubElement(parent_elem, "p")
- #el.appendChild(self.doc.createTextNode("\n".join(paragraph), "inline"))
- inline = etree.SubElement(el, "inline")
- inline.text = "\n".join(paragraph)
+ dump = []
- #for item in list:
- #el.appendChild(item)
-
+ # Searching for hr
+ for line in paragraph:
+ if RE.regExp["isline3"].match(line):
+ inline = etree.SubElement(el, "inline")
+ inline.text = "\n".join(dump)
+ etree.SubElement(el, "hr")
+ dump = []
+ else:
+ dump.append(line)
+ if dump:
+ text = "\n".join(dump)
+ inline = etree.SubElement(el, "inline")
+ inline.text = text
def _processUList(self, parent_elem, lines, inList):
self._processList(parent_elem, lines, inList,
@@ -1673,15 +1677,22 @@ class Markdown:
"""
def linkText(text):
- if result:
- result[-1].tail = text
- else:
- parent.text = text
+ if text:
+ if result:
+ if result[-1].tail:
+ result[-1].tail += text
+ else:
+ result[-1].tail = text
+ else:
+ if parent.text:
+ parent.text += text
+ else:
+ parent.text = text
result = []
prefix = self.inlineStash.prefix
strartIndex = 0
-
+
while data:
index = data.find(prefix, strartIndex)
@@ -1693,13 +1704,12 @@ class Markdown:
node = self.inlineStash.get(id)
+ if index > 0:
+ text = data[strartIndex:index]
+ linkText(text)
if not isstr(node): # it's Element
-
- if index > 0:
- text = data[strartIndex:index]
- linkText(text)
-
+
for child in [node] + node.getchildren():
if child.tail:
@@ -1724,8 +1734,7 @@ class Markdown:
else:
text = data[strartIndex:].strip()
- if text:
- linkText(text)
+ linkText(text)
data = ""
return result
@@ -1737,7 +1746,7 @@ class Markdown:
Given a pattern name, this function checks if the line
fits the pattern, creates the necessary elements, adds it
to InlineStash, and returns string with placeholders,
- instead of DOM elements.
+ instead of ElementTree elements.
Keyword arguments:
@@ -1772,19 +1781,18 @@ class Markdown:
pholder = self.inlineStash.add(node, pattern.type())
return "%s%s%s" % (match.group(1), pholder, match.groups()[-1]), True
-
def _processTree(self, el):
"""
- Processing NanoDOM markdown tree, and applying inline patterns
+ Processing ElementTree, and applying inline patterns
Keyword arguments:
* el - parent element of Document.
- Returns: NanoDOM Document object with applied inline patterns.
+ Returns: ElementTree object with applied inline patterns.
"""
-
+
stack = [el]
while stack:
currElement = stack.pop()
@@ -1792,34 +1800,45 @@ class Markdown:
for child in currElement.getchildren():
if child.tag == "inline":
-
+
lst = self._processPlaceholders(self._handleInline(
child.text), currElement)
-
+
pos = currElement.getchildren().index(child)
insertQueue.append((child, pos, lst))
else:
stack.append(child)
+
for element, pos, lst in insertQueue:
currElement.remove(element)
+ if currElement.text:
+ currElement.text = handleAttributes(currElement.text,
+ currElement)
for newChild in lst:
+ # Processing attributes
+ if newChild.tail:
+ newChild.tail = handleAttributes(newChild.tail,
+ currElement)
+ if newChild.text:
+ newChild.text = handleAttributes(newChild.text,
+ newChild)
currElement.insert(pos, newChild)
pos += 1
def applyInlinePatterns(self, markdownTree):
"""
- Retrun NanoDOM markdown tree, with applied
+ Return ElementTree, with applied
inline paterns
Keyword arguments:
- * markdownTree: NanoDOM Document object, reppresenting Markdown tree.
+ * markdownTree: ElementTree object, representing Markdown tree.
- Returns: NanoDOM Document object.
+ Returns: ElementTree object.
"""
@@ -1833,14 +1852,14 @@ class Markdown:
def markdownToTree(self, source=None):
"""
- Retrun NanoDOM markdown tree, without applying
+ Return ElementTree, without applying
inline paterns
Keyword arguments:
* source: An ascii or unicode string of Markdown formated text.
- Returns: NanoDOM Document object.
+ Returns: ElementTree object.
"""
if source is not None: #Allow blank string
self.source = source