aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorWaylan Limberg <waylan@gmail.com>2008-11-12 23:00:31 -0500
committerWaylan Limberg <waylan@gmail.com>2008-11-13 23:25:15 -0500
commit8d367ca71c610c49d3b3d5c81f49cb38f9e97fb9 (patch)
tree77823e3c677e403382dd22473e688fb93568c352
parentba147ca9b2eae544e802c8216936065d2d86a8d8 (diff)
downloadmarkdown-8d367ca71c610c49d3b3d5c81f49cb38f9e97fb9.tar.gz
markdown-8d367ca71c610c49d3b3d5c81f49cb38f9e97fb9.tar.bz2
markdown-8d367ca71c610c49d3b3d5c81f49cb38f9e97fb9.zip
Fixed various issues with the core parser (mostly whitespace related) and updated a few tests that weren't quite right; they now better match the Perl and PHP implementations.
-rwxr-xr-xmarkdown.py112
-rw-r--r--tests/misc/blockquote.html6
-rw-r--r--tests/misc/multi-paragraph-block-quote.html4
3 files changed, 86 insertions, 36 deletions
diff --git a/markdown.py b/markdown.py
index b40093f..9fc6355 100755
--- a/markdown.py
+++ b/markdown.py
@@ -240,17 +240,26 @@ class ListIndentProcessor(BlockProcessor):
""" Process children of list items. """
def test(self, parent, block):
- return block.startswith(' '*4) and parent[-1] and \
- (parent[-1].tag == "ul" or parent[-1].tag == "ol")
+ return block.startswith(' '*4) and \
+ (parent.tag == "li" or \
+ (len(parent) and parent[-1] and \
+ (parent[-1].tag == "ul" or parent[-1].tag == "ol")
+ )
+ )
def run(self, parent, blocks):
- block = blocks.pop(0)
+ block = self.looseDetab(blocks.pop(0))
sibling = self.lastChild(parent)
- if len(sibling) and sibling[-1].tag == 'li':
- self.parser.parseBlocks(sibling[-1], [self.looseDetab(block)])
+ if parent.tag == 'li':
+ self.parser.parseBlocks(parent, [block])
+ elif len(sibling) and sibling[-1].tag == 'li':
+ if sibling[-1].text:
+ block = '%s\n\n%s' % (sibling[-1].text, block)
+ sibling[-1].text = ''
+ self.parser.parseChunk(sibling[-1], block)
else:
li = etree.SubElement(sibling, 'li')
- self.parser.parseBlocks(li, [self.looseDetab(block)])
+ self.parser.parseBlocks(li, [block])
class CodeBlockProcessor(BlockProcessor):
@@ -267,12 +276,12 @@ class CodeBlockProcessor(BlockProcessor):
and sibling[0].tag == "code":
code = sibling[0]
block, theRest = self.detab(block)
- code.text = '%s\n%s\n' % (code.text, block.rstrip())
+ code.text = AtomicString('%s\n%s\n' % (code.text, block.rstrip()))
else:
pre = etree.SubElement(parent, 'pre')
code = etree.SubElement(pre, 'code')
block, theRest = self.detab(block)
- code.text = '%s\n' % block.rstrip()
+ code.text = AtomicString('%s\n' % block.rstrip())
if theRest:
blocks.insert(0, theRest)
@@ -292,15 +301,15 @@ class BlockQuoteProcessor(BlockProcessor):
quote = sibling
else:
quote = etree.SubElement(parent, 'blockquote')
- self.parser.parseBlocks(quote, [block])
+ self.parser.parseChunk(quote, block)
def clean(self, line):
""" Remove ``>`` from begining of a line. """
m = self.RE.match(line)
- if m:
- return m.group(1)
- elif line.strip() == ">":
+ if line.strip() == ">":
return ""
+ elif m:
+ return m.group(1)
else:
return line
@@ -318,7 +327,7 @@ class OListProcessor(BlockProcessor):
sibling = self.lastChild(parent)
if sibling and sibling.tag == self.TAG:
lst = sibling
- # make sure previous item is in a p.
+ # make sure previous item is in a p.
if len(lst) and lst[-1].text and not len(lst[-1]):
p = etree.SubElement(lst[-1], 'p')
p.text = lst[-1].text
@@ -333,8 +342,11 @@ class OListProcessor(BlockProcessor):
lst = etree.SubElement(parent, self.TAG)
self.parser.state = 'list'
for item in items:
- li = etree.SubElement(lst, 'li')
- self.parser.parseBlocks(li, [item])
+ if item.startswith(' '*4):
+ self.parser.parseBlocks(lst[-1], [item])
+ else:
+ li = etree.SubElement(lst, 'li')
+ self.parser.parseBlocks(li, [item])
self.parser.resetState()
def get_items(self, block):
@@ -344,6 +356,11 @@ class OListProcessor(BlockProcessor):
m = self.RE.match(line)
if m:
items.append(m.group(1))
+ elif line.startswith(' '*4):
+ if items[-1].startswith(' '*4):
+ items[-1] = '%s\n%s' % (items[-1], line)
+ else:
+ items.append(line)
else:
items[-1] = '\n'.join([items[-1], line])
return items
@@ -359,22 +376,25 @@ class UListProcessor(OListProcessor):
class HashHeaderProcessor(BlockProcessor):
""" Process Hash Headers. """
- RE = re.compile(r'^(#{1,6})(.*?)#*$')
+ RE = re.compile(r'(^|\n)(?P<level>#{1,6})(?P<header>.*?)#*(\n|$)')
def test(self, parent, block):
- return block.startswith('#')
+ return bool(self.RE.search(block))
def run(self, parent, blocks):
- lines = blocks.pop(0).split('\n')
- line1 = lines.pop(0)
- m = self.RE.match(line1)
+ block = blocks.pop(0)
+ m = self.RE.search(block)
if m:
- h = etree.SubElement(parent, 'h%d' % len(m.group(1)))
- h.text = m.group(2).strip()
+ before = block[:m.start()]
+ after = block[m.end():]
+ if before:
+ self.parser.parseBlocks(parent, [before])
+ h = etree.SubElement(parent, 'h%d' % len(m.group('level')))
+ h.text = m.group('header').strip()
+ if after:
+ blocks.insert(0, after)
else:
- lines.insert(0, line1)
- if len(lines):
- blocks.insert(0, '\n'.join(lines))
+ message(CRITICAL, "We've got a problem header!")
class SHeaderProcessor(BlockProcessor):
@@ -400,17 +420,20 @@ class SHeaderProcessor(BlockProcessor):
class HRProcessor(BlockProcessor):
""" Process Horizontal Rules. """
- RE = re.compile(r'([*_-][ ]?){3,}')
+ RE = r'[ ]{0,3}(?P<ch>[*_-])[ ]?((?P=ch)[ ]?){2,}[ ]*'
+ SEARCH_RE = re.compile(r'(^|\n)%s(\n|$)' % RE)
+ MATCH_RE = re.compile(r'^%s$' % RE)
def test(self, parent, block):
- return bool(self.RE.search(block))
+ return bool(self.SEARCH_RE.search(block))
def run(self, parent, blocks):
# Check for lines in block before hr.
+ #import ipdb; ipdb.set_trace()
lines = blocks.pop(0).split('\n')
prelines = []
for line in lines:
- m = self.RE.match(line)
+ m = self.MATCH_RE.match(line)
if m:
break
else:
@@ -425,6 +448,25 @@ class HRProcessor(BlockProcessor):
blocks.insert(0, '\n'.join(lines))
+class EmptyBlockProcessor(BlockProcessor):
+    """ Process blocks that start with an empty line.
+
+    The leading whitespace (up to and including a newline) is removed and
+    the remainder of the block is pushed back onto the queue for the other
+    processors. If the last child of the parent is a ``<pre><code>`` pair,
+    newlines are appended to the code text so the blank lines are kept.
+    """
+
+    # Leading whitespace ending in a newline, i.e. the block opens with
+    # at least one blank line.
+    RE = re.compile(r'^\s*\n')
+
+    def test(self, parent, block):
+        """ Return True if `block` begins with a blank line. """
+        return bool(self.RE.match(block))
+
+    def run(self, parent, blocks):
+        """ Strip the leading blank line(s) from the first item of `blocks`.
+
+        `blocks` is modified in place: the first item is popped and, when
+        it matches, whatever follows the blank line is re-inserted at the
+        front.
+        """
+        block = blocks.pop(0)
+        m = self.RE.match(block)
+        if m:
+            # Re-queue whatever follows the blank line.
+            blocks.insert(0, block[m.end():])
+            sibling = self.lastChild(parent)
+            # Explicit None/len() checks: an Element with no children is
+            # falsy, so truth-testing the childless <code> element always
+            # failed and this branch could never run.
+            # NOTE(review): assumes lastChild returns None when parent has
+            # no children - confirm against BlockProcessor.lastChild.
+            if sibling is not None and sibling.tag == 'pre' and \
+                    len(sibling) and sibling[0].tag == 'code':
+                # Real newlines; the original appended the literal
+                # characters "/n/n/n".
+                sibling[0].text = AtomicString('%s\n\n\n' % sibling[0].text)
+
+
class PBlockProcessor(BlockProcessor):
""" Process Paragraph blocks. """
@@ -435,7 +477,10 @@ class PBlockProcessor(BlockProcessor):
block = blocks.pop(0)
if block.strip():
if self.parser.state == 'list':
- parent.text = block
+ if parent.text:
+ parent.text = '%s\n%s' % (parent.text, block)
+ else:
+ parent.text = block
else:
p = etree.SubElement(parent, 'p')
p.text = block
@@ -446,6 +491,7 @@ class BlockParser:
def __init__(self):
self.blockprocessors = OrderedDict()
+ self.blockprocessors['empty'] = EmptyBlockProcessor(self)
self.blockprocessors['indent'] = ListIndentProcessor(self)
self.blockprocessors['code'] = CodeBlockProcessor(self)
self.blockprocessors['hashheader'] = HashHeaderProcessor(self)
@@ -464,10 +510,13 @@ class BlockParser:
""" Parse a markdown string into an ElementTree. """
# Create a ElementTree from the lines
root = etree.Element("div")
- blocks = '\n'.join(lines).split('\n\n')
- self.parseBlocks(root, blocks)
+ self.parseChunk(root, '\n'.join(lines))
return etree.ElementTree(root)
+ def parseChunk(self, parent, text):
+ """ Parse a chunk of markdown text and attach to given etree node.
+
+ The text is split into blocks wherever two consecutive newlines
+ occur, and the resulting list is handed to ``parseBlocks`` together
+ with ``parent``.
+ """
+ self.parseBlocks(parent, text.split('\n\n'))
+
def parseBlocks(self, parent, blocks):
""" Process blocks of markdown text and attach to given etree node. """
while blocks:
@@ -1722,6 +1771,7 @@ class Markdown:
source = source.replace(STX, "").replace(ETX, "")
source = source.replace("\r\n", "\n").replace("\r", "\n") + "\n\n"
+ source = re.sub(r'\n\s\n', '\n\n', source)
source = source.expandtabs(TAB_LENGTH)
# Split into lines and run the line preprocessors.
diff --git a/tests/misc/blockquote.html b/tests/misc/blockquote.html
index b52e92d..4481d51 100644
--- a/tests/misc/blockquote.html
+++ b/tests/misc/blockquote.html
@@ -10,13 +10,15 @@
<p>blockquote with 2 spaces.</p>
</blockquote>
<p>baz</p>
-<p> &gt; this has three spaces so its a paragraph.</p>
+<blockquote>
+<p>this has three spaces so its a paragraph.</p>
+</blockquote>
<p>blah</p>
<pre><code>&gt; this one had four so it's a code block.
</code></pre>
<blockquote>
<blockquote>
<p>this nested blockquote has 0 on level one and 3 (one after the first <code>&gt;</code> + 2 more) on level 2.</p>
+<p>and this has 4 on level 2 - another code block.</p>
</blockquote>
-<p> &gt; and this has 4 on level 2 - another code block.</p>
</blockquote> \ No newline at end of file
diff --git a/tests/misc/multi-paragraph-block-quote.html b/tests/misc/multi-paragraph-block-quote.html
index 3602405..e13986a 100644
--- a/tests/misc/multi-paragraph-block-quote.html
+++ b/tests/misc/multi-paragraph-block-quote.html
@@ -1,8 +1,6 @@
<blockquote>
<p>This is line one of paragraph one
-This is line two of paragraph one</p>
+ This is line two of paragraph one</p>
<p>This is line one of paragraph two</p>
-</blockquote>
-<blockquote>
<p>This is another blockquote.</p>
</blockquote> \ No newline at end of file