aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Isaac Muse <faceless.shop@gmail.com> 2017-01-19 06:51:06 -0700
committer: Waylan Limberg <waylan.limberg@icloud.com> 2017-01-19 08:51:06 -0500
commit: c70b2c4154d9b6e46f282c1f212c52e9fbfa5a07 (patch)
tree: 3a304d44324a5d0c9b8eabff7a86cc777163b62a
parent: b52293b2858138231795aa72aac1cf4799eb8da9 (diff)
downloadmarkdown-c70b2c4154d9b6e46f282c1f212c52e9fbfa5a07.tar.gz
markdown-c70b2c4154d9b6e46f282c1f212c52e9fbfa5a07.tar.bz2
markdown-c70b2c4154d9b6e46f282c1f212c52e9fbfa5a07.zip
Tables: Improvements (#530)
Tables now handle escaped pipes when testing whether a block is a table, in table borders, and in the inline content. To achieve this properly, a bug related to appending escaped chars to the Markdown class had to be fixed: appended chars now only appear in the current instance. Lastly, the first backtick in a table can be escaped, rounding out the last corner case.
-rw-r--r--markdown/__init__.py6
-rw-r--r--markdown/extensions/tables.py79
-rw-r--r--tests/extensions/extra/tables.html74
-rw-r--r--tests/extensions/extra/tables.txt31
-rw-r--r--tests/test_apis.py12
5 files changed, 172 insertions, 30 deletions
diff --git a/markdown/__init__.py b/markdown/__init__.py
index 78ea4cb..409f9cf 100644
--- a/markdown/__init__.py
+++ b/markdown/__init__.py
@@ -75,9 +75,6 @@ class Markdown(object):
'xhtml5': to_xhtml_string,
}
- ESCAPED_CHARS = ['\\', '`', '*', '_', '{', '}', '[', ']',
- '(', ')', '>', '#', '+', '-', '.', '!']
-
def __init__(self, *args, **kwargs):
"""
Creates a new Markdown instance.
@@ -147,6 +144,9 @@ class Markdown(object):
'deprecated along with "safe_mode".',
DeprecationWarning)
+ self.ESCAPED_CHARS = ['\\', '`', '*', '_', '{', '}', '[', ']',
+ '(', ')', '>', '#', '+', '-', '.', '!']
+
self.registeredExtensions = []
self.docType = ""
self.stripTopLevelTags = True
diff --git a/markdown/extensions/tables.py b/markdown/extensions/tables.py
index 4bb2076..ebe6ffa 100644
--- a/markdown/extensions/tables.py
+++ b/markdown/extensions/tables.py
@@ -26,28 +26,43 @@ import re
class TableProcessor(BlockProcessor):
""" Process Tables. """
- RE_CODE_PIPES = re.compile(r'(?:(\\\\)|(`+)|(\\\|)|(\|))')
+ RE_CODE_PIPES = re.compile(r'(?:(\\\\)|(\\`+)|(`+)|(\\\|)|(\|))')
+ RE_END_BORDER = re.compile(r'(?<!\\)(?:\\\\)*\|$')
+
+ def __init__(self, parser):
+ self.border = False
+ self.separator = ''
+ super(TableProcessor, self).__init__(parser)
def test(self, parent, block):
- rows = block.split('\n')
- return (len(rows) > 1 and '|' in rows[0] and
- '|' in rows[1] and '-' in rows[1] and
- rows[1].strip()[0] in ['|', ':', '-'] and
- set(rows[1]) <= set('|:- '))
+ """
+ Ensure first two rows (column header and separator row) are valid table rows.
+
+ Keep border check and separator row do avoid repeating the work.
+ """
+ is_table = False
+ header = [row.strip() for row in block.split('\n')[0:2]]
+ if len(header) == 2:
+ self.border = header[0].startswith('|')
+ row = self._split_row(header[0])
+ is_table = len(row) > 1
+
+ if is_table:
+ row = self._split_row(header[1])
+ is_table = len(row) > 1 and set(''.join(row)) <= set('|:- ')
+ if is_table:
+ self.separator = row
+ return is_table
def run(self, parent, blocks):
""" Parse a table block and build table. """
block = blocks.pop(0).split('\n')
header = block[0].strip()
- seperator = block[1].strip()
rows = [] if len(block) < 3 else block[2:]
- # Get format type (bordered by pipes or not)
- border = False
- if header.startswith('|'):
- border = True
+
# Get alignment of columns
align = []
- for c in self._split_row(seperator, border):
+ for c in self.separator:
c = c.strip()
if c.startswith(':') and c.endswith(':'):
align.append('center')
@@ -57,21 +72,22 @@ class TableProcessor(BlockProcessor):
align.append('right')
else:
align.append(None)
+
# Build table
table = etree.SubElement(parent, 'table')
thead = etree.SubElement(table, 'thead')
- self._build_row(header, thead, align, border)
+ self._build_row(header, thead, align)
tbody = etree.SubElement(table, 'tbody')
for row in rows:
- self._build_row(row.strip(), tbody, align, border)
+ self._build_row(row.strip(), tbody, align)
- def _build_row(self, row, parent, align, border):
+ def _build_row(self, row, parent, align):
""" Given a row of text, build table cells. """
tr = etree.SubElement(parent, 'tr')
tag = 'td'
if parent.tag == 'thead':
tag = 'th'
- cells = self._split_row(row, border)
+ cells = self._split_row(row)
# We use align here rather than cells to ensure every row
# contains the same number of columns.
for i, a in enumerate(align):
@@ -83,13 +99,12 @@ class TableProcessor(BlockProcessor):
if a:
c.set('align', a)
- def _split_row(self, row, border):
+ def _split_row(self, row):
""" split a row of text into list of cells. """
- if border:
+ if self.border:
if row.startswith('|'):
row = row[1:]
- if row.endswith('|'):
- row = row[:-1]
+ row = self.RE_END_BORDER.sub('', row)
return self._split(row)
def _split(self, row):
@@ -106,23 +121,33 @@ class TableProcessor(BlockProcessor):
for m in self.RE_CODE_PIPES.finditer(row):
# Store ` data (len, start_pos, end_pos)
if m.group(2):
+ # \`+
+ # Store length of each tic group: subtract \
+ tics.append(len(m.group(2)) - 1)
+ # Store start of group, end of group, and escape length
+ tic_points.append((m.start(2), m.end(2) - 1, 1))
+ elif m.group(3):
# `+
# Store length of each tic group
- tics.append(len(m.group(2)))
- # Store start and end of tic group
- tic_points.append((m.start(2), m.end(2) - 1))
+ tics.append(len(m.group(3)))
+ # Store start of group, end of group, and escape length
+ tic_points.append((m.start(3), m.end(3) - 1, 0))
# Store pipe location
- elif m.group(4):
- pipes.append(m.start(4))
+ elif m.group(5):
+ pipes.append(m.start(5))
# Pair up tics according to size if possible
+ # Subtract the escape length *only* from the opening.
# Walk through tic list and see if tic has a close.
# Store the tic region (start of region, end of region).
pos = 0
tic_len = len(tics)
while pos < tic_len:
try:
- index = tics[pos + 1:].index(tics[pos]) + 1
+ tic_size = tics[pos] - tic_points[pos][2]
+ if tic_size == 0:
+ raise ValueError
+ index = tics[pos + 1:].index(tic_size) + 1
tic_region.append((tic_points[pos][0], tic_points[pos + index][1]))
pos += index + 1
except ValueError:
@@ -160,6 +185,8 @@ class TableExtension(Extension):
def extendMarkdown(self, md, md_globals):
""" Add an instance of TableProcessor to BlockParser. """
+ if '|' not in md.ESCAPED_CHARS:
+ md.ESCAPED_CHARS.append('|')
md.parser.blockprocessors.add('table',
TableProcessor(md.parser),
'<hashheader')
diff --git a/tests/extensions/extra/tables.html b/tests/extensions/extra/tables.html
index a0b1f71..b81582c 100644
--- a/tests/extensions/extra/tables.html
+++ b/tests/extensions/extra/tables.html
@@ -284,4 +284,76 @@ Content Cell | Content Cell
<td><code>\</code></td>
</tr>
</tbody>
-</table> \ No newline at end of file
+</table>
+<p>Only the first backtick can be escaped</p>
+<table>
+<thead>
+<tr>
+<th>Escaped</th>
+<th>Bacticks</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td>`<code>\</code></td>
+<td>``</td>
+</tr>
+</tbody>
+</table>
+<p>Test escaped pipes</p>
+<table>
+<thead>
+<tr>
+<th>Column 1</th>
+<th>Column 2</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td><code>|</code> |</td>
+<td>Pipes are okay in code and escaped. |</td>
+</tr>
+</tbody>
+</table>
+<table>
+<thead>
+<tr>
+<th>Column 1</th>
+<th>Column 2</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td>row1</td>
+<td>row1 |</td>
+</tr>
+<tr>
+<td>row2</td>
+<td>row2</td>
+</tr>
+</tbody>
+</table>
+<p>Test header escapes</p>
+<table>
+<thead>
+<tr>
+<th><code>`\</code> |</th>
+<th><code>\</code> |</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td>row1</td>
+<td>row1</td>
+</tr>
+<tr>
+<td>row2</td>
+<td>row2</td>
+</tr>
+</tbody>
+</table>
+<p>Escaped pipes in format row should not be a table</p>
+<p>| Column1 | Column2 |
+| ------- || ------- |
+| row1 | row1 |
+| row2 | row2 |</p> \ No newline at end of file
diff --git a/tests/extensions/extra/tables.txt b/tests/extensions/extra/tables.txt
index a9677ba..d5bd6ea 100644
--- a/tests/extensions/extra/tables.txt
+++ b/tests/extensions/extra/tables.txt
@@ -90,3 +90,34 @@ Odd backticks | Even backticks
Escapes | More Escapes
------- | ------
`` `\`` | `\`
+
+Only the first backtick can be escaped
+
+Escaped | Bacticks
+------- | ------
+\`` \` | \`\`
+
+Test escaped pipes
+
+Column 1 | Column 2
+-------- | --------
+`|` \| | Pipes are okay in code and escaped. \|
+
+| Column 1 | Column 2 |
+| -------- | -------- |
+| row1 | row1 \|
+| row2 | row2 |
+
+Test header escapes
+
+| `` `\`` \| | `\` \|
+| ---------- | ---- |
+| row1 | row1 |
+| row2 | row2 |
+
+Escaped pipes in format row should not be a table
+
+| Column1 | Column2 |
+| ------- \|| ------- |
+| row1 | row1 |
+| row2 | row2 |
diff --git a/tests/test_apis.py b/tests/test_apis.py
index e3de779..7b1214f 100644
--- a/tests/test_apis.py
+++ b/tests/test_apis.py
@@ -758,3 +758,15 @@ PLACE_MARKER= ~~~footnotes~~~
"""
self.create_config_file(config)
self.assertRaises(yaml.YAMLError, parse_options, ['-c', self.tempfile])
+
+
+class TestEscapeAppend(unittest.TestCase):
+ """ Tests escape character append. """
+
+ def testAppend(self):
+ """ Test that appended escapes are only in the current instance. """
+ md = markdown.Markdown()
+ md.ESCAPED_CHARS.append('|')
+ self.assertEqual('|' in md.ESCAPED_CHARS, True)
+ md2 = markdown.Markdown()
+ self.assertEqual('|' not in md2.ESCAPED_CHARS, True)