diff options
author | Isaac Muse <faceless.shop@gmail.com> | 2017-01-19 06:51:06 -0700 |
---|---|---|
committer | Waylan Limberg <waylan.limberg@icloud.com> | 2017-01-19 08:51:06 -0500 |
commit | c70b2c4154d9b6e46f282c1f212c52e9fbfa5a07 (patch) | |
tree | 3a304d44324a5d0c9b8eabff7a86cc777163b62a | |
parent | b52293b2858138231795aa72aac1cf4799eb8da9 (diff) | |
download | markdown-c70b2c4154d9b6e46f282c1f212c52e9fbfa5a07.tar.gz markdown-c70b2c4154d9b6e46f282c1f212c52e9fbfa5a07.tar.bz2 markdown-c70b2c4154d9b6e46f282c1f212c52e9fbfa5a07.zip |
Tables: Improvements (#530)
Tables now handle escaped pipes when testing, in table borders, and in
the inline content. To achieve this properly, a bug related to appending
escaped chars to the Markdown class had to be fixed. Now appended chars
only appear in the current instance. Lastly, the first backtick in a
table can be escaped, rounding out the last corner case.
-rw-r--r-- | markdown/__init__.py | 6 | ||||
-rw-r--r-- | markdown/extensions/tables.py | 79 | ||||
-rw-r--r-- | tests/extensions/extra/tables.html | 74 | ||||
-rw-r--r-- | tests/extensions/extra/tables.txt | 31 | ||||
-rw-r--r-- | tests/test_apis.py | 12 |
5 files changed, 172 insertions, 30 deletions
diff --git a/markdown/__init__.py b/markdown/__init__.py index 78ea4cb..409f9cf 100644 --- a/markdown/__init__.py +++ b/markdown/__init__.py @@ -75,9 +75,6 @@ class Markdown(object): 'xhtml5': to_xhtml_string, } - ESCAPED_CHARS = ['\\', '`', '*', '_', '{', '}', '[', ']', - '(', ')', '>', '#', '+', '-', '.', '!'] - def __init__(self, *args, **kwargs): """ Creates a new Markdown instance. @@ -147,6 +144,9 @@ class Markdown(object): 'deprecated along with "safe_mode".', DeprecationWarning) + self.ESCAPED_CHARS = ['\\', '`', '*', '_', '{', '}', '[', ']', + '(', ')', '>', '#', '+', '-', '.', '!'] + self.registeredExtensions = [] self.docType = "" self.stripTopLevelTags = True diff --git a/markdown/extensions/tables.py b/markdown/extensions/tables.py index 4bb2076..ebe6ffa 100644 --- a/markdown/extensions/tables.py +++ b/markdown/extensions/tables.py @@ -26,28 +26,43 @@ import re class TableProcessor(BlockProcessor): """ Process Tables. """ - RE_CODE_PIPES = re.compile(r'(?:(\\\\)|(`+)|(\\\|)|(\|))') + RE_CODE_PIPES = re.compile(r'(?:(\\\\)|(\\`+)|(`+)|(\\\|)|(\|))') + RE_END_BORDER = re.compile(r'(?<!\\)(?:\\\\)*\|$') + + def __init__(self, parser): + self.border = False + self.separator = '' + super(TableProcessor, self).__init__(parser) def test(self, parent, block): - rows = block.split('\n') - return (len(rows) > 1 and '|' in rows[0] and - '|' in rows[1] and '-' in rows[1] and - rows[1].strip()[0] in ['|', ':', '-'] and - set(rows[1]) <= set('|:- ')) + """ + Ensure first two rows (column header and separator row) are valid table rows. + + Keep border check and separator row do avoid repeating the work. 
+ """ + is_table = False + header = [row.strip() for row in block.split('\n')[0:2]] + if len(header) == 2: + self.border = header[0].startswith('|') + row = self._split_row(header[0]) + is_table = len(row) > 1 + + if is_table: + row = self._split_row(header[1]) + is_table = len(row) > 1 and set(''.join(row)) <= set('|:- ') + if is_table: + self.separator = row + return is_table def run(self, parent, blocks): """ Parse a table block and build table. """ block = blocks.pop(0).split('\n') header = block[0].strip() - seperator = block[1].strip() rows = [] if len(block) < 3 else block[2:] - # Get format type (bordered by pipes or not) - border = False - if header.startswith('|'): - border = True + # Get alignment of columns align = [] - for c in self._split_row(seperator, border): + for c in self.separator: c = c.strip() if c.startswith(':') and c.endswith(':'): align.append('center') @@ -57,21 +72,22 @@ class TableProcessor(BlockProcessor): align.append('right') else: align.append(None) + # Build table table = etree.SubElement(parent, 'table') thead = etree.SubElement(table, 'thead') - self._build_row(header, thead, align, border) + self._build_row(header, thead, align) tbody = etree.SubElement(table, 'tbody') for row in rows: - self._build_row(row.strip(), tbody, align, border) + self._build_row(row.strip(), tbody, align) - def _build_row(self, row, parent, align, border): + def _build_row(self, row, parent, align): """ Given a row of text, build table cells. """ tr = etree.SubElement(parent, 'tr') tag = 'td' if parent.tag == 'thead': tag = 'th' - cells = self._split_row(row, border) + cells = self._split_row(row) # We use align here rather than cells to ensure every row # contains the same number of columns. for i, a in enumerate(align): @@ -83,13 +99,12 @@ class TableProcessor(BlockProcessor): if a: c.set('align', a) - def _split_row(self, row, border): + def _split_row(self, row): """ split a row of text into list of cells. 
""" - if border: + if self.border: if row.startswith('|'): row = row[1:] - if row.endswith('|'): - row = row[:-1] + row = self.RE_END_BORDER.sub('', row) return self._split(row) def _split(self, row): @@ -106,23 +121,33 @@ class TableProcessor(BlockProcessor): for m in self.RE_CODE_PIPES.finditer(row): # Store ` data (len, start_pos, end_pos) if m.group(2): + # \`+ + # Store length of each tic group: subtract \ + tics.append(len(m.group(2)) - 1) + # Store start of group, end of group, and escape length + tic_points.append((m.start(2), m.end(2) - 1, 1)) + elif m.group(3): # `+ # Store length of each tic group - tics.append(len(m.group(2))) - # Store start and end of tic group - tic_points.append((m.start(2), m.end(2) - 1)) + tics.append(len(m.group(3))) + # Store start of group, end of group, and escape length + tic_points.append((m.start(3), m.end(3) - 1, 0)) # Store pipe location - elif m.group(4): - pipes.append(m.start(4)) + elif m.group(5): + pipes.append(m.start(5)) # Pair up tics according to size if possible + # Subtract the escape length *only* from the opening. # Walk through tic list and see if tic has a close. # Store the tic region (start of region, end of region). pos = 0 tic_len = len(tics) while pos < tic_len: try: - index = tics[pos + 1:].index(tics[pos]) + 1 + tic_size = tics[pos] - tic_points[pos][2] + if tic_size == 0: + raise ValueError + index = tics[pos + 1:].index(tic_size) + 1 tic_region.append((tic_points[pos][0], tic_points[pos + index][1])) pos += index + 1 except ValueError: @@ -160,6 +185,8 @@ class TableExtension(Extension): def extendMarkdown(self, md, md_globals): """ Add an instance of TableProcessor to BlockParser. 
""" + if '|' not in md.ESCAPED_CHARS: + md.ESCAPED_CHARS.append('|') md.parser.blockprocessors.add('table', TableProcessor(md.parser), '<hashheader') diff --git a/tests/extensions/extra/tables.html b/tests/extensions/extra/tables.html index a0b1f71..b81582c 100644 --- a/tests/extensions/extra/tables.html +++ b/tests/extensions/extra/tables.html @@ -284,4 +284,76 @@ Content Cell | Content Cell <td><code>\</code></td> </tr> </tbody> -</table>
\ No newline at end of file +</table> +<p>Only the first backtick can be escaped</p> +<table> +<thead> +<tr> +<th>Escaped</th> +<th>Bacticks</th> +</tr> +</thead> +<tbody> +<tr> +<td>`<code>\</code></td> +<td>``</td> +</tr> +</tbody> +</table> +<p>Test escaped pipes</p> +<table> +<thead> +<tr> +<th>Column 1</th> +<th>Column 2</th> +</tr> +</thead> +<tbody> +<tr> +<td><code>|</code> |</td> +<td>Pipes are okay in code and escaped. |</td> +</tr> +</tbody> +</table> +<table> +<thead> +<tr> +<th>Column 1</th> +<th>Column 2</th> +</tr> +</thead> +<tbody> +<tr> +<td>row1</td> +<td>row1 |</td> +</tr> +<tr> +<td>row2</td> +<td>row2</td> +</tr> +</tbody> +</table> +<p>Test header escapes</p> +<table> +<thead> +<tr> +<th><code>`\</code> |</th> +<th><code>\</code> |</th> +</tr> +</thead> +<tbody> +<tr> +<td>row1</td> +<td>row1</td> +</tr> +<tr> +<td>row2</td> +<td>row2</td> +</tr> +</tbody> +</table> +<p>Escaped pipes in format row should not be a table</p> +<p>| Column1 | Column2 | +| ------- || ------- | +| row1 | row1 | +| row2 | row2 |</p>
\ No newline at end of file diff --git a/tests/extensions/extra/tables.txt b/tests/extensions/extra/tables.txt index a9677ba..d5bd6ea 100644 --- a/tests/extensions/extra/tables.txt +++ b/tests/extensions/extra/tables.txt @@ -90,3 +90,34 @@ Odd backticks | Even backticks Escapes | More Escapes ------- | ------ `` `\`` | `\` + +Only the first backtick can be escaped + +Escaped | Bacticks +------- | ------ +\`` \` | \`\` + +Test escaped pipes + +Column 1 | Column 2 +-------- | -------- +`|` \| | Pipes are okay in code and escaped. \| + +| Column 1 | Column 2 | +| -------- | -------- | +| row1 | row1 \| +| row2 | row2 | + +Test header escapes + +| `` `\`` \| | `\` \| +| ---------- | ---- | +| row1 | row1 | +| row2 | row2 | + +Escaped pipes in format row should not be a table + +| Column1 | Column2 | +| ------- \|| ------- | +| row1 | row1 | +| row2 | row2 | diff --git a/tests/test_apis.py b/tests/test_apis.py index e3de779..7b1214f 100644 --- a/tests/test_apis.py +++ b/tests/test_apis.py @@ -758,3 +758,15 @@ PLACE_MARKER= ~~~footnotes~~~ """ self.create_config_file(config) self.assertRaises(yaml.YAMLError, parse_options, ['-c', self.tempfile]) + + +class TestEscapeAppend(unittest.TestCase): + """ Tests escape character append. """ + + def testAppend(self): + """ Test that appended escapes are only in the current instance. """ + md = markdown.Markdown() + md.ESCAPED_CHARS.append('|') + self.assertEqual('|' in md.ESCAPED_CHARS, True) + md2 = markdown.Markdown() + self.assertEqual('|' not in md2.ESCAPED_CHARS, True) |