From c70b2c4154d9b6e46f282c1f212c52e9fbfa5a07 Mon Sep 17 00:00:00 2001
From: Isaac Muse <faceless.shop@gmail.com>
Date: Thu, 19 Jan 2017 06:51:06 -0700
Subject: Tables: Improvements (#530)

Tables now handle escaped pipes when testing, in table borders, and in
the inline content.  To achieve this properly, a bug had to be fixed
related to appending escaped chars to the Markdown class.  Now appended
chars only appear in the current instance.  Lastly, the first backtick in
a table can be escaped, rounding out the last corner case.
---
 markdown/__init__.py               |  6 +--
 markdown/extensions/tables.py      | 79 +++++++++++++++++++++++++-------------
 tests/extensions/extra/tables.html | 74 ++++++++++++++++++++++++++++++++++-
 tests/extensions/extra/tables.txt  | 31 +++++++++++++++
 tests/test_apis.py                 | 12 ++++++
 5 files changed, 172 insertions(+), 30 deletions(-)

diff --git a/markdown/__init__.py b/markdown/__init__.py
index 78ea4cb..409f9cf 100644
--- a/markdown/__init__.py
+++ b/markdown/__init__.py
@@ -75,9 +75,6 @@ class Markdown(object):
         'xhtml5': to_xhtml_string,
     }
 
-    ESCAPED_CHARS = ['\\', '`', '*', '_', '{', '}', '[', ']',
-                     '(', ')', '>', '#', '+', '-', '.', '!']
-
     def __init__(self, *args, **kwargs):
         """
         Creates a new Markdown instance.
@@ -147,6 +144,9 @@ class Markdown(object):
                           'deprecated along with "safe_mode".',
                           DeprecationWarning)
 
+        self.ESCAPED_CHARS = ['\\', '`', '*', '_', '{', '}', '[', ']',
+                              '(', ')', '>', '#', '+', '-', '.', '!']
+
         self.registeredExtensions = []
         self.docType = ""
         self.stripTopLevelTags = True
diff --git a/markdown/extensions/tables.py b/markdown/extensions/tables.py
index 4bb2076..ebe6ffa 100644
--- a/markdown/extensions/tables.py
+++ b/markdown/extensions/tables.py
@@ -26,28 +26,43 @@ import re
 class TableProcessor(BlockProcessor):
     """ Process Tables. """
 
-    RE_CODE_PIPES = re.compile(r'(?:(\\\\)|(`+)|(\\\|)|(\|))')
+    RE_CODE_PIPES = re.compile(r'(?:(\\\\)|(\\`+)|(`+)|(\\\|)|(\|))')
+    RE_END_BORDER = re.compile(r'(?<!\\)(?:\\\\)*\|$')
+
+    def __init__(self, parser):
+        self.border = False
+        self.separator = ''
+        super(TableProcessor, self).__init__(parser)
 
     def test(self, parent, block):
-        rows = block.split('\n')
-        return (len(rows) > 1 and '|' in rows[0] and
-                '|' in rows[1] and '-' in rows[1] and
-                rows[1].strip()[0] in ['|', ':', '-'] and
-                set(rows[1]) <= set('|:- '))
+        """
+        Ensure first two rows (column header and separator row) are valid table rows.
+
+        Keep border check and separator row to avoid repeating the work.
+        """
+        is_table = False
+        header = [row.strip() for row in block.split('\n')[0:2]]
+        if len(header) == 2:
+            self.border = header[0].startswith('|')
+            row = self._split_row(header[0])
+            is_table = len(row) > 1
+
+            if is_table:
+                row = self._split_row(header[1])
+                is_table = len(row) > 1 and set(''.join(row)) <= set('|:- ')
+                if is_table:
+                    self.separator = row
+        return is_table
 
     def run(self, parent, blocks):
         """ Parse a table block and build table. """
         block = blocks.pop(0).split('\n')
         header = block[0].strip()
-        seperator = block[1].strip()
         rows = [] if len(block) < 3 else block[2:]
-        # Get format type (bordered by pipes or not)
-        border = False
-        if header.startswith('|'):
-            border = True
+
         # Get alignment of columns
         align = []
-        for c in self._split_row(seperator, border):
+        for c in self.separator:
             c = c.strip()
             if c.startswith(':') and c.endswith(':'):
                 align.append('center')
@@ -57,21 +72,22 @@ class TableProcessor(BlockProcessor):
                 align.append('right')
             else:
                 align.append(None)
+
         # Build table
         table = etree.SubElement(parent, 'table')
         thead = etree.SubElement(table, 'thead')
-        self._build_row(header, thead, align, border)
+        self._build_row(header, thead, align)
         tbody = etree.SubElement(table, 'tbody')
         for row in rows:
-            self._build_row(row.strip(), tbody, align, border)
+            self._build_row(row.strip(), tbody, align)
 
-    def _build_row(self, row, parent, align, border):
+    def _build_row(self, row, parent, align):
         """ Given a row of text, build table cells. """
         tr = etree.SubElement(parent, 'tr')
         tag = 'td'
         if parent.tag == 'thead':
             tag = 'th'
-        cells = self._split_row(row, border)
+        cells = self._split_row(row)
         # We use align here rather than cells to ensure every row
         # contains the same number of columns.
         for i, a in enumerate(align):
@@ -83,13 +99,12 @@ class TableProcessor(BlockProcessor):
             if a:
                 c.set('align', a)
 
-    def _split_row(self, row, border):
+    def _split_row(self, row):
         """ split a row of text into list of cells. """
-        if border:
+        if self.border:
             if row.startswith('|'):
                 row = row[1:]
-            if row.endswith('|'):
-                row = row[:-1]
+            row = self.RE_END_BORDER.sub('', row)
         return self._split(row)
 
     def _split(self, row):
@@ -106,23 +121,33 @@ class TableProcessor(BlockProcessor):
         for m in self.RE_CODE_PIPES.finditer(row):
             # Store ` data (len, start_pos, end_pos)
             if m.group(2):
+                # \`+
+                # Store length of each tic group: subtract \
+                tics.append(len(m.group(2)) - 1)
+                # Store start of group, end of group, and escape length
+                tic_points.append((m.start(2), m.end(2) - 1, 1))
+            elif m.group(3):
                 # `+
                 # Store length of each tic group
-                tics.append(len(m.group(2)))
-                # Store start and end of tic group
-                tic_points.append((m.start(2), m.end(2) - 1))
+                tics.append(len(m.group(3)))
+                # Store start of group, end of group, and escape length
+                tic_points.append((m.start(3), m.end(3) - 1, 0))
             # Store pipe location
-            elif m.group(4):
-                pipes.append(m.start(4))
+            elif m.group(5):
+                pipes.append(m.start(5))
 
         # Pair up tics according to size if possible
+        # Subtract the escape length *only* from the opening.
         # Walk through tic list and see if tic has a close.
         # Store the tic region (start of region, end of region).
         pos = 0
         tic_len = len(tics)
         while pos < tic_len:
             try:
-                index = tics[pos + 1:].index(tics[pos]) + 1
+                tic_size = tics[pos] - tic_points[pos][2]
+                if tic_size == 0:
+                    raise ValueError
+                index = tics[pos + 1:].index(tic_size) + 1
                 tic_region.append((tic_points[pos][0], tic_points[pos + index][1]))
                 pos += index + 1
             except ValueError:
@@ -160,6 +185,8 @@ class TableExtension(Extension):
 
     def extendMarkdown(self, md, md_globals):
         """ Add an instance of TableProcessor to BlockParser. """
+        if '|' not in md.ESCAPED_CHARS:
+            md.ESCAPED_CHARS.append('|')
         md.parser.blockprocessors.add('table',
                                       TableProcessor(md.parser),
                                       '<hashheader')
diff --git a/tests/extensions/extra/tables.html b/tests/extensions/extra/tables.html
index a0b1f71..b81582c 100644
--- a/tests/extensions/extra/tables.html
+++ b/tests/extensions/extra/tables.html
@@ -284,4 +284,76 @@ Content Cell | Content Cell
 <td><code>\</code></td>
 </tr>
 </tbody>
-</table>
\ No newline at end of file
+</table>
+<p>Only the first backtick can be escaped</p>
+<table>
+<thead>
+<tr>
+<th>Escaped</th>
+<th>Bacticks</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td>`<code>\</code></td>
+<td>``</td>
+</tr>
+</tbody>
+</table>
+<p>Test escaped pipes</p>
+<table>
+<thead>
+<tr>
+<th>Column 1</th>
+<th>Column 2</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td><code>|</code> |</td>
+<td>Pipes are okay in code and escaped. |</td>
+</tr>
+</tbody>
+</table>
+<table>
+<thead>
+<tr>
+<th>Column 1</th>
+<th>Column 2</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td>row1</td>
+<td>row1    |</td>
+</tr>
+<tr>
+<td>row2</td>
+<td>row2</td>
+</tr>
+</tbody>
+</table>
+<p>Test header escapes</p>
+<table>
+<thead>
+<tr>
+<th><code>`\</code> |</th>
+<th><code>\</code> |</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td>row1</td>
+<td>row1</td>
+</tr>
+<tr>
+<td>row2</td>
+<td>row2</td>
+</tr>
+</tbody>
+</table>
+<p>Escaped pipes in format row should not be a table</p>
+<p>| Column1   | Column2 |
+| ------- || ------- |
+| row1      | row1    |
+| row2      | row2    |</p>
\ No newline at end of file
diff --git a/tests/extensions/extra/tables.txt b/tests/extensions/extra/tables.txt
index a9677ba..d5bd6ea 100644
--- a/tests/extensions/extra/tables.txt
+++ b/tests/extensions/extra/tables.txt
@@ -90,3 +90,34 @@ Odd backticks | Even backticks
 Escapes | More Escapes
 ------- | ------
 `` `\`` | `\`
+
+Only the first backtick can be escaped
+
+Escaped | Bacticks
+------- | ------
+\`` \`  | \`\`
+
+Test escaped pipes
+
+Column 1 | Column 2
+-------- | --------
+`|` \|   | Pipes are okay in code and escaped. \|
+
+| Column 1 | Column 2 |
+| -------- | -------- |
+| row1     | row1    \|
+| row2     | row2     |
+
+Test header escapes
+
+| `` `\`` \| | `\` \|
+| ---------- | ---- |
+| row1       | row1 |
+| row2       | row2 |
+
+Escaped pipes in format row should not be a table
+
+| Column1   | Column2 |
+| ------- \|| ------- |
+| row1      | row1    |
+| row2      | row2    |
diff --git a/tests/test_apis.py b/tests/test_apis.py
index e3de779..7b1214f 100644
--- a/tests/test_apis.py
+++ b/tests/test_apis.py
@@ -758,3 +758,15 @@ PLACE_MARKER= ~~~footnotes~~~
 """
         self.create_config_file(config)
         self.assertRaises(yaml.YAMLError, parse_options, ['-c', self.tempfile])
+
+
+class TestEscapeAppend(unittest.TestCase):
+    """ Tests escape character append. """
+
+    def testAppend(self):
+        """ Test that appended escapes are only in the current instance. """
+        md = markdown.Markdown()
+        md.ESCAPED_CHARS.append('|')
+        self.assertEqual('|' in md.ESCAPED_CHARS, True)
+        md2 = markdown.Markdown()
+        self.assertEqual('|' not in md2.ESCAPED_CHARS, True)
-- 
cgit v1.2.3