Tables: Improvements (#530)

Tables now handle escaped pipes when testing whether a block is a table, in table borders, and in the inline content. To achieve this properly, a bug related to appending escaped chars to the Markdown class had to be fixed. Now appended chars only appear in the current instance. Lastly, the first backtick in a table can be escaped, rounding out the last corner case.
author: Isaac Muse <faceless.shop@gmail.com> 2017-01-19 06:51:06 -0700
committer: Waylan Limberg <waylan.limberg@icloud.com> 2017-01-19 08:51:06 -0500
commit: c70b2c4154d9b6e46f282c1f212c52e9fbfa5a07 (patch)
tree: 3a304d44324a5d0c9b8eabff7a86cc777163b62a /markdown
parent: b52293b2858138231795aa72aac1cf4799eb8da9 (diff)
download: markdown-c70b2c4154d9b6e46f282c1f212c52e9fbfa5a07.tar.gz
markdown-c70b2c4154d9b6e46f282c1f212c52e9fbfa5a07.tar.bz2
markdown-c70b2c4154d9b6e46f282c1f212c52e9fbfa5a07.zip
2 files changed, 56 insertions, 29 deletions
diff --git a/markdown/__init__.py b/markdown/__init__.py
index 78ea4cb..409f9cf 100644
--- a/markdown/__init__.py
+++ b/markdown/__init__.py
@@ -75,9 +75,6 @@ class Markdown(object):
         'xhtml5': to_xhtml_string,
     }
 
-    ESCAPED_CHARS = ['\\', '`', '*', '_', '{', '}', '[', ']',
-                     '(', ')', '>', '#', '+', '-', '.', '!']
-
     def __init__(self, *args, **kwargs):
         """
         Creates a new Markdown instance.
@@ -147,6 +144,9 @@ class Markdown(object):
                           'deprecated along with "safe_mode".',
                           DeprecationWarning)
 
+        self.ESCAPED_CHARS = ['\\', '`', '*', '_', '{', '}', '[', ']',
+                              '(', ')', '>', '#', '+', '-', '.', '!']
+
         self.registeredExtensions = []
         self.docType = ""
         self.stripTopLevelTags = True
diff --git a/markdown/extensions/tables.py b/markdown/extensions/tables.py
index 4bb2076..ebe6ffa 100644
--- a/markdown/extensions/tables.py
+++ b/markdown/extensions/tables.py
@@ -26,28 +26,43 @@ import re
 class TableProcessor(BlockProcessor):
     """ Process Tables. """
 
-    RE_CODE_PIPES = re.compile(r'(?:(\\\\)|(`+)|(\\\|)|(\|))')
+    RE_CODE_PIPES = re.compile(r'(?:(\\\\)|(\\`+)|(`+)|(\\\|)|(\|))')
+    RE_END_BORDER = re.compile(r'(?<!\\)(?:\\\\)*\|$')
+
+    def __init__(self, parser):
+        self.border = False
+        self.separator = ''
+        super(TableProcessor, self).__init__(parser)
 
     def test(self, parent, block):
-        rows = block.split('\n')
-        return (len(rows) > 1 and '|' in rows[0] and
-                '|' in rows[1] and '-' in rows[1] and
-                rows[1].strip()[0] in ['|', ':', '-'] and
-                set(rows[1]) <= set('|:- '))
+        """
+        Ensure first two rows (column header and separator row) are valid table rows.
+
+        Keep border check and separator row do avoid repeating the work.
+        """
+        is_table = False
+        header = [row.strip() for row in block.split('\n')[0:2]]
+        if len(header) == 2:
+            self.border = header[0].startswith('|')
+            row = self._split_row(header[0])
+            is_table = len(row) > 1
+
+            if is_table:
+                row = self._split_row(header[1])
+                is_table = len(row) > 1 and set(''.join(row)) <= set('|:- ')
+                if is_table:
+                    self.separator = row
+        return is_table
 
     def run(self, parent, blocks):
         """ Parse a table block and build table. """
         block = blocks.pop(0).split('\n')
         header = block[0].strip()
-        seperator = block[1].strip()
         rows = [] if len(block) < 3 else block[2:]
-        # Get format type (bordered by pipes or not)
-        border = False
-        if header.startswith('|'):
-            border = True
+
         # Get alignment of columns
         align = []
-        for c in self._split_row(seperator, border):
+        for c in self.separator:
             c = c.strip()
             if c.startswith(':') and c.endswith(':'):
                 align.append('center')
@@ -57,21 +72,22 @@ class TableProcessor(BlockProcessor):
                 align.append('right')
             else:
                 align.append(None)
+
         # Build table
         table = etree.SubElement(parent, 'table')
         thead = etree.SubElement(table, 'thead')
-        self._build_row(header, thead, align, border)
+        self._build_row(header, thead, align)
         tbody = etree.SubElement(table, 'tbody')
         for row in rows:
-            self._build_row(row.strip(), tbody, align, border)
+            self._build_row(row.strip(), tbody, align)
 
-    def _build_row(self, row, parent, align, border):
+    def _build_row(self, row, parent, align):
         """ Given a row of text, build table cells. """
         tr = etree.SubElement(parent, 'tr')
         tag = 'td'
         if parent.tag == 'thead':
             tag = 'th'
-        cells = self._split_row(row, border)
+        cells = self._split_row(row)
         # We use align here rather than cells to ensure every row
         # contains the same number of columns.
         for i, a in enumerate(align):
@@ -83,13 +99,12 @@ class TableProcessor(BlockProcessor):
             if a:
                 c.set('align', a)
 
-    def _split_row(self, row, border):
+    def _split_row(self, row):
         """ split a row of text into list of cells. """
-        if border:
+        if self.border:
             if row.startswith('|'):
                 row = row[1:]
-            if row.endswith('|'):
-                row = row[:-1]
+            row = self.RE_END_BORDER.sub('', row)
         return self._split(row)
 
     def _split(self, row):
@@ -106,23 +121,33 @@ class TableProcessor(BlockProcessor):
         for m in self.RE_CODE_PIPES.finditer(row):
             # Store ` data (len, start_pos, end_pos)
             if m.group(2):
+                # \`+
+                # Store length of each tic group: subtract \
+                tics.append(len(m.group(2)) - 1)
+                # Store start of group, end of group, and escape length
+                tic_points.append((m.start(2), m.end(2) - 1, 1))
+            elif m.group(3):
                 # `+
                 # Store length of each tic group
-                tics.append(len(m.group(2)))
-                # Store start and end of tic group
-                tic_points.append((m.start(2), m.end(2) - 1))
+                tics.append(len(m.group(3)))
+                # Store start of group, end of group, and escape length
+                tic_points.append((m.start(3), m.end(3) - 1, 0))
             # Store pipe location
-            elif m.group(4):
-                pipes.append(m.start(4))
+            elif m.group(5):
+                pipes.append(m.start(5))
 
         # Pair up tics according to size if possible
+        # Subtract the escape length *only* from the opening.
         # Walk through tic list and see if tic has a close.
         # Store the tic region (start of region, end of region).
         pos = 0
         tic_len = len(tics)
         while pos < tic_len:
             try:
-                index = tics[pos + 1:].index(tics[pos]) + 1
+                tic_size = tics[pos] - tic_points[pos][2]
+                if tic_size == 0:
+                    raise ValueError
+                index = tics[pos + 1:].index(tic_size) + 1
                 tic_region.append((tic_points[pos][0], tic_points[pos + index][1]))
                 pos += index + 1
             except ValueError:
@@ -160,6 +185,8 @@ class TableExtension(Extension):
 
     def extendMarkdown(self, md, md_globals):
         """ Add an instance of TableProcessor to BlockParser. """
+        if '|' not in md.ESCAPED_CHARS:
+            md.ESCAPED_CHARS.append('|')
         md.parser.blockprocessors.add('table',
                                       TableProcessor(md.parser),
                                       '<hashheader')
author	Isaac Muse <faceless.shop@gmail.com>	2017-01-19 06:51:06 -0700
committer	Waylan Limberg <waylan.limberg@icloud.com>	2017-01-19 08:51:06 -0500
commit	c70b2c4154d9b6e46f282c1f212c52e9fbfa5a07 (patch)
tree	3a304d44324a5d0c9b8eabff7a86cc777163b62a /markdown
parent	b52293b2858138231795aa72aac1cf4799eb8da9 (diff)
download	markdown-c70b2c4154d9b6e46f282c1f212c52e9fbfa5a07.tar.gz markdown-c70b2c4154d9b6e46f282c1f212c52e9fbfa5a07.tar.bz2 markdown-c70b2c4154d9b6e46f282c1f212c52e9fbfa5a07.zip