From ef9a229ebeaf8173e9fd4e541de4d83e8678f649 Mon Sep 17 00:00:00 2001
From: Waylan Limberg <waylan@gmail.com>
Date: Thu, 17 Nov 2011 22:43:02 -0500
Subject: Fixed #47. Improved HRProcessor.\n\nPython's re module does not
 support atomic grouping, which was slowing the HR regex down if a long HR
 ended with a non-HR char (causing the regex to backtrack). Therefore, we have
 to simulate atomic grouping. Fortunately, we only need to match end-of-line
 or end-of-string after the atomic group here, so it was an easy case to
 simulate. Just remove the '$' from the end of the regex and manually check
 using m.end(). The run method was refactored while I was at it, saving us
 from running the regex twice for each HR.

---
 markdown/blockprocessors.py  | 37 +++++++++++++++++++------------------
 tests/misc/para-with-hr.html |  5 ++++-
 tests/misc/para-with-hr.txt  |  3 +++
 3 files changed, 26 insertions(+), 19 deletions(-)
diff --git a/markdown/blockprocessors.py b/markdown/blockprocessors.py
index 7223da4..de3f136 100644
--- a/markdown/blockprocessors.py
+++ b/markdown/blockprocessors.py
@@ -460,35 +460,36 @@ class SetextHeaderProcessor(BlockProcessor):
 class HRProcessor(BlockProcessor):
     """ Process Horizontal Rules. """
 
-    RE = r'[ ]{0,3}((-+[ ]{0,2}){3,}|(_+[ ]{0,2}){3,}|(\*+[ ]{0,2}){3,})[ ]*'
+    RE = r'^[ ]{0,3}((-+[ ]{0,2}){3,}|(_+[ ]{0,2}){3,}|(\*+[ ]{0,2}){3,})[ ]*'
     # Detect hr on any line of a block.
-    SEARCH_RE = re.compile(r'(^|\n)%s(\n|$)' % RE)
-    # Match a hr on a single line of text.
-    MATCH_RE = re.compile(r'^%s$' % RE)
+    SEARCH_RE = re.compile(RE, re.MULTILINE)
 
     def test(self, parent, block):
-        return bool(self.SEARCH_RE.search(block))
+        m = self.SEARCH_RE.search(block)
+        # No atomic grouping in python so we simulate it here for performance.
+        # The regex only matches what would be in the atomic group - the HR.
+        # Then check if we are at end of block or if next char is a newline.
+        if m and (m.end() == len(block) or block[m.end()] == '\n'):
+            # Save match object on class instance so we can use it later.
+            self.match = m
+            return True
+        return False
 
     def run(self, parent, blocks):
-        lines = blocks.pop(0).split('\n')
-        prelines = []
+        block = blocks.pop(0)
         # Check for lines in block before hr.
-        for line in lines:
-            m = self.MATCH_RE.match(line)
-            if m:
-                break
-            else:
-                prelines.append(line)
-        if len(prelines):
+        prelines = block[:self.match.start()].rstrip('\n')
+        if prelines:
             # Recursively parse lines before hr so they get parsed first.
-            self.parser.parseBlocks(parent, ['\n'.join(prelines)])
+            self.parser.parseBlocks(parent, [prelines])
         # create hr
         hr = util.etree.SubElement(parent, 'hr')
         # check for lines in block after hr.
-        lines = lines[len(prelines)+1:]
-        if len(lines):
+        postlines = block[self.match.end():].lstrip('\n')
+        if postlines:
             # Add lines after hr to master blocks for later parsing.
-            blocks.insert(0, '\n'.join(lines))
+            blocks.insert(0, postlines)
+
 
 
 class EmptyBlockProcessor(BlockProcessor):
diff --git a/tests/misc/para-with-hr.html b/tests/misc/para-with-hr.html
index 8569fec..7607449 100644
--- a/tests/misc/para-with-hr.html
+++ b/tests/misc/para-with-hr.html
@@ -1,3 +1,6 @@
 <p>Here is a paragraph, followed by a horizontal rule.</p>
 <hr />
-<p>Followed by another paragraph.</p>
\ No newline at end of file
+<p>Followed by another paragraph.</p>
+<p>Here is another paragraph, followed by:
+*** not an HR.
+Followed by more of the same paragraph.</p>
\ No newline at end of file
diff --git a/tests/misc/para-with-hr.txt b/tests/misc/para-with-hr.txt
index 20735fb..165bbe3 100644
--- a/tests/misc/para-with-hr.txt
+++ b/tests/misc/para-with-hr.txt
@@ -2,3 +2,6 @@ Here is a paragraph, followed by a horizontal rule.
 ***
 Followed by another paragraph.
 
+Here is another paragraph, followed by:
+*** not an HR.
+Followed by more of the same paragraph.
-- 
cgit v1.2.3