aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--.gitignore2
-rwxr-xr-x[-rw-r--r--]markdown.py1208
-rwxr-xr-x[-rw-r--r--]mdx_codehilite.py10
-rwxr-xr-x[-rw-r--r--]mdx_footnotes.py117
-rwxr-xr-x[-rw-r--r--]mdx_headerid.py16
-rwxr-xr-x[-rw-r--r--]mdx_imagelinks.py0
-rwxr-xr-x[-rw-r--r--]mdx_rss.py103
-rwxr-xr-x[-rw-r--r--]mdx_wikilink.py17
-rw-r--r--test-markdown.py7
-rw-r--r--tests/extensions-x-footnotes/footnote.html82
-rw-r--r--tests/markdown-test/amps-and-angle-encoding.html30
-rwxr-xr-xtests/markdown-test/angle-links-and-img.html7
-rwxr-xr-xtests/markdown-test/angle-links-and-img.txt4
-rw-r--r--tests/markdown-test/auto-links.html31
-rw-r--r--tests/markdown-test/backlash-escapes.html67
-rw-r--r--tests/markdown-test/benchmark.dat36
-rw-r--r--tests/markdown-test/blockquotes-with-dode-blocks.html21
-rw-r--r--tests/markdown-test/hard-wrapped.html15
-rw-r--r--tests/markdown-test/horizontal-rules.html72
-rw-r--r--tests/markdown-test/inline-html-advanced.html17
-rw-r--r--tests/markdown-test/inline-html-comments.html23
-rw-r--r--tests/markdown-test/inline-html-simple.html101
-rw-r--r--tests/markdown-test/links-inline.html22
-rw-r--r--tests/markdown-test/links-reference.html32
-rw-r--r--tests/markdown-test/literal-quotes.html9
-rw-r--r--tests/markdown-test/markdown-documentation-basics.html307
-rw-r--r--tests/markdown-test/markdown-syntax.html1306
-rw-r--r--tests/markdown-test/nested-blockquotes.html14
-rw-r--r--tests/markdown-test/ordered-and-unordered-list.html341
-rw-r--r--tests/markdown-test/strong-and-em-together.html23
-rw-r--r--tests/markdown-test/tabs.html46
-rw-r--r--tests/markdown-test/tidyness.html22
-rw-r--r--tests/misc/CRLF_line_ends.html7
-rw-r--r--tests/misc/adjacent-headers.html7
-rw-r--r--tests/misc/amp-in-url.html8
-rw-r--r--tests/misc/ampersand.html9
-rw-r--r--tests/misc/arabic.html46
-rw-r--r--tests/misc/attributes2.html16
-rw-r--r--tests/misc/backtick-escape.html3
-rw-r--r--tests/misc/backtick-escape.txt3
-rw-r--r--tests/misc/bidi.html85
-rw-r--r--tests/misc/blank-block-quote.html10
-rwxr-xr-xtests/misc/blockquote-below-paragraph.html6
-rwxr-xr-xtests/misc/blockquote-below-paragraph.txt3
-rw-r--r--tests/misc/blockquote-hr.html31
-rw-r--r--tests/misc/blockquote.html47
-rw-r--r--tests/misc/bold_links.html6
-rw-r--r--tests/misc/br.html17
-rw-r--r--tests/misc/bracket_re.html119
-rw-r--r--tests/misc/code-first-line.html6
-rw-r--r--tests/misc/comments.html15
-rw-r--r--tests/misc/div.html12
-rw-r--r--tests/misc/email.html5
-rw-r--r--tests/misc/funky-list.html33
-rw-r--r--tests/misc/h1.html8
-rw-r--r--tests/misc/hash.html23
-rw-r--r--tests/misc/headers.html13
-rw-r--r--tests/misc/hline.html7
-rwxr-xr-xtests/misc/html-comments.html2
-rwxr-xr-xtests/misc/html-comments.txt2
-rw-r--r--tests/misc/html.html14
-rw-r--r--tests/misc/image-2.html15
-rw-r--r--tests/misc/image.html6
-rw-r--r--tests/misc/image_in_links.html7
-rw-r--r--tests/misc/inside_html.html6
-rw-r--r--tests/misc/japanese.html31
-rw-r--r--tests/misc/lazy-block-quote.html13
-rw-r--r--tests/misc/link-with-parenthesis.html3
-rw-r--r--tests/misc/link-with-parenthesis.txt1
-rw-r--r--tests/misc/lists.html82
-rw-r--r--tests/misc/lists2.html13
-rw-r--r--tests/misc/lists3.html13
-rw-r--r--tests/misc/lists4.html25
-rw-r--r--tests/misc/lists5.html31
-rw-r--r--tests/misc/markup-inside-p.html26
-rw-r--r--tests/misc/mismatched-tags.html8
-rw-r--r--tests/misc/missing-link-def.html3
-rw-r--r--tests/misc/missing-link-def.txt4
-rw-r--r--tests/misc/more_comments.html8
-rw-r--r--tests/misc/multi-line-tags.html12
-rw-r--r--tests/misc/multi-paragraph-block-quote.html16
-rw-r--r--tests/misc/multi-test.html41
-rw-r--r--tests/misc/multiline-comments.html22
-rw-r--r--tests/misc/nested-patterns.html38
-rw-r--r--tests/misc/nested-patterns.txt7
-rw-r--r--tests/misc/normalize.html8
-rw-r--r--tests/misc/numeric-entity.html9
-rw-r--r--tests/misc/php.html17
-rw-r--r--tests/misc/pre.html13
-rw-r--r--tests/misc/russian.html14
-rw-r--r--tests/misc/some-test.html126
-rw-r--r--tests/misc/span.html15
-rw-r--r--tests/misc/strong-with-underscores.html5
-rw-r--r--tests/misc/stronintags.html14
-rw-r--r--tests/misc/tabs-in-lists.html91
-rw-r--r--tests/misc/two-spaces.html43
-rw-r--r--tests/misc/uche.html17
-rw-r--r--tests/misc/underscores.html22
-rw-r--r--tests/misc/url_spaces.html11
-rw-r--r--tests/safe_mode/inline-html-advanced.html13
-rw-r--r--tests/safe_mode/inline-html-comments.html20
-rw-r--r--tests/safe_mode/inline-html-simple.html97
-rw-r--r--tests/safe_mode/script_tags.html34
-rw-r--r--tests/safe_mode/script_tags.txt2
-rw-r--r--tests/safe_mode/unsafe_urls.html44
105 files changed, 2653 insertions, 3064 deletions
diff --git a/.gitignore b/.gitignore
index 2ac1b58..64c7154 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,3 +2,5 @@
*.bak
*.tmp
tmp/*
+__init__.py
+markdown_old.py
diff --git a/markdown.py b/markdown.py
index 86f83ab..059ac87 100644..100755
--- a/markdown.py
+++ b/markdown.py
@@ -33,13 +33,15 @@ __revision__ = "$Rev$"
-import re, sys, codecs
+import re, sys, codecs, htmlentitydefs
from urlparse import urlparse, urlunparse
from logging import getLogger, StreamHandler, Formatter, \
DEBUG, INFO, WARN, ERROR, CRITICAL
+
+
MESSAGE_THRESHOLD = CRITICAL
@@ -56,13 +58,59 @@ logger.addHandler(console_hndlr)
def message(level, text):
''' A wrapper method for logging debug messages. '''
logger.log(level, text)
-
+
+def isstr(s):
+ return isinstance(s, unicode) or isinstance(s, str)
+
+def importETree():
+ """ Imports best variant of ElementTree
+ and returns module object """
+
+ try:
+ # Python 2.5+
+ import xml.etree.cElementTree as etree
+ except ImportError:
+ try:
+ # Python 2.5+
+ import xml.etree.ElementTree as etree
+ except ImportError:
+ try:
+ # normal cElementTree install
+ import cElementTree as etree
+ except ImportError:
+ try:
+ # normal ElementTree install
+ import elementtree.ElementTree as etree
+ except ImportError:
+ message(CRITICAL,
+ "Failed to import ElementTree from any known place")
+ sys.exit(1)
+ return etree
+
+etree = importETree()
+
+def indentETree(elem, level=0):
+
+ if level > 1:
+ i = "\n" + (level-1)*" "
+ else:
+ i = "\n"
+
+ if len(elem):
+ if not elem.text or not elem.text.strip():
+ elem.text = i + " "
+ for e in elem:
+ indentETree(e, level+1)
+ if not e.tail or not e.tail.strip():
+ e.tail = i
+ if level and (not elem.tail or not elem.tail.strip()):
+ elem.tail = i
# --------------- CONSTANTS YOU MIGHT WANT TO MODIFY -----------------
TAB_LENGTH = 4 # expand tabs to this many spaces
ENABLE_ATTRIBUTES = True # @id = xyz -> <... id="xyz">
-SMART_EMPHASIS = 1 # this_or_that does not become this<i>or</i>that
+SMART_EMPHASIS = True # this_or_that does not become this<i>or</i>that
HTML_REMOVED_TEXT = "[HTML_REMOVED]" # text used instead of HTML in safe mode
RTL_BIDI_RANGES = ( (u'\u0590', u'\u07FF'),
@@ -107,6 +155,11 @@ EXECUTABLE_NAME_FOR_USAGE = "python markdown.py"
# --------------- CONSTANTS YOU _SHOULD NOT_ HAVE TO CHANGE ----------
+AND_SUBSTITUTE = unichr(2) + unichr(4) + unichr(3)
+
+INLINE_PLACEHOLDER_PREFIX = u'\u0001'
+INLINE_PLACEHOLDER_SUFFIX = u'\u0002'
+
# a template for html placeholders
START = u'\u0001'
END = u'\u0002'
@@ -126,330 +179,23 @@ def isBlockLevel (tag):
return ( (tag in BLOCK_LEVEL_ELEMENTS) or
(tag[0] == 'h' and tag[1] in "0123456789") )
-"""
-======================================================================
-========================== NANODOM ===================================
-======================================================================
-
-The three classes below implement some of the most basic DOM
-methods. I use this instead of minidom because I need a simpler
-functionality and do not want to require additional libraries.
-
-Importantly, NanoDom does not do normalization, which is what we
-want. It also adds extra white space when converting DOM to string
-"""
-
-ENTITY_NORMALIZATION_EXPRESSIONS = [ (re.compile("&"), "&amp;"),
- (re.compile("<"), "&lt;"),
- (re.compile(">"), "&gt;")]
-
-ENTITY_NORMALIZATION_EXPRESSIONS_SOFT = [ (re.compile("&(?!\#)"), "&amp;"),
- (re.compile("<"), "&lt;"),
- (re.compile(">"), "&gt;"),
- (re.compile("\""), "&quot;")]
-
-
-def getBidiType(text):
- """
- Get Bi-directional text type. Used by TextNode to determine text direction.
- """
-
- if not text: return None
-
- ch = text[0]
-
- if not isinstance(ch, unicode) or not ch.isalpha():
- return None
+def codepoint2name(code):
+ """ Returns entity definition by code, or code
+ if there is no such entity definition"""
+ entity = htmlentitydefs.codepoint2name.get(code)
+ if entity:
+ return "%s%s;" % (AND_SUBSTITUTE, entity)
else:
-
- for min, max in RTL_BIDI_RANGES:
- if ( ch >= min and ch <= max ):
- return "rtl"
- else:
- return "ltr"
-
-
-class Document:
- """
- Document root of the NanoDom. An instance stores DOM elements as children.
-
- """
-
- def __init__ (self):
- """ Create a NanoDom document. """
- self.bidi = "ltr"
-
- def appendChild(self, child):
- """ Add a dom element as a child of the document root. """
- self.documentElement = child
- child.isDocumentElement = True
- child.parent = self
- self.entities = {}
-
- def setBidi(self, bidi):
- """ Set text direction (right-left or left-right)."""
- if bidi:
- self.bidi = bidi
-
- def createElement(self, tag, textNode=None):
- """ Given a tag or textNode, return a dom element. """
- el = Element(tag)
- el.doc = self
- if textNode:
- el.appendChild(self.createTextNode(textNode))
- return el
-
- def createTextNode(self, text):
- """ Return given text as a TextNode. """
- node = TextNode(text)
- node.doc = self
- return node
-
- def createEntityReference(self, entity):
- """ Return an html entitry reference (i.e.: `&amp;`). """
- if entity not in self.entities:
- self.entities[entity] = EntityReference(entity)
- return self.entities[entity]
-
- def createCDATA(self, text):
- """ Return the given text as a CDATA node. """
- node = CDATA(text)
- node.doc = self
- return node
-
- def toxml (self):
- """ Convert document to xml and return a string. """
- return self.documentElement.toxml()
-
- def normalizeEntities(self, text, avoidDoubleNormalizing=False):
- """ Return the given text as an html entity (i.e.: `<` => `&gt;`). """
- if avoidDoubleNormalizing:
- regexps = ENTITY_NORMALIZATION_EXPRESSIONS_SOFT
- else:
- regexps = ENTITY_NORMALIZATION_EXPRESSIONS
-
- for regexp, substitution in regexps:
- text = regexp.sub(substitution, text)
- return text
-
- def find(self, test):
- """ Return a list of descendants that pass the test function """
- return self.documentElement.find(test)
-
- def unlink(self):
- """ Cleanup: Remove all children from the document. """
- self.documentElement.unlink()
- self.documentElement = None
-
-
-class CDATA:
- """ CDATA node type of NanoDom. """
- type = "cdata"
-
- def __init__ (self, text):
- """ Create a CDATA node with given text. """
- self.text = text
-
- def handleAttributes(self):
- """ Not implemented for CDATA node type. """
- pass
-
- def toxml (self):
- """ Return CDATA node as a string. """
- return "<![CDATA[" + self.text + "]]>"
-
-class Element:
- """
- Element node type of Nanodom.
+ return "%s#%d;" % (AND_SUBSTITUTE, code)
- All html tags would most likely be represented as Elements.
-
- """
- type = "element"
-
- def __init__ (self, tag):
- """ Create an Element node instance. """
- self.nodeName = tag
- self.attributes = []
- self.attribute_values = {}
- self.childNodes = []
- self.bidi = None
- self.isDocumentElement = False
-
- def setBidi(self, bidi):
- """ Set text direction (i.e.: right-left or left-right). """
- if bidi:
-
- orig_bidi = self.bidi
-
- if not self.bidi or self.isDocumentElement:
- # Once the bidi is set don't change it (except for doc element)
- self.bidi = bidi
- self.parent.setBidi(bidi)
-
-
- def unlink(self):
- """ Cleanup: Remove all children of the Element. """
- for child in self.childNodes:
- if child.type == "element":
- child.unlink()
- self.childNodes = None
-
- def setAttribute(self, attr, value):
- """
- Assign an html/xml attribute to the Element (i.e.: id, class, href).
- """
- if not attr in self.attributes:
- self.attributes.append(attr)
-
- self.attribute_values[attr] = value
-
- def insertChild(self, position, child):
- """ Insert a child Element at the given position. """
- self.childNodes.insert(position, child)
- child.parent = self
-
- def removeChild(self, child):
- """ Remove the given child from the Element. """
- self.childNodes.remove(child)
-
- def replaceChild(self, oldChild, newChild):
- """ Replace an old child Element with a new child Element. """
- position = self.childNodes.index(oldChild)
- self.removeChild(oldChild)
- self.insertChild(position, newChild)
-
- def appendChild(self, child):
- """ Append a new child Element to the end of the child Elements. """
- self.childNodes.append(child)
- child.parent = self
-
- def handleAttributes(self):
- """ Not implemented for Element node type. """
- pass
-
- def find(self, test, depth=0):
- """ Returns a list of descendants that pass the test function """
- matched_nodes = []
- for child in self.childNodes:
- if test(child):
- matched_nodes.append(child)
- if child.type == "element":
- matched_nodes += child.find(test, depth+1)
- return matched_nodes
-
- def toxml(self):
- """ Return the Element and all children as a string. """
- if ENABLE_ATTRIBUTES:
- for child in self.childNodes:
- child.handleAttributes()
-
- buffer = ""
- if self.nodeName in ['h1', 'h2', 'h3', 'h4']:
- buffer += "\n"
- elif self.nodeName in ['li']:
- buffer += "\n "
-
- # Process children FIRST, then do the attributes
-
- childBuffer = ""
-
- if self.childNodes or self.nodeName in ['blockquote']:
- childBuffer += ">"
- for child in self.childNodes:
- childBuffer += child.toxml()
- if self.nodeName == 'p':
- childBuffer += "\n"
- elif self.nodeName == 'li':
- childBuffer += "\n "
- childBuffer += "</%s>" % self.nodeName
- else:
- childBuffer += "/>"
-
-
-
- buffer += "<" + self.nodeName
-
- if self.nodeName in ['p', 'li', 'ul', 'ol',
- 'h1', 'h2', 'h3', 'h4', 'h5', 'h6']:
-
- if not self.attribute_values.has_key("dir"):
- if self.bidi:
- bidi = self.bidi
- else:
- bidi = self.doc.bidi
-
- if bidi=="rtl":
- self.setAttribute("dir", "rtl")
-
- for attr in self.attributes:
- value = self.attribute_values[attr]
- value = self.doc.normalizeEntities(value,
- avoidDoubleNormalizing=True)
- buffer += ' %s="%s"' % (attr, value)
-
-
- # Now let's actually append the children
-
- buffer += childBuffer
-
- if self.nodeName in ['p', 'br ', 'li', 'ul', 'ol',
- 'h1', 'h2', 'h3', 'h4'] :
- buffer += "\n"
-
- return buffer
-
-
-class TextNode:
- """ A Text node type of the NanoDom. """
- type = "text"
- attrRegExp = re.compile(r'\{@([^\}]*)=([^\}]*)}') # {@id=123}
-
- def __init__ (self, text):
- """ Create a TextNode with the given text. """
- self.value = text
-
- def attributeCallback(self, match):
- """ Regex callback method to set attribute on parent. """
- self.parent.setAttribute(match.group(1), match.group(2))
-
- def handleAttributes(self):
- """ Parse and assign attributes to the parent Element. """
- self.value = self.attrRegExp.sub(self.attributeCallback, self.value)
-
- def toxml(self):
- """ Return the TextNode as a string. """
- text = self.value
-
- self.parent.setBidi(getBidiType(text))
+def handleAttributes(text, parent):
- if not text.startswith(HTML_PLACEHOLDER_PREFIX):
- if self.parent.nodeName == "p":
- text = text.replace("\n", "\n ")
- elif (self.parent.nodeName == "li"
- and self.parent.childNodes[0]==self):
- text = "\n " + text.replace("\n", "\n ")
- text = self.doc.normalizeEntities(text)
- return text
-
-
-class EntityReference:
- """ EntityReference node type of NanoDom. """
- type = "entity_ref"
-
- def __init__(self, entity):
- """ Create an EntityReference of the given entity. """
- self.entity = entity
-
- def handleAttributes(self):
- """ Not implemented for EntityReference. """
- pass
-
- def toxml(self):
- """ Return the EntityReference as a string. """
- return "&" + self.entity + ";"
+ def attributeCallback(match):
+ parent.set(match.group(1), match.group(2))
+ return RE.regExp['attr'].sub(attributeCallback, text)
+
"""
======================================================================
@@ -613,7 +359,7 @@ class HeaderPreprocessor(Preprocessor):
"""
Replace underlined headers with hashed headers to avoid
- the nead for lookahead later.
+ the need for lookahead later.
"""
def run (self, lines):
@@ -656,29 +402,32 @@ class LinePreprocessor(Preprocessor):
blockquote_re = re.compile(r'^(> )+')
def run (self, lines):
- """ Find a store HR lines. """
+ """ Find and replace HR lines. """
for i in range(len(lines)):
prefix = ''
m = self.blockquote_re.search(lines[i])
- if m : prefix = m.group(0)
+ if m:
+ prefix = m.group(0)
if self._isLine(lines[i][len(prefix):]):
- lines[i] = prefix + self.stash.store("<hr />", safe=True)
+ #lines[i] = prefix + self.stash.store("<hr />", safe=True)
+ lines[i] = prefix + "___"
return lines
def _isLine(self, block):
"""Determine if a block should be replaced with an <HR>"""
- if block.startswith(" "): return 0 # a code block
+ if block.startswith(" "):
+ return False # a code block
text = "".join([x for x in block if not x.isspace()])
if len(text) <= 2:
- return 0
+ return False
for pattern in ['isline1', 'isline2', 'isline3']:
m = RE.regExp[pattern].match(text)
if (m and m.group(1)):
- return 1
+ return True
else:
- return 0
+ return False
LINE_PREPROCESSOR = LinePreprocessor()
@@ -713,6 +462,7 @@ class ReferencePreprocessor(Preprocessor):
REFERENCE_PREPROCESSOR = ReferencePreprocessor()
+
"""
======================================================================
========================== INLINE PATTERNS ===========================
@@ -725,9 +475,8 @@ expression and needs support the following methods:
pattern.getCompiledRegExp() - returns a regular expression
- pattern.handleMatch(m, doc) - takes a match object and returns
- a NanoDom node (as a part of the provided
- doc) or None
+ pattern.handleMatch(m) - takes a match object and returns
+ an ElementTree element or just plain text
All of python markdown's built-in patterns subclass from Pattern,
but you can add additional patterns that don't.
@@ -765,32 +514,31 @@ BRK = ( r'\[('
+ NOBRACKET + r')\]' )
NOIMG = r'(?<!\!)'
-BACKTICK_RE = r'\`([^\`]*)\`' # `e= m*c^2`
-DOUBLE_BACKTICK_RE = r'\`\`(.*?)\`\`' # ``e=f("`")``
+BACKTICK_RE = r'(?<!\\)(`+)(.+?)(?<!`)\2(?!`)' # `e=f()` or ``e=f("`")``
ESCAPE_RE = r'\\(.)' # \<
-EMPHASIS_RE = r'\*([^\*]*)\*' # *emphasis*
-STRONG_RE = r'\*\*(.*?)\*\*' # **strong**
-STRONG_EM_RE = r'\*\*\*(.*?)\*\*\*' # ***strong***
+EMPHASIS_RE = r'(\*)([^\*]*)\2' # *emphasis*
+STRONG_RE = r'(\*{2}|_{2})(.*?)\2' # **strong**
+STRONG_EM_RE = r'(\*{3}|_{3})(.*?)\2' # ***strong***
if SMART_EMPHASIS:
- EMPHASIS_2_RE = r'(?<!\S)_(\S[^_]*)_' # _emphasis_
+ EMPHASIS_2_RE = r'(?<!\S)(_)(\S.*?)\2' # _emphasis_
else:
- EMPHASIS_2_RE = r'_([^_]*)_' # _emphasis_
+ EMPHASIS_2_RE = r'(_)(.*?)\2' # _emphasis_
-STRONG_2_RE = r'__(.*?)__' # __strong__
-STRONG_EM_2_RE = r'___(.*?)___' # ___strong___
+#LINK_RE = NOIMG + BRK + r'\s*\(([^\)]*)\)' # [text](url)
-LINK_RE = NOIMG + BRK + r'\s*\(([^\)]*)\)' # [text](url)
-LINK_ANGLED_RE = NOIMG + BRK + r'\s*\(<([^\)]*)>\)' # [text](<url>)
-IMAGE_LINK_RE = r'\!' + BRK + r'\s*\(([^\)]*)\)' # ![alttxt](http://x.com/)
+LINK_RE = NOIMG + BRK + \
+r'''\(\s*(<.*?>|((?:(?:\(.*?\))|[^\(\)]))*?)\s*((['"])(.*)\12)?\)''' # [text](url) or [text](<url>)
+
+IMAGE_LINK_RE = r'\!' + BRK + r'\s*\((<.*?>|([^\)]*))\)' # ![alttxt](http://x.com/) or ![alttxt](<http://x.com/>)
REFERENCE_RE = NOIMG + BRK+ r'\s*\[([^\]]*)\]' # [Google][3]
IMAGE_REFERENCE_RE = r'\!' + BRK + '\s*\[([^\]]*)\]' # ![alt text][2]
NOT_STRONG_RE = r'( \* )' # stand-alone * or _
AUTOLINK_RE = r'<((?:f|ht)tps?://[^>]*)>' # <http://www.123.com>
AUTOMAIL_RE = r'<([^> \!]*@[^> ]*)>' # <me@example.com>
#HTML_RE = r'(\<[^\>]*\>)' # <...>
-HTML_RE = r'(\<[a-zA-Z/][^\>]*\>)' # <...>
-ENTITY_RE = r'(&[\#a-zA-Z0-9]*;)' # &amp;
+HTML_RE = r'(\<([a-zA-Z/][^\>]*?|\!--.*?--)\>)' # <...>
+ENTITY_RE = r'(&[\#a-zA-Z0-9]*;)' # &amp;
LINE_BREAK_RE = r' \n' # two spaces at end of line
LINE_BREAK_2_RE = r' $' # two spaces at end of text
@@ -816,30 +564,35 @@ class Pattern:
""" Return a compiled regular expression. """
return self.compiled_re
- def handleMatch(self, m, doc):
+ def handleMatch(self, m):
"""
- Return a NanoDom element from the given match. Subclasses should
+ Return an ElementTree element from the given match. Subclasses should
override this method.
Keyword arguments:
* m: A re match object containing a match of the pattern.
- * doc: An instance of a NanoDom Document.
-
"""
pass
+
+ def type(self):
+ """ Return class name, to define pattern type """
+ return self.__class__.__name__
BasePattern = Pattern # for backward compatibility
class SimpleTextPattern (Pattern):
- """ Return a simple TextNode of group(2) of a Pattern. """
- def handleMatch(self, m, doc):
- return doc.createTextNode(m.group(2))
+ """ Return a simple text of group(2) of a Pattern. """
+ def handleMatch(self, m):
+ text = m.group(2)
+ if text == INLINE_PLACEHOLDER_PREFIX:
+ return None
+ return text
class SimpleTagPattern (Pattern):
"""
- Return NanoDom Element of type `tag` with a child TextNode of group(2)
+ Return element of type `tag` with a text attribute of group(3)
of a Pattern.
"""
@@ -847,71 +600,69 @@ class SimpleTagPattern (Pattern):
Pattern.__init__(self, pattern)
self.tag = tag
- def handleMatch(self, m, doc):
- el = doc.createElement(self.tag)
- el.appendChild(doc.createTextNode(m.group(2)))
+ def handleMatch(self, m):
+ el = etree.Element(self.tag)
+ el.text = m.group(3)
return el
class SubstituteTagPattern (SimpleTagPattern):
- """ Return a NanoDom ELement of type `tag` with no children. """
- def handleMatch (self, m, doc):
- return doc.createElement(self.tag)
+ """ Return an element of type `tag` with no children. """
+ def handleMatch (self, m):
+ return etree.Element(self.tag)
class BacktickPattern (Pattern):
- """ Return a NanoDom `<code>` Element containing the matching text. """
+ """ Return a `<code>` element containing the matching text. """
def __init__ (self, pattern):
Pattern.__init__(self, pattern)
self.tag = "code"
- def handleMatch(self, m, doc):
- el = doc.createElement(self.tag)
- text = m.group(2).strip()
- #text = text.replace("&", "&amp;")
- el.appendChild(doc.createTextNode(text))
+ def handleMatch(self, m):
+ el = etree.Element(self.tag)
+ el.text = m.group(3).strip()
return el
class DoubleTagPattern (SimpleTagPattern):
"""
- Return a TextNode nested in tag2 nested in tag1.
+ Return an ElementTree element nested in tag2 nested in tag1.
Usefull for strong emphasis etc.
"""
- def handleMatch(self, m, doc):
+ def handleMatch(self, m):
tag1, tag2 = self.tag.split(",")
- el1 = doc.createElement(tag1)
- el2 = doc.createElement(tag2)
- el1.appendChild(el2)
- el2.appendChild(doc.createTextNode(m.group(2)))
+ el1 = etree.Element(tag1)
+ el2 = etree.SubElement(el1, tag2)
+ el2.text = m.group(3)
return el1
class HtmlPattern (Pattern):
""" Store raw inline html and return a placeholder. """
- def handleMatch (self, m, doc):
+ def handleMatch (self, m):
rawhtml = m.group(2)
inline = True
place_holder = self.stash.store(rawhtml)
- return doc.createTextNode(place_holder)
+ return place_holder
class LinkPattern (Pattern):
- """ Return a NanoDom link Element from the given match. """
- def handleMatch(self, m, doc):
- el = doc.createElement('a')
- el.appendChild(doc.createTextNode(m.group(2)))
- parts = m.group(9).split('"')
- # We should now have [], [href], or [href, title]
- if parts:
- el.setAttribute('href', self.sanatize_url(parts[0].strip()))
+ """ Return a link element from the given match. """
+ def handleMatch(self, m):
+ el = etree.Element("a")
+ el.text = m.group(2)
+ title = m.group(11)
+ href = m.group(9)
+ if href:
+ if href[0] == "<":
+ href = href[1:-1]
+ el.set("href", self.sanatize_url(href.strip()))
else:
- el.setAttribute('href', "")
- if len(parts) > 1:
- # we also got a title
- title = '"' + '"'.join(parts[1:]).strip()
+ el.set("href", "")
+
+ if title:
title = dequote(title) #.replace('"', "&quot;")
- el.setAttribute('title', title)
+ el.set("title", title)
return el
def sanatize_url(self, url):
@@ -947,30 +698,32 @@ class LinkPattern (Pattern):
return urlunparse(url)
class ImagePattern(LinkPattern):
- """ Return a NanoDom img Element from the given match. """
- def handleMatch(self, m, doc):
- el = doc.createElement('img')
+ """ Return an img element from the given match. """
+
+ def handleMatch(self, m):
+ el = etree.Element("img")
src_parts = m.group(9).split()
if src_parts:
- el.setAttribute('src', self.sanatize_url(src_parts[0]))
+ src = src_parts[0]
+ if src[0] == "<" and src[-1] == ">":
+ src = src[1:-1]
+ el.set('src', self.sanatize_url(src))
else:
- el.setAttribute('src', "")
+ el.set('src', "")
if len(src_parts) > 1:
- el.setAttribute('title', dequote(" ".join(src_parts[1:])))
+ el.set('title', dequote(" ".join(src_parts[1:])))
+
if ENABLE_ATTRIBUTES:
- text = doc.createTextNode(m.group(2))
- el.appendChild(text)
- text.handleAttributes()
- truealt = text.value
- el.childNodes.remove(text)
+ truealt = handleAttributes(m.group(2), el)
else:
truealt = m.group(2)
- el.setAttribute('alt', truealt)
+
+ el.set('alt', truealt)
return el
class ReferencePattern(LinkPattern):
- """ Match to a stored reference and return a NanoDom link Element. """
- def handleMatch(self, m, doc):
+ """ Match to a stored reference and return link element. """
+ def handleMatch(self, m):
if m.group(9):
id = m.group(9).lower()
@@ -982,35 +735,38 @@ class ReferencePattern(LinkPattern):
if not self.references.has_key(id): # ignore undefined refs
return None
href, title = self.references[id]
+
text = m.group(2)
- return self.makeTag(href, title, text, doc)
+ return self.makeTag(href, title, text)
- def makeTag(self, href, title, text, doc):
- el = doc.createElement('a')
- el.setAttribute('href', self.sanatize_url(href))
+ def makeTag(self, href, title, text):
+ el = etree.Element('a')
+
+ el.set('href', self.sanatize_url(href))
if title:
- el.setAttribute('title', title)
- el.appendChild(doc.createTextNode(text))
+ el.set('title', title)
+
+ el.text = text
return el
class ImageReferencePattern (ReferencePattern):
- """ Match to a stored reference and return a NanoDom img Element. """
- def makeTag(self, href, title, text, doc):
- el = doc.createElement('img')
- el.setAttribute('src', self.sanatize_url(href))
+ """ Match to a stored reference and return img element. """
+ def makeTag(self, href, title, text):
+ el = etree.Element("img")
+ el.set("src", self.sanatize_url(href))
if title:
- el.setAttribute('title', title)
- el.setAttribute('alt', text)
+ el.set("title", title)
+ el.set("alt", text)
return el
class AutolinkPattern (Pattern):
""" Return a link Element given an autolink (`<http://example/com>`). """
- def handleMatch(self, m, doc):
- el = doc.createElement('a')
- el.setAttribute('href', m.group(2))
- el.appendChild(doc.createTextNode(m.group(2)))
+ def handleMatch(self, m):
+ el = etree.Element("a")
+ el.set('href', m.group(2))
+ el.text = m.group(2)
return el
class AutomailPattern (Pattern):
@@ -1018,37 +774,35 @@ class AutomailPattern (Pattern):
Return a mailto link Element given an automail link (`<foo@example.com>`).
"""
- def handleMatch(self, m, doc):
- el = doc.createElement('a')
+ def handleMatch(self, m):
+ el = etree.Element('a')
email = m.group(2)
if email.startswith("mailto:"):
email = email[len("mailto:"):]
+ el.text = ""
for letter in email:
- entity = doc.createEntityReference("#%d" % ord(letter))
- el.appendChild(entity)
+ el.text += codepoint2name(ord(letter))
+
mailto = "mailto:" + email
- mailto = "".join(['&#%d;' % ord(letter) for letter in mailto])
- el.setAttribute('href', mailto)
+ mailto = "".join([AND_SUBSTITUTE + '#%d;' %
+ ord(letter) for letter in mailto])
+ el.set('href', mailto)
return el
ESCAPE_PATTERN = SimpleTextPattern(ESCAPE_RE)
NOT_STRONG_PATTERN = SimpleTextPattern(NOT_STRONG_RE)
BACKTICK_PATTERN = BacktickPattern(BACKTICK_RE)
-DOUBLE_BACKTICK_PATTERN = BacktickPattern(DOUBLE_BACKTICK_RE)
STRONG_PATTERN = SimpleTagPattern(STRONG_RE, 'strong')
-STRONG_PATTERN_2 = SimpleTagPattern(STRONG_2_RE, 'strong')
EMPHASIS_PATTERN = SimpleTagPattern(EMPHASIS_RE, 'em')
EMPHASIS_PATTERN_2 = SimpleTagPattern(EMPHASIS_2_RE, 'em')
STRONG_EM_PATTERN = DoubleTagPattern(STRONG_EM_RE, 'strong,em')
-STRONG_EM_PATTERN_2 = DoubleTagPattern(STRONG_EM_2_RE, 'strong,em')
LINE_BREAK_PATTERN = SubstituteTagPattern(LINE_BREAK_RE, 'br ')
LINE_BREAK_PATTERN_2 = SubstituteTagPattern(LINE_BREAK_2_RE, 'br ')
LINK_PATTERN = LinkPattern(LINK_RE)
-LINK_ANGLED_PATTERN = LinkPattern(LINK_ANGLED_RE)
IMAGE_LINK_PATTERN = ImagePattern(IMAGE_LINK_RE)
IMAGE_REFERENCE_PATTERN = ImageReferencePattern(IMAGE_REFERENCE_RE)
REFERENCE_PATTERN = ReferencePattern(REFERENCE_RE)
@@ -1075,10 +829,10 @@ There are two types of post-processors: Postprocessor and TextPostprocessor
class Postprocessor:
"""
- Postprocessors are run before the dom it converted back into text.
+ Postprocessors are run before the ElementTree serialization.
Each Postprocessor implements a "run" method that takes a pointer to a
- NanoDom document, modifies it as necessary and returns a NanoDom
+ ElementTree, modifies it as necessary and returns an ElementTree
document.
Postprocessors must extend markdown.Postprocessor.
@@ -1088,11 +842,10 @@ class Postprocessor:
"""
- def run(self, dom):
+ def run(self, et):
"""
Subclasses of Postprocessor should implement a `run` method, which
- takes a NanoDOm document and returns a (possably modified) NanoDom
- document.
+ takes an ElementTree and returns a (possibly modified) ElementTree.
"""
pass
@@ -1101,7 +854,7 @@ class Postprocessor:
class TextPostprocessor:
"""
- TextPostprocessors are run after the dom it converted back into text.
+ TextPostprocessors are run after the ElementTree is converted back into text.
Each TextPostprocessor implements a "run" method that takes a pointer to a
text string, modifies it as necessary and returns a text string.
@@ -1152,6 +905,20 @@ class RawHtmlTextPostprocessor(TextPostprocessor):
RAWHTMLTEXTPOSTPROCESSOR = RawHtmlTextPostprocessor()
+
+class AndSubstitutePostprocessor(TextPostprocessor):
+ """ Restore valid entities """
+ def __init__(self):
+ pass
+
+ def run(self, text):
+
+ text = text.replace(AND_SUBSTITUTE, "&")
+ return text
+
+ANDSUBSTITUTETEXTPOSTPROCESSOR = AndSubstitutePostprocessor()
+
+
"""
======================================================================
========================== MISC AUXILIARY CLASSES ====================
@@ -1280,7 +1047,47 @@ def dequote(string):
return string[1:-1]
else:
return string
-
+
+
+class InlineStash:
+
+ def __init__(self):
+ self.prefix = INLINE_PLACEHOLDER_PREFIX
+ self.suffix = INLINE_PLACEHOLDER_SUFFIX
+ self._nodes = {}
+ self.phLength = 4 + len(self.prefix) + len(self.suffix)
+
+ def _genPlaceholder(self, type):
+ """ Generates placeholder """
+ id = "%04d" % len(self._nodes)
+ hash = "%s%s:%s%s" % (self.prefix, type, id,
+ self.suffix)
+ return hash, id
+
+ def extractId(self, data, index):
+ """ Extracting id from data string, starting from index """
+ endIndex = data.find(self.suffix, index+1)
+ if endIndex == -1:
+ return None, index + 1
+ else:
+ pair = data[index + len(self.prefix): endIndex].split(":")
+ if len(pair) == 2:
+ return pair[1], endIndex + len(self.suffix)
+ else:
+ return None, index + 1
+
+ def isin(self, id):
+ return self._nodes.has_key(id)
+
+ def get(self, id):
+ """ Returns node by id """
+ return self._nodes.get(id)
+
+ def add(self, node, type):
+ pholder, id = self._genPlaceholder(type)
+ self._nodes[id] = node
+ return pholder
+
"""
======================================================================
========================== CORE MARKDOWN =============================
@@ -1318,6 +1125,7 @@ class CorePatterns:
re.DOTALL)
self.regExp['containsline'] = re.compile(r'^([-]*)$|^([=]*)$', re.M)
+ self.regExp['attr'] = re.compile("\{@([^\}]*)=([^\}]*)}") # {@id=123}
RE = CorePatterns()
@@ -1353,8 +1161,8 @@ class Markdown:
self.safeMode = safe_mode
self.blockGuru = BlockGuru()
self.registeredExtensions = []
- self.stripTopLevelTags = 1
self.docType = ""
+ self.stripTopLevelTags = True
self.textPreprocessors = [HTML_BLOCK_PREPROCESSOR]
@@ -1370,20 +1178,19 @@ class Markdown:
self.textPostprocessors = [# a footnote postprocessor will get
# inserted here
- RAWHTMLTEXTPOSTPROCESSOR]
+ RAWHTMLTEXTPOSTPROCESSOR,
+ ANDSUBSTITUTETEXTPOSTPROCESSOR]
self.prePatterns = []
-
-
- self.inlinePatterns = [DOUBLE_BACKTICK_PATTERN,
+
+ self.inlinePatterns = [
BACKTICK_PATTERN,
ESCAPE_PATTERN,
REFERENCE_PATTERN,
- LINK_ANGLED_PATTERN,
LINK_PATTERN,
IMAGE_LINK_PATTERN,
- IMAGE_REFERENCE_PATTERN,
- AUTOLINK_PATTERN,
+ IMAGE_REFERENCE_PATTERN,
+ AUTOLINK_PATTERN,
AUTOMAIL_PATTERN,
LINE_BREAK_PATTERN_2,
LINE_BREAK_PATTERN,
@@ -1391,13 +1198,15 @@ class Markdown:
ENTITY_PATTERN,
NOT_STRONG_PATTERN,
STRONG_EM_PATTERN,
- STRONG_EM_PATTERN_2,
STRONG_PATTERN,
- STRONG_PATTERN_2,
EMPHASIS_PATTERN,
EMPHASIS_PATTERN_2
# The order of the handlers matters!!!
]
+
+ self.inlineStash = InlineStash()
+
+ self._inlineOperationID = None
self.registerExtensions(extensions = extensions,
configs = extension_configs)
@@ -1440,6 +1249,7 @@ class Markdown:
"""
self.references={}
self.htmlStash = HtmlStash()
+ self.inlineStash = InlineStash()
HTML_BLOCK_PREPROCESSOR.stash = self.htmlStash
LINE_PREPROCESSOR.stash = self.htmlStash
@@ -1461,17 +1271,13 @@ class Markdown:
def _transform(self):
"""Transform the Markdown text into a XHTML body document.
- Returns: A NanoDom Document
+ Returns: ElementTree object
"""
# Setup the document
-
- self.doc = Document()
- self.top_element = self.doc.createElement("span")
- self.top_element.appendChild(self.doc.createTextNode('\n'))
- self.top_element.setAttribute('class', 'markdown')
- self.doc.appendChild(self.top_element)
+
+ self.root = etree.Element("span")
# Split into lines and run the preprocessors that will work with
# self.lines
@@ -1482,32 +1288,24 @@ class Markdown:
for prep in self.preprocessors :
self.lines = prep.run(self.lines)
- # Create a NanoDom tree from the lines and attach it to Document
-
+ # Create an ElementTree from the lines
buffer = []
for line in self.lines:
if line.startswith("#"):
- self._processSection(self.top_element, buffer)
+
+ self._processSection(self.root, buffer)
buffer = [line]
else:
buffer.append(line)
- self._processSection(self.top_element, buffer)
-
- #self._processSection(self.top_element, self.lines)
-
- # Not sure why I put this in but let's leave it for now.
- self.top_element.appendChild(self.doc.createTextNode('\n'))
-
- # Run the post-processors
- for postprocessor in self.postprocessors:
- postprocessor.run(self.doc)
- return self.doc
+ self._processSection(self.root, buffer)
+
+ return etree.ElementTree(self.root)
def _processSection(self, parent_elem, lines,
- inList = 0, looseList = 0):
+ inList=0, looseList=0):
"""
Process a section of a source document, looking for high
level structural elements like lists, block quotes, code
@@ -1517,17 +1315,22 @@ class Markdown:
Keyword arguments:
- * parent_elem: A NanoDom element to which the content will be added.
+ * parent_elem: An ElementTree element to which the content will be added.
* lines: a list of lines
* inList: a level
Returns: None
"""
-
+
# Loop through lines until none left.
while lines:
-
+
+ # Skipping empty line
+ if not lines[0]:
+ lines = lines[1:]
+ continue
+
# Check if this section starts with a list, a blockquote or
# a code block
@@ -1571,11 +1374,17 @@ class Markdown:
else: # Ok, so it's just a simple block
paragraph, lines = self._linesUntil(lines, lambda line:
- not line.strip())
+ not line.strip() or line[0] == '>')
if len(paragraph) and paragraph[0].startswith('#'):
self._processHeader(parent_elem, paragraph)
+
+ elif len(paragraph) and \
+ RE.regExp["isline3"].match(paragraph[0]):
+ self._processHR(parent_elem)
+ lines = paragraph[1:] + lines
+
elif paragraph:
self._processParagraph(parent_elem, paragraph,
inList, looseList)
@@ -1583,48 +1392,59 @@ class Markdown:
if lines and not lines[0].strip():
lines = lines[1:] # skip the first (blank) line
-
- def _processHeader(self, parent_elem, paragraph):
+ def _processHR(self, parentElem):
+ hr = etree.SubElement(parentElem, "hr")
+
+ def _processHeader(self, parentElem, paragraph):
m = RE.regExp['header'].match(paragraph[0])
if m:
level = len(m.group(1))
- h = self.doc.createElement("h%d" % level)
- parent_elem.appendChild(h)
- for item in self._handleInline(m.group(2).strip()):
- h.appendChild(item)
+ h = etree.SubElement(parentElem, "h%d" % level)
+ inline = etree.SubElement(h, "inline")
+ inline.text = m.group(2).strip()
else:
message(CRITICAL, "We've got a problem header!")
- def _processParagraph(self, parent_elem, paragraph, inList, looseList):
- list = self._handleInline("\n".join(paragraph))
+ def _processParagraph(self, parentElem, paragraph, inList, looseList):
- if ( parent_elem.nodeName == 'li'
- and not (looseList or parent_elem.childNodes)):
+ if ( parentElem.tag == 'li'
+ and not (looseList or parentElem.getchildren())):
# If this is the first paragraph inside "li", don't
# put <p> around it - append the paragraph bits directly
- # onto parent_elem
- el = parent_elem
+ # onto parentElem
+ el = parentElem
else:
# Otherwise make a "p" element
- el = self.doc.createElement("p")
- parent_elem.appendChild(el)
+ el = etree.SubElement(parentElem, "p")
- for item in list:
- el.appendChild(item)
-
-
- def _processUList(self, parent_elem, lines, inList):
- self._processList(parent_elem, lines, inList,
+ dump = []
+
+ # Searching for hr
+ for line in paragraph:
+ if RE.regExp["isline3"].match(line):
+ inline = etree.SubElement(el, "inline")
+ inline.text = "\n".join(dump)
+ etree.SubElement(el, "hr")
+ dump.clear()
+ else:
+ dump.append(line)
+ if dump:
+ text = "\n".join(dump)
+ inline = etree.SubElement(el, "inline")
+ inline.text = text
+
+ def _processUList(self, parentElem, lines, inList):
+ self._processList(parentElem, lines, inList,
listexpr='ul', tag = 'ul')
- def _processOList(self, parent_elem, lines, inList):
- self._processList(parent_elem, lines, inList,
+ def _processOList(self, parentElem, lines, inList):
+ self._processList(parentElem, lines, inList,
listexpr='ol', tag = 'ol')
- def _processList(self, parent_elem, lines, inList, listexpr, tag):
+ def _processList(self, parentElem, lines, inList, listexpr, tag):
"""
Given a list of document lines starting with a list item,
finds the end of the list, breaks it up, and recursively
@@ -1632,7 +1452,7 @@ class Markdown:
Keyword arguments:
- * parent_elem: A dom element to which the content will be added
+ * parentElem: An ElementTree element to which the content will be added
* lines: a list of lines
* inList: a level
@@ -1640,8 +1460,7 @@ class Markdown:
"""
- ul = self.doc.createElement(tag) # ul might actually be '<ol>'
- parent_elem.appendChild(ul)
+ ul = etree.SubElement(parentElem, tag) # ul might actually be '<ol>'
looseList = 0
@@ -1669,6 +1488,7 @@ class Markdown:
break
# Check if the next non-blank line is still a part of the list
+
if ( RE.regExp[listexpr].match(next) or
RE.regExp['tabbed'].match(next) ):
# get rid of any white space in the line
@@ -1702,16 +1522,15 @@ class Markdown:
else:
i += 1
- # Add the dom elements
+ # Add the ElementTree elements
for item in items:
- li = self.doc.createElement("li")
- ul.appendChild(li)
+ li = etree.SubElement(ul, "li")
self._processSection(li, item, inList + 1, looseList = looseList)
# Process the remaining part of the section
- self._processSection(parent_elem, lines[i:], inList)
+ self._processSection(parentElem, lines[i:], inList)
def _linesUntil(self, lines, condition):
@@ -1725,12 +1544,13 @@ class Markdown:
i = -1
for line in lines:
i += 1
- if condition(line): break
+ if condition(line):
+ break
else:
i += 1
return lines[:i], lines[i:]
- def _processQuote(self, parent_elem, lines, inList):
+ def _processQuote(self, parentElem, lines, inList):
"""
Given a list of document lines starting with a quote finds
the end of the quote, unindents it and recursively
@@ -1739,7 +1559,7 @@ class Markdown:
Keyword arguments:
- * parent_elem: DOM element to which the content will be added
+ * parentElem: ElementTree element to which the content will be added
* lines: a list of lines
* inList: a level
@@ -1766,25 +1586,24 @@ class Markdown:
else:
break
- blockquote = self.doc.createElement('blockquote')
- parent_elem.appendChild(blockquote)
+ blockquote = etree.SubElement(parentElem, "blockquote")
self._processSection(blockquote, dequoted, inList)
- self._processSection(parent_elem, lines[i:], inList)
+ self._processSection(parentElem, lines[i:], inList)
- def _processCodeBlock(self, parent_elem, lines, inList):
+ def _processCodeBlock(self, parentElem, lines, inList):
"""
Given a list of document lines starting with a code block
- finds the end of the block, puts it into the dom verbatim
+ finds the end of the block, puts it into the ElementTree verbatim
wrapped in ("<pre><code>") and recursively processes the
the remainder of the text file.
Keyword arguments:
- * parent_elem: DOM element to which the content will be added
+ * parentElem: ElementTree element to which the content will be added
* lines: a list of lines
* inList: a level
@@ -1794,163 +1613,264 @@ class Markdown:
detabbed, theRest = self.blockGuru.detectTabbed(lines)
- pre = self.doc.createElement('pre')
- code = self.doc.createElement('code')
- parent_elem.appendChild(pre)
- pre.appendChild(code)
+ pre = etree.SubElement(parentElem, "pre")
+ code = etree.SubElement(pre, "code")
+
text = "\n".join(detabbed).rstrip()+"\n"
- #text = text.replace("&", "&amp;")
- code.appendChild(self.doc.createTextNode(text))
- self._processSection(parent_elem, theRest, inList)
-
-
-
- def _handleInline (self, line, patternIndex=0):
+ code.text = text
+ self._processSection(parentElem, theRest, inList)
+
+ def _handleInline(self, data, patternIndex=0):
"""
- Transform a Markdown line with inline elements to an XHTML
- fragment.
-
- This function uses auxiliary objects called inline patterns.
- See notes on inline patterns above.
+ Processing string with inline patterns and replacing it
+ with placeholders
Keyword arguments:
- * line: A line of Markdown text
+ * data: A line of Markdown text
* patternIndex: The index of the inlinePattern to start with
- Return: A list of NanoDom nodes
+ Return: String with placeholders.
"""
-
-
- parts = [line]
-
- while patternIndex < len(self.inlinePatterns):
-
- i = 0
-
- while i < len(parts):
-
- x = parts[i]
-
- if isinstance(x, (str, unicode)):
- result = self._applyPattern(x, \
- self.inlinePatterns[patternIndex], \
- patternIndex)
-
- if result:
- i -= 1
- parts.remove(x)
- for y in result:
- parts.insert(i+1,y)
-
- i += 1
- patternIndex += 1
-
- for i in range(len(parts)):
- x = parts[i]
- if isinstance(x, (str, unicode)):
- parts[i] = self.doc.createTextNode(x)
-
- return parts
+ startIndex = 0
-
- def _applyPattern(self, line, pattern, patternIndex):
-
+ while patternIndex < len(self.inlinePatterns):
+
+ data, matched, startIndex = self._applyInline(
+ self.inlinePatterns[patternIndex],
+ data, patternIndex, startIndex)
+ if not matched:
+ patternIndex += 1
+ return data
+
+ def _applyInline(self, pattern, data, patternIndex, startIndex=0):
"""
Given a pattern name, this function checks if the line
- fits the pattern, creates the necessary elements, and returns
- back a list consisting of NanoDom elements and/or strings.
+ fits the pattern, creates the necessary elements, adds them
+ to InlineStash, and returns a string with placeholders
+ instead of ElementTree elements.
Keyword arguments:
- * line: the text to be processed
+ * data: the text to be processed
* pattern: the pattern to be checked
+ * patternIndex: index of current pattern
+ * startIndex: string index from which the search starts
- Returns: The appropriate newly created NanoDom element if the
- pattern matches, None otherwise.
+ Returns: String with placeholders.
"""
+ match = pattern.getCompiledRegExp().match(data[startIndex:])
+ leftData = data[:startIndex]
+
+ if not match:
+ return data, False, 0
- # match the line to pattern's pre-compiled reg exp.
- # if no match, move on.
-
-
-
- m = pattern.getCompiledRegExp().match(line)
- if not m:
- return None
-
- # if we got a match let the pattern make us a NanoDom node
- # if it doesn't, move on
- node = pattern.handleMatch(m, self.doc)
-
- # check if any of this nodes have children that need processing
-
- if isinstance(node, Element):
-
- if not node.nodeName in ["code", "pre"]:
- for child in node.childNodes:
- if isinstance(child, TextNode):
-
- result = self._handleInline(child.value, patternIndex+1)
-
- if result:
-
- if result == [child]:
- continue
-
- result.reverse()
- #to make insertion easier
+ node = pattern.handleMatch(match)
+
+ if node is None:
+ return data, True, len(leftData) + match.span(len(match.groups()))[0]
+
+ if not isstr(node):
+ if not node.tag in ["code", "pre"]:
+ # We need to process current node too
+ for child in [node] + node.getchildren():
+ if not isstr(node):
+ if child.text:
+ child.text = self._handleInline(child.text,
+ patternIndex + 1)
+ if child.tail:
+ child.tail = self._handleInline(child.tail,
+ patternIndex)
+
+ pholder = self.inlineStash.add(node, pattern.type())
+
+ return "%s%s%s%s" % (leftData,
+ match.group(1),
+ pholder, match.groups()[-1]), True, 0
+
+ def _processElementText(self, node, subnode, isText=True):
+
+ if isText:
+ text = subnode.text
+ subnode.text = None
+ else:
+ text = subnode.tail
+ subnode.tail = None
+
+ childResult = self._processPlaceholders(text, subnode)
+
+ if not isText and node is not subnode:
+ pos = node.getchildren().index(subnode)
+ node.remove(subnode)
+ else:
+ pos = 0
+
+ childResult.reverse()
+ for newChild in childResult:
+ node.insert(pos, newChild)
+
+ def _processPlaceholders(self, data, parent):
+ """
+ Processes string with placeholders and generates ElementTree tree.
+
+ * data: string with placeholders instead of ElementTree elements.
- position = node.childNodes.index(child)
+ Returns: list with ElementTree elements with applied inline patterns.
+ """
+
+ def linkText(text):
+ if text:
+ if result:
+ if result[-1].tail:
+ result[-1].tail += text
+ else:
+ result[-1].tail = text
+ else:
+ if parent.text:
+ parent.text += text
+ else:
+ parent.text = text
+
+ result = []
+ prefix = self.inlineStash.prefix
+ strartIndex = 0
+ while data:
+
+ index = data.find(prefix, strartIndex)
+ if index != -1:
+
+ id, phEndIndex = self.inlineStash.extractId(data, index)
+
+ if self.inlineStash.isin(id):
+
+ node = self.inlineStash.get(id)
+
+ if index > 0:
+ text = data[strartIndex:index]
+ linkText(text)
+
+ if not isstr(node): # it's Element
+
+ for child in [node] + node.getchildren():
+
+ if child.tail:
+ self._processElementText(node, child, False)
- node.removeChild(child)
-
- for item in result:
-
- if isinstance(item, (str, unicode)):
- if len(item) > 0:
- node.insertChild(position,
- self.doc.createTextNode(item))
- else:
- node.insertChild(position, item)
+ if child.text:
+ self._processElementText(child, child)
+
+ else: # it's just a string
+ linkText(node)
+ strartIndex = phEndIndex
+ continue
+
+ strartIndex = phEndIndex
+ result.append(node)
+
+ else: # wrong placeholder
+ end = index + len(prefix)
+ linkText(data[strartIndex:end])
+ strartIndex = end
+ else:
+
+ text = data[strartIndex:]
+ linkText(text)
+ data = ""
+ return result
+
+ def _processTree(self, el):
+ """
+ Processing ElementTree, and applying inline patterns
+
+ Keyword arguments:
+
+ * el - parent element of ElementTree.
+ Returns: ElementTree object with applied inline patterns.
+ """
- if node:
- # Those are in the reverse order!
- return ( m.groups()[-1], # the string to the left
- node, # the new node
- m.group(1)) # the string to the right of the match
-
- else:
- return None
+ stack = [el]
+ while stack:
+ currElement = stack.pop()
+ insertQueue = []
+ for child in currElement.getchildren():
+
+ if child.tag == "inline":
+
+ lst = self._processPlaceholders(self._handleInline(
+ child.text), currElement)
+
+ pos = currElement.getchildren().index(child)
+
+ insertQueue.append((child, pos, lst))
+
+ else:
+ stack.append(child)
+
+
+ for element, pos, lst in insertQueue:
+ currElement.remove(element)
+ if currElement.text:
+ currElement.text = handleAttributes(currElement.text,
+ currElement)
+ for newChild in lst:
+ # Processing attributes
+ if newChild.tail:
+ newChild.tail = handleAttributes(newChild.tail,
+ currElement)
+ if newChild.text:
+ newChild.text = handleAttributes(newChild.text,
+ newChild)
+ currElement.insert(pos, newChild)
+ pos += 1
+
+
+ def applyInlinePatterns(self, markdownTree):
+ """
+ Return ElementTree with applied
+ inline patterns
+
+ Keyword arguments:
+
+ * markdownTree: ElementTree object representing the Markdown tree.
- def convert (self, source=None):
+ Returns: ElementTree object.
"""
- Return the document in XHTML format.
+
+ el = markdownTree.getroot()
+
+ self._processTree(el)
+
+ return markdownTree
+
+ def markdownToTree(self, source=None):
+ """
+ Return ElementTree without applying inline patterns;
+ all data that should be processed with
+ inline patterns is included in <inline></inline> sections.
+
Keyword arguments:
* source: An ascii or unicode string of Markdown formated text.
- Returns: A serialized XHTML body.
-
+ Returns: ElementTree object.
"""
-
if source is not None: #Allow blank string
self.source = source
-
+
if not self.source:
return u""
-
+
try:
self.source = unicode(self.source)
except UnicodeDecodeError:
message(CRITICAL, 'UnicodeDecodeError: Markdown only accepts unicode or ascii input.')
return u""
-
+
# Fixup the source text
self.source = self.source.replace(START, "")
@@ -1962,20 +1882,48 @@ class Markdown:
for pp in self.textPreprocessors:
self.source = pp.run(self.source)
+
+ markdownTree = self._transform()
+
+ return markdownTree
+
+
- doc = self._transform()
- xml = doc.toxml()
+ def convert (self, source=None):
+ """
+ Return the document in XHTML format.
+ Keyword arguments:
+
+ * source: An ascii or unicode string of Markdown formatted text.
- # Return everything but the top level tag
+ Returns: A serialized XHTML body.
+ """
+
+ tree = self.markdownToTree(source)
+
+ root = self.applyInlinePatterns(tree).getroot()
+
+ # Run the post-processors
+ for postprocessor in self.postprocessors:
+ postprocessor.stash = self.htmlStash
+ newRoot = postprocessor.run(root)
+ if newRoot:
+ root = newRoot
+
+ indentETree(root)
+
+ xml = codecs.decode(etree.tostring(root, encoding="utf8"), "utf8")
+
if self.stripTopLevelTags:
- xml = xml.strip()[23:-7] + "\n"
+ xml = xml.strip()[44:-7] + "\n"
+ # Run the text post-processors
for pp in self.textPostprocessors:
xml = pp.run(xml)
- return (self.docType + xml).strip()
+ return xml.strip()
def __str__(self):
diff --git a/mdx_codehilite.py b/mdx_codehilite.py
index 6f81598..73c1a79 100644..100755
--- a/mdx_codehilite.py
+++ b/mdx_codehilite.py
@@ -80,10 +80,11 @@ class CodeHilite:
except ImportError:
# just escape and pass through
txt = self._escape(self.src)
- if num:
+ '''if num:
txt = self._number(txt)
else :
- txt = '<div class="codehilite"><pre>%s</pre></div>\n'% txt
+ txt = '<div class="codehilite"><pre>%s</pre></div>\n'% txt'''
+ txt = self._number(txt)
return txt
else:
try:
@@ -204,7 +205,10 @@ class CodeHiliteExtention(markdown.Extension):
text = "\n".join(detabbed).rstrip()+"\n"
code = CodeHilite(text, linenos=self.config['force_linenos'][0])
placeholder = md.htmlStash.store(code.hilite(), safe=True)
- parent_elem.appendChild(md.doc.createTextNode(placeholder))
+ if parent_elem.text:
+ parent_elem.text += placeholder
+ else:
+ parent_elem.text = placeholder
md._processSection(parent_elem, theRest, inList)
md._processCodeBlock = _hiliteCodeBlock
diff --git a/mdx_footnotes.py b/mdx_footnotes.py
index 4464c50..b46efbb 100644..100755
--- a/mdx_footnotes.py
+++ b/mdx_footnotes.py
@@ -27,6 +27,7 @@ FN_BACKLINK_TEXT = "zz1337820767766393qq"
import re, markdown, random
+from markdown import etree
class FootnoteExtension (markdown.Extension):
@@ -80,15 +81,21 @@ class FootnoteExtension (markdown.Extension):
self.used_footnotes={}
self.footnotes = {}
- def findFootnotesPlaceholder(self, doc) :
- def findFootnotePlaceholderFn(node=None, indent=0):
- if node.type == 'text':
- if node.value.find(self.getConfig("PLACE_MARKER")) > -1 :
- return True
-
- fn_div_list = doc.find(findFootnotePlaceholderFn)
- if fn_div_list :
- return fn_div_list[0]
+ def findFootnotesPlaceholder(self, root):
+
+ def finder(element):
+ for child in element:
+ if child.text:
+ if child.text.find(self.getConfig("PLACE_MARKER")) > -1:
+ return child, True
+ if child.tail:
+ if child.tail.find(self.getConfig("PLACE_MARKER")) > -1:
+ return (child, element), False
+ finder(child)
+ return None
+
+ res = finder(root)
+ return res
def setFootnote(self, id, text) :
@@ -100,7 +107,7 @@ class FootnoteExtension (markdown.Extension):
def makeFootnoteRefId(self, num) :
return 'fnr%d%s' % (num, self.footnote_suffix)
- def makeFootnotesDiv (self, doc) :
+ def makeFootnotesDiv (self, root) :
"""Creates the div with class='footnote' and populates it with
the text of the footnotes.
@@ -109,45 +116,39 @@ class FootnoteExtension (markdown.Extension):
if not self.footnotes.keys() :
return None
- div = doc.createElement("div")
- div.setAttribute('class', 'footnote')
- hr = doc.createElement("hr")
- div.appendChild(hr)
- ol = doc.createElement("ol")
- div.appendChild(ol)
+ div = etree.Element("div")
+ div.set('class', 'footnote')
+ hr = etree.SubElement(div, "hr")
+ ol = etree.SubElement(div, "ol")
+
footnotes = [(self.used_footnotes[id], id)
for id in self.footnotes.keys()]
footnotes.sort()
for i, id in footnotes :
- li = doc.createElement('li')
- li.setAttribute('id', self.makeFootnoteId(i))
+ li = etree.SubElement(ol, "li")
+ li.set("id", self.makeFootnoteId(i))
self.md._processSection(li, self.footnotes[id].split("\n"), looseList=1)
- #li.appendChild(doc.createTextNode(self.footnotes[id]))
-
- backlink = doc.createElement('a')
- backlink.setAttribute('href', '#' + self.makeFootnoteRefId(i))
- backlink.setAttribute('class', 'footnoteBackLink')
- backlink.setAttribute('title',
- 'Jump back to footnote %d in the text' % i)
- backlink.appendChild(doc.createTextNode(FN_BACKLINK_TEXT))
-
- if li.childNodes :
- node = li.childNodes[-1]
- if node.type == "text" :
- li.appendChild(backlink)
- elif node.nodeName == "p":
- node.appendChild(backlink)
+ backlink = etree.Element("a")
+ backlink.set("href", "#" + self.makeFootnoteRefId(i))
+ backlink.set("class", "footnoteBackLink")
+ backlink.set("title",
+ "Jump back to footnote %d in the text" % i)
+ backlink.text = FN_BACKLINK_TEXT
+
+ if li.getchildren():
+ node = li[-1]
+ if node.text:
+ li.append(backlink)
+ elif node.tag == "p":
+ node.append(backlink)
else:
- p = doc.createElement('p')
- p.appendChild(backlink)
- li.appendChild(p)
-
- ol.appendChild(li)
-
+ p = etree.SubElement(li, "p")
+ p.append(backlink)
+ div = self.md.applyInlinePatterns(etree.ElementTree(div)).getroot()
return div
@@ -194,7 +195,7 @@ class FootnotePreprocessor :
plain = lines[:i]
detabbed, theRest = self.blockGuru.detectTabbed(lines[i+1:])
-
+
self.footnotes.setFootnote(id,
footnote + "\n"
+ "\n".join(detabbed))
@@ -227,15 +228,14 @@ class FootnotePattern (markdown.Pattern) :
markdown.Pattern.__init__(self, pattern)
self.footnotes = footnotes
- def handleMatch(self, m, doc) :
- sup = doc.createElement('sup')
- a = doc.createElement('a')
- sup.appendChild(a)
+ def handleMatch(self, m) :
+ sup = etree.Element("sup")
+ a = etree.SubElement(sup, "a")
id = m.group(2)
num = self.footnotes.used_footnotes[id]
- sup.setAttribute('id', self.footnotes.makeFootnoteRefId(num))
- a.setAttribute('href', '#' + self.footnotes.makeFootnoteId(num))
- a.appendChild(doc.createTextNode(str(num)))
+ sup.set('id', self.footnotes.makeFootnoteRefId(num))
+ a.set('href', '#' + self.footnotes.makeFootnoteId(num))
+ a.text = str(num)
return sup
class FootnotePostprocessor (markdown.Postprocessor):
@@ -243,14 +243,25 @@ class FootnotePostprocessor (markdown.Postprocessor):
def __init__ (self, footnotes) :
self.footnotes = footnotes
- def run(self, doc) :
- footnotesDiv = self.footnotes.makeFootnotesDiv(doc)
- if footnotesDiv :
- fnPlaceholder = self.extension.findFootnotesPlaceholder(doc)
- if fnPlaceholder :
+ def run(self, root):
+ footnotesDiv = self.footnotes.makeFootnotesDiv(root)
+ if footnotesDiv:
+ result = self.extension.findFootnotesPlaceholder(root)
+
+ if result:
+ node, isText = result
+ if isText:
+ node.text = None
+ node.getchildren().insert(0, footnotesDiv)
+ else:
+ child, element = node
+ ind = element.getchildren().find(child)
+ element.getchildren().insert(ind + 1, footnotesDiv)
+ child.tail = None
+
fnPlaceholder.parent.replaceChild(fnPlaceholder, footnotesDiv)
else :
- doc.documentElement.appendChild(footnotesDiv)
+ root.append(footnotesDiv)
class FootnoteTextPostprocessor (markdown.Postprocessor):
diff --git a/mdx_headerid.py b/mdx_headerid.py
index 2e658cd..2360071 100644..100755
--- a/mdx_headerid.py
+++ b/mdx_headerid.py
@@ -68,6 +68,7 @@ Dependencies:
"""
import markdown
+from markdown import etree
import re
from string import ascii_lowercase, digits, punctuation
@@ -106,15 +107,16 @@ class HeaderIdExtension (markdown.Extension) :
if m :
start_level, force_id = _get_meta()
level = len(m.group(1)) + start_level
- if level > 6: level = 6
- h = md.doc.createElement("h%d" % level)
- parent_elem.appendChild(h)
- for item in md._handleInline(m.group(2).strip()) :
- h.appendChild(item)
+ if level > 6:
+ level = 6
+ h = etree.Element("h%d" % level)
+ parent_elem.append(h)
+ inline = etree.SubElement(h, "inline")
+ inline.text = m.group(2).strip()
if m.group(3) :
- h.setAttribute('id', _unique_id(m.group(3)))
+ h.set('id', _unique_id(m.group(3)))
elif force_id:
- h.setAttribute('id', _create_id(m.group(2).strip()))
+ h.set('id', _create_id(m.group(2).strip()))
else :
message(CRITICAL, "We've got a problem header!")
diff --git a/mdx_imagelinks.py b/mdx_imagelinks.py
index e545b24..e545b24 100644..100755
--- a/mdx_imagelinks.py
+++ b/mdx_imagelinks.py
diff --git a/mdx_rss.py b/mdx_rss.py
index c30440a..b88b9b5 100644..100755
--- a/mdx_rss.py
+++ b/mdx_rss.py
@@ -1,4 +1,5 @@
import markdown
+from markdown import etree
DEFAULT_URL = "http://www.freewisdom.org/projects/python-markdown/"
DEFAULT_CREATOR = "Yuri Takhteyev"
@@ -18,7 +19,7 @@ month_map = { "Jan" : "01",
"November" : "11",
"December" : "12" }
-def get_time(heading) :
+def get_time(heading):
heading = heading.split("-")[0]
heading = heading.strip().replace(",", " ").replace(".", " ")
@@ -28,7 +29,7 @@ def get_time(heading) :
return rdftime(" ".join((month, date, year, "12:00:00 AM")))
-def rdftime(time) :
+def rdftime(time):
time = time.replace(":", " ")
time = time.replace("/", " ")
@@ -37,12 +38,12 @@ def rdftime(time) :
time[3], time[4], time[5])
-def get_date(text) :
+def get_date(text):
return "date"
class RssExtension (markdown.Extension):
- def extendMarkdown(self, md, md_globals) :
+ def extendMarkdown(self, md, md_globals):
self.config = { 'URL' : [DEFAULT_URL, "Main URL"],
'CREATOR' : [DEFAULT_CREATOR, "Feed creator's name"],
@@ -59,63 +60,59 @@ class RssExtension (markdown.Extension):
class RssPostProcessor (markdown.Postprocessor):
- def __init__(self, md) :
+ def __init__(self, md):
pass
- def run (self, doc) :
+ def run (self, root):
- oldDocElement = doc.documentElement
- rss = doc.createElement("rss")
- rss.setAttribute('version', '2.0')
+ rss = etree.Element("rss")
+ rss.set("version", "2.0")
- doc.appendChild(rss)
+ channel = etree.SubElement(rss, "channel")
- channel = doc.createElement("channel")
- rss.appendChild(channel)
for tag, text in (("title", self.ext.getConfig("TITLE")),
("link", self.ext.getConfig("URL")),
("description", None)):
- channel.appendChild(doc.createElement(tag, textNode = text))
-
- item = None
- for child in oldDocElement.childNodes :
-
- if child.type == "element" :
-
- if child.nodeName in ["h1", "h2", "h3", "h4", "h5"] :
-
- heading = child.childNodes[0].value.strip()
-
- item = doc.createElement("item")
- channel.appendChild(item)
- item.appendChild(doc.createElement("link",
- self.ext.getConfig("URL")))
-
- item.appendChild(doc.createElement("title", heading))
-
- guid = ''.join([x for x in heading if x.isalnum()])
-
- guidElem = doc.createElement("guid", guid)
- guidElem.setAttribute("isPermaLink", "false")
- item.appendChild(guidElem)
-
- elif child.nodeName in ["p"] :
-
- description = doc.createElement("description")
-
-
- content = "\n".join([node.toxml()
- for node in child.childNodes])
-
- cdata = doc.createCDATA(content)
-
- description.appendChild(cdata)
-
- if item :
- item.appendChild(description)
-
-
-def makeExtension(configs) :
+
+ element = etree.SubElement(channel, tag)
+ element.text = text
+
+ for child in root:
+
+
+ if child.tag in ["h1", "h2", "h3", "h4", "h5"] :
+
+ heading = child.text.strip()
+
+ item = etree.SubElement(channel, "item")
+
+ link = etree.SubElement(item, "link")
+ link.text = self.ext.getConfig("URL")
+
+ title = etree.SubElement(item, "title")
+ title.text = heading
+
+ guid = ''.join([x for x in heading if x.isalnum()])
+
+ guidElem = etree.SubElement(item, "guid")
+ guidElem.text = guid
+ guidElem.set("isPermaLink", "false")
+
+ elif child.tag in ["p"] :
+ if item:
+ description = etree.SubElement(item, "description")
+ if len(child):
+ content = "\n".join([etree.tostring(node)
+ for node in child])
+ else:
+ content = child.text
+ pholder = self.stash.store("<![CDATA[ %s]]>" % content)
+ description.text = pholder
+
+ return rss
+
+
+def makeExtension(configs):
return RssExtension(configs)
diff --git a/mdx_wikilink.py b/mdx_wikilink.py
index 46d6c3f..47037a6 100644..100755
--- a/mdx_wikilink.py
+++ b/mdx_wikilink.py
@@ -69,6 +69,7 @@ Dependencies:
'''
import markdown
+from markdown import etree
class WikiLinkExtension (markdown.Extension) :
def __init__(self, configs):
@@ -91,24 +92,25 @@ class WikiLinkExtension (markdown.Extension) :
WIKILINK_PATTERN = WikiLinks(WIKILINK_RE, self.config)
WIKILINK_PATTERN.md = md
md.inlinePatterns.append(WIKILINK_PATTERN)
+
class WikiLinks (markdown.BasePattern) :
def __init__(self, pattern, config):
markdown.BasePattern.__init__(self, pattern)
self.config = config
- def handleMatch(self, m, doc) :
+ def handleMatch(self, m):
if m.group('escape') == '\\':
- a = doc.createTextNode(m.group('camelcase'))
+ a = m.group('camelcase')
else:
base_url, end_url, html_class = self._getMeta()
url = '%s%s%s'% (base_url, m.group('camelcase'), end_url)
label = m.group('camelcase').replace('_', ' ')
- a = doc.createElement('a')
- a.appendChild(doc.createTextNode(label))
- a.setAttribute('href', url)
+ a = etree.Element('a')
+ a.text = label
+ a.set('href', url)
if html_class:
- a.setAttribute('class', html_class)
+ a.set('class', html_class)
return a
def _getMeta(self):
@@ -124,6 +126,9 @@ class WikiLinks (markdown.BasePattern) :
if self.md.Meta.has_key('wiki_html_class'):
html_class = self.md.Meta['wiki_html_class'][0]
return base_url, end_url, html_class
+
+ def type(self):
+ return "WLink"
def makeExtension(configs=None) :
diff --git a/test-markdown.py b/test-markdown.py
index 440ec42..c936464 100644
--- a/test-markdown.py
+++ b/test-markdown.py
@@ -244,9 +244,9 @@ def testDirectory(dir, measure_time=False, safe_mode=False) :
t = time.clock()
for x in repeat:
actual_output = md.convert(input)
-
conversion_time = time.clock() - t
conversion_mem = memory(mem)
+ md.reset()
expected_lines = [x.encode("utf8") for x in smart_split(expected_output)]
actual_lines = [x.encode("utf8") for x in smart_split(actual_output)]
@@ -361,4 +361,7 @@ testDirectory("tests/misc", measure_time=True)
testDirectory("tests/extensions-x-footnotes")
#testDirectory("tests/extensions-x-tables")
# testDirectory("tests/extensions-x-ext1-ext2")
-testDirectory("tests/safe_mode", measure_time=True, safe_mode="escape")
+testDirectory("tests/safe_mode", measure_time=True, safe_mode="escape")
+
+#testDirectory("tests2/php-markdown-cases-new", measure_time=True)
+#testDirectory("tests2/tm-cases-new", measure_time=True)
diff --git a/tests/extensions-x-footnotes/footnote.html b/tests/extensions-x-footnotes/footnote.html
index de0070d..4a844be 100644
--- a/tests/extensions-x-footnotes/footnote.html
+++ b/tests/extensions-x-footnotes/footnote.html
@@ -1,40 +1,42 @@
-<p>This is the body with a footnote<sup id="fnr1-5771256"><a href="#fn1-5771256">1</a></sup> or two<sup id="fnr2-5771256"><a href="#fn2-5771256">2</a></sup> or more<sup id="fnr3-5771256"><a href="#fn3-5771256">3</a></sup> <sup id="fnr4-5771256"><a href="#fn4-5771256">4</a></sup>.
-</p>
-
-<div class="footnote"><hr/><ol>
- <li id="fn1-5771256"><p>Footnote that ends with a list:
-</p>
-<ul>
- <li>
- item 1
- </li>
-
- <li>
- item 2
- </li>
-</ul>
-<p><a href="#fnr1-5771256" class="footnoteBackLink" title="Jump back to footnote 1 in the text">&#8617;</a>
-</p>
-
- </li>
-
- <li id="fn2-5771256"><blockquote><p>This footnote is a blockquote.
-</p>
-</blockquote><p><a href="#fnr2-5771256" class="footnoteBackLink" title="Jump back to footnote 2 in the text">&#8617;</a>
-</p>
-
- </li>
-
- <li id="fn3-5771256"><p>A simple oneliner.<a href="#fnr3-5771256" class="footnoteBackLink" title="Jump back to footnote 3 in the text">&#8617;</a>
-</p>
-
- </li>
-
- <li id="fn4-5771256"><p>A footnote with multiple paragraphs.
-</p>
-<p>Paragraph two.<a href="#fnr4-5771256" class="footnoteBackLink" title="Jump back to footnote 4 in the text">&#8617;</a>
-</p>
-
- </li>
-</ol>
-</div> \ No newline at end of file
+<p>This is the body with a footnote<sup id="fnr1-506476047">
+ <a href="#fn1-506476047">1</a>
+ </sup> or two<sup id="fnr2-506476047">
+ <a href="#fn2-506476047">2</a>
+ </sup> or more<sup id="fnr3-506476047">
+ <a href="#fn3-506476047">3</a>
+ </sup>
+ <sup id="fnr4-506476047">
+ <a href="#fn4-506476047">4</a>
+ </sup>.</p>
+<div class="footnote">
+ <hr />
+ <ol>
+ <li id="fn1-506476047">
+ <p>Footnote that ends with a list:</p>
+ <ul>
+ <li>item 1</li>
+ <li>item 2</li>
+ </ul>
+ <p>
+ <a class="footnoteBackLink" href="#fnr1-506476047" title="Jump back to footnote 1 in the text">&#8617;</a>
+ </p>
+ </li>
+ <li id="fn2-506476047">
+ <blockquote>
+ <p>This footnote is a blockquote.</p>
+ </blockquote>
+ <p>
+ <a class="footnoteBackLink" href="#fnr2-506476047" title="Jump back to footnote 2 in the text">&#8617;</a>
+ </p>
+ </li>
+ <li id="fn3-506476047">
+ <p>A simple oneliner.<a class="footnoteBackLink" href="#fnr3-506476047" title="Jump back to footnote 3 in the text">&#8617;</a>
+ </p>
+ </li>
+ <li id="fn4-506476047">
+ <p>A footnote with multiple paragraphs.</p>
+ <p>Paragraph two.<a class="footnoteBackLink" href="#fnr4-506476047" title="Jump back to footnote 4 in the text">&#8617;</a>
+ </p>
+ </li>
+ </ol>
+</div>
diff --git a/tests/markdown-test/amps-and-angle-encoding.html b/tests/markdown-test/amps-and-angle-encoding.html
index 18df2c3..fc1b2c3 100644
--- a/tests/markdown-test/amps-and-angle-encoding.html
+++ b/tests/markdown-test/amps-and-angle-encoding.html
@@ -1,21 +1,9 @@
-
-<p>AT&amp;T has an ampersand in their name.
-</p>
-<p>AT&amp;T is another way to write it.
-</p>
-<p>This &amp; that.
-</p>
-<p>4 &lt; 5.
-</p>
-<p>6 &gt; 5.
-</p>
-<p>Here's a <a href="http://example.com/?foo=1&amp;bar=2">link</a> with an ampersand in the URL.
-</p>
-<p>Here's a link with an amersand in the link text: <a href="http://att.com/" title="AT&amp;T">AT&amp;T</a>.
-</p>
-<p>Here's an inline <a href="/script?foo=1&amp;bar=2">link</a>.
-</p>
-<p>Here's an inline <a href="/script?foo=1&amp;bar=2">link</a>.
-</p>
-
-
+<p>AT&amp;T has an ampersand in their name.</p>
+<p>AT&amp;T is another way to write it.</p>
+<p>This &amp; that.</p>
+<p>4 &lt; 5.</p>
+<p>6 &gt; 5.</p>
+<p>Here's a <a href="http://example.com/?foo=1&amp;bar=2">link</a> with an ampersand in the URL.</p>
+<p>Here's a link with an amersand in the link text: <a href="http://att.com/" title="AT&amp;T">AT&amp;T</a>.</p>
+<p>Here's an inline <a href="/script?foo=1&amp;bar=2">link</a>.</p>
+<p>Here's an inline <a href="/script?foo=1&amp;bar=2">link</a>.</p>
diff --git a/tests/markdown-test/angle-links-and-img.html b/tests/markdown-test/angle-links-and-img.html
new file mode 100755
index 0000000..e32b6e6
--- /dev/null
+++ b/tests/markdown-test/angle-links-and-img.html
@@ -0,0 +1,7 @@
+<p>
+ <a href="simple link" title="title">link</a>
+ <img alt="image" src="http://example.com/image.jpg" />
+ <a href="http://example.com/(()((())923)(">link</a>
+ <img alt="image" src="link(()))(" />
+</p>
+
diff --git a/tests/markdown-test/angle-links-and-img.txt b/tests/markdown-test/angle-links-and-img.txt
new file mode 100755
index 0000000..1dbf404
--- /dev/null
+++ b/tests/markdown-test/angle-links-and-img.txt
@@ -0,0 +1,4 @@
+[link](<simple link> "title")
+![image](<http://example.com/image.jpg>)
+[link](<http://example.com/(()((())923)(>)
+![image](<link(()))(>)
diff --git a/tests/markdown-test/auto-links.html b/tests/markdown-test/auto-links.html
index 229ebb4..100db34 100644
--- a/tests/markdown-test/auto-links.html
+++ b/tests/markdown-test/auto-links.html
@@ -1,5 +1,4 @@
-<p>Link: <a href="http://example.com/">http://example.com/</a>.
-</p>
+<p>Link: <a href="http://example.com/">http://example.com/</a>.</p>
<p>Https link: <a href="https://example.com">https://example.com</a>
</p>
<p>Ftp link: <a href="ftp://example.com">ftp://example.com</a>
@@ -7,21 +6,19 @@
<p>With an ampersand: <a href="http://example.com/?foo=1&amp;bar=2">http://example.com/?foo=1&amp;bar=2</a>
</p>
<ul>
- <li>
- In a list?
- </li>
-
- <li>
- <a href="http://example.com/">http://example.com/</a>
- </li>
-
- <li>
- It should.
- </li>
+<li>In a list?</li>
+<li>
+<a href="http://example.com/">http://example.com/</a>
+</li>
+<li>It should.</li>
</ul>
-<blockquote><p>Blockquoted: <a href="http://example.com/">http://example.com/</a>
+<blockquote>
+<p>Blockquoted: <a href="http://example.com/">http://example.com/</a>
</p>
-</blockquote><p>Auto-links should not occur here: <code>&lt;http://example.com/&gt;</code>
+</blockquote>
+<p>Auto-links should not occur here: <code>&lt;http://example.com/&gt;</code>
</p>
-<pre><code>or here: &lt;http://example.com/&gt;
-</code></pre> \ No newline at end of file
+<pre>
+<code>or here: &lt;http://example.com/&gt;
+</code>
+</pre> \ No newline at end of file
diff --git a/tests/markdown-test/backlash-escapes.html b/tests/markdown-test/backlash-escapes.html
index 77ecde7..f99082a 100644
--- a/tests/markdown-test/backlash-escapes.html
+++ b/tests/markdown-test/backlash-escapes.html
@@ -1,41 +1,23 @@
-
-<p>These should all get escaped:
-</p>
-<p>Backslash: \
-</p>
-<p>Backtick: `
-</p>
-<p>Asterisk: *
-</p>
-<p>Underscore: _
-</p>
-<p>Left brace: {
-</p>
-<p>Right brace: }
-</p>
-<p>Left bracket: [
-</p>
-<p>Right bracket: ]
-</p>
-<p>Left paren: (
-</p>
-<p>Right paren: )
-</p>
-<p>Greater-than: &gt;
-</p>
-<p>Hash: #
-</p>
-<p>Period: .
-</p>
-<p>Bang: !
-</p>
-<p>Plus: +
-</p>
-<p>Minus: -
-</p>
-<p>These should not, because they occur within a code block:
-</p>
-<pre><code>Backslash: \\
+<p>These should all get escaped:</p>
+<p>Backslash: \</p>
+<p>Backtick: `</p>
+<p>Asterisk: *</p>
+<p>Underscore: _</p>
+<p>Left brace: {</p>
+<p>Right brace: }</p>
+<p>Left bracket: [</p>
+<p>Right bracket: ]</p>
+<p>Left paren: (</p>
+<p>Right paren: )</p>
+<p>Greater-than: &gt;</p>
+<p>Hash: #</p>
+<p>Period: .</p>
+<p>Bang: !</p>
+<p>Plus: +</p>
+<p>Minus: -</p>
+<p>These should not, because they occur within a code block:</p>
+<pre>
+<code>Backslash: \\
Backtick: \`
@@ -66,8 +48,9 @@ Bang: \!
Plus: \+
Minus: \-
-</code></pre><p>Nor should these, which occur in code spans:
-</p>
+</code>
+</pre>
+<p>Nor should these, which occur in code spans:</p>
<p>Backslash: <code>\\</code>
</p>
<p>Backtick: <code>\`</code>
@@ -99,6 +82,4 @@ Minus: \-
<p>Plus: <code>\+</code>
</p>
<p>Minus: <code>\-</code>
-</p>
-
-
+</p> \ No newline at end of file
diff --git a/tests/markdown-test/benchmark.dat b/tests/markdown-test/benchmark.dat
index 5b645ed..3d549dd 100644
--- a/tests/markdown-test/benchmark.dat
+++ b/tests/markdown-test/benchmark.dat
@@ -1,20 +1,20 @@
construction:0.000000:0.000000
-amps-and-angle-encoding:0.060000:0.000000
-auto-links:0.070000:135168.000000
-backlash-escapes:0.220000:360448.000000
+amps-and-angle-encoding:0.070000:131072.000000
+auto-links:0.080000:397312.000000
+backlash-escapes:0.270000:884736.000000
blockquotes-with-dode-blocks:0.020000:0.000000
-hard-wrapped:0.010000:0.000000
-horizontal-rules:0.140000:0.000000
-inline-html-advanced:0.060000:0.000000
-inline-html-comments:0.070000:0.000000
-inline-html-simple:0.170000:0.000000
-links-inline:0.100000:0.000000
-links-reference:0.120000:0.000000
-literal-quotes:0.070000:0.000000
-markdown-documentation-basics:0.740000:1175552.000000
-markdown-syntax:3.030000:2596864.000000
-nested-blockquotes:0.100000:0.000000
-ordered-and-unordered-list:0.360000:0.000000
-strong-and-em-together:0.110000:0.000000
-tabs:0.120000:0.000000
-tidyness:0.120000:0.000000
+hard-wrapped:0.020000:0.000000
+horizontal-rules:0.180000:135168.000000
+inline-html-advanced:0.070000:0.000000
+inline-html-comments:0.080000:0.000000
+inline-html-simple:0.210000:0.000000
+links-inline:0.140000:0.000000
+links-reference:0.170000:0.000000
+literal-quotes:0.090000:0.000000
+markdown-documentation-basics:0.690000:1806336.000000
+markdown-syntax:3.310000:6696960.000000
+nested-blockquotes:0.200000:0.000000
+ordered-and-unordered-list:0.530000:0.000000
+strong-and-em-together:0.200000:0.000000
+tabs:0.200000:0.000000
+tidyness:0.200000:0.000000
diff --git a/tests/markdown-test/blockquotes-with-dode-blocks.html b/tests/markdown-test/blockquotes-with-dode-blocks.html
index 275749f..e7c79d9 100644
--- a/tests/markdown-test/blockquotes-with-dode-blocks.html
+++ b/tests/markdown-test/blockquotes-with-dode-blocks.html
@@ -1,13 +1,16 @@
-
-<blockquote><p>Example:
-</p>
-<pre><code>sub status {
+<blockquote>
+<p>Example:</p>
+<pre>
+<code>sub status {
print "working";
}
-</code></pre><p>Or:
-</p>
-<pre><code>sub status {
+</code>
+</pre>
+<p>Or:</p>
+<pre>
+<code>sub status {
return "working";
}
-</code></pre></blockquote>
-
+</code>
+</pre>
+</blockquote> \ No newline at end of file
diff --git a/tests/markdown-test/hard-wrapped.html b/tests/markdown-test/hard-wrapped.html
index 7d16a85..e28e900 100644
--- a/tests/markdown-test/hard-wrapped.html
+++ b/tests/markdown-test/hard-wrapped.html
@@ -1,12 +1,7 @@
-
<p>In Markdown 1.0.0 and earlier. Version
- 8. This line turns into a list item.
- Because a hard-wrapped line in the
- middle of a paragraph looked like a
- list item.
-</p>
+8. This line turns into a list item.
+Because a hard-wrapped line in the
+middle of a paragraph looked like a
+list item.</p>
<p>Here's one with a bullet.
- * criminey.
-</p>
-
-
+* criminey.</p> \ No newline at end of file
diff --git a/tests/markdown-test/horizontal-rules.html b/tests/markdown-test/horizontal-rules.html
index 253e36a..98c9c90 100644
--- a/tests/markdown-test/horizontal-rules.html
+++ b/tests/markdown-test/horizontal-rules.html
@@ -1,63 +1,51 @@
-
-<p>Dashes:
-</p>
+<p>Dashes:</p>
<hr />
-
<hr />
-
<hr />
-
<hr />
-
-<pre><code>---
-</code></pre><hr />
-
+<pre>
+<code>---
+</code>
+</pre>
<hr />
-
<hr />
-
<hr />
-
-<pre><code>- - -
-</code></pre><p>Asterisks:
-</p>
<hr />
-
+<pre>
+<code>- - -
+</code>
+</pre>
+<p>Asterisks:</p>
<hr />
-
<hr />
-
<hr />
-
-<pre><code>***
-</code></pre><hr />
-
<hr />
-
+<pre>
+<code>***
+</code>
+</pre>
<hr />
-
<hr />
-
-<pre><code>* * *
-</code></pre><p>Underscores:
-</p>
<hr />
-
<hr />
-
+<pre>
+<code>* * *
+</code>
+</pre>
+<p>Underscores:</p>
<hr />
-
<hr />
-
-<pre><code>___
-</code></pre><hr />
-
<hr />
-
<hr />
-
+<pre>
+<code>___
+</code>
+</pre>
<hr />
-
-<pre><code>_ _ _
-</code></pre>
-
+<hr />
+<hr />
+<hr />
+<pre>
+<code>_ _ _
+</code>
+</pre>
diff --git a/tests/markdown-test/inline-html-advanced.html b/tests/markdown-test/inline-html-advanced.html
index c72bebb..0f922e9 100644
--- a/tests/markdown-test/inline-html-advanced.html
+++ b/tests/markdown-test/inline-html-advanced.html
@@ -1,18 +1,11 @@
-
-<p>Simple block on one line:
-</p>
-<div>foo</div>
-
-<p>And nested without indentation:
-</p>
-<div>
+<p>Simple block on one line:</p>
+<p><div>foo</div></p>
+<p>And nested without indentation:</p>
+<p><div>
<div>
<div>
foo
</div>
</div>
<div>bar</div>
-</div>
-
-
-
+</div></p> \ No newline at end of file
diff --git a/tests/markdown-test/inline-html-comments.html b/tests/markdown-test/inline-html-comments.html
index a85c8b2..8b538d6 100644
--- a/tests/markdown-test/inline-html-comments.html
+++ b/tests/markdown-test/inline-html-comments.html
@@ -1,17 +1,8 @@
-
-<p>Paragraph one.
-</p>
-<!-- This is a simple comment -->
-
-<!--
+<p>Paragraph one.</p>
+<p><!-- This is a simple comment --></p>
+<p><!--
This is another comment.
--->
-
-<p>Paragraph two.
-</p>
-<!-- one comment block -- -- with two comments -->
-
-<p>The end.
-</p>
-
-
+--></p>
+<p>Paragraph two.</p>
+<p><!-- one comment block -- -- with two comments --></p>
+<p>The end.</p> \ No newline at end of file
diff --git a/tests/markdown-test/inline-html-simple.html b/tests/markdown-test/inline-html-simple.html
index b5b3f12..efb50dc 100644
--- a/tests/markdown-test/inline-html-simple.html
+++ b/tests/markdown-test/inline-html-simple.html
@@ -1,68 +1,53 @@
-
-<p>Here's a simple block:
-</p>
-<div>
+<p>Here's a simple block:</p>
+<p><div>
foo
-</div>
-
-<p>This should be a code block, though:
-</p>
-<pre><code>&lt;div&gt;
+</div></p>
+<p>This should be a code block, though:</p>
+<pre>
+<code>&lt;div&gt;
foo
&lt;/div&gt;
-</code></pre><p>As should this:
-</p>
-<pre><code>&lt;div&gt;foo&lt;/div&gt;
-</code></pre><p>Now, nested:
-</p>
-<div>
+</code>
+</pre>
+<p>As should this:</p>
+<pre>
+<code>&lt;div&gt;foo&lt;/div&gt;
+</code>
+</pre>
+<p>Now, nested:</p>
+<p><div>
<div>
<div>
foo
</div>
</div>
-</div>
-
-<p>This should just be an HTML comment:
-</p>
-<!-- Comment -->
-
-<p>Multiline:
-</p>
-<!--
+</div></p>
+<p>This should just be an HTML comment:</p>
+<p><!-- Comment --></p>
+<p>Multiline:</p>
+<p><!--
Blah
Blah
--->
-
-<p>Code block:
-</p>
-<pre><code>&lt;!-- Comment --&gt;
-</code></pre><p>Just plain comment, with trailing spaces on the line:
-</p>
-<!-- foo -->
-
-<p>Code:
-</p>
-<pre><code>&lt;hr /&gt;
-</code></pre><p>Hr's:
-</p>
-<hr>
-
-<hr/>
-
-<hr />
-
-<hr>
-
-<hr/>
-
-<hr />
-
-<hr class="foo" id="bar" />
-
-<hr class="foo" id="bar"/>
-
-<hr class="foo" id="bar" >
-
-
-
+--></p>
+<p>Code block:</p>
+<pre>
+<code>&lt;!-- Comment --&gt;
+</code>
+</pre>
+<p>Just plain comment, with trailing spaces on the line:</p>
+<p><!-- foo --></p>
+<p>Code:</p>
+<pre>
+<code>&lt;hr /&gt;
+</code>
+</pre>
+<p>Hr's:</p>
+<p><hr></p>
+<p><hr/></p>
+<p><hr /></p>
+<p><hr></p>
+<p><hr/></p>
+<p><hr /></p>
+<p><hr class="foo" id="bar" /></p>
+<p><hr class="foo" id="bar"/></p>
+<p><hr class="foo" id="bar" ></p> \ No newline at end of file
diff --git a/tests/markdown-test/links-inline.html b/tests/markdown-test/links-inline.html
index 5fb7073..e1aaf27 100644
--- a/tests/markdown-test/links-inline.html
+++ b/tests/markdown-test/links-inline.html
@@ -1,13 +1,9 @@
-
-<p>Just a <a href="/url/">URL</a>.
-</p>
-<p><a href="/url/" title="title">URL and title</a>.
-</p>
-<p><a href="/url/" title="title preceded by two spaces">URL and title</a>.
-</p>
-<p><a href="/url/" title="title preceded by a tab">URL and title</a>.
-</p>
-<p><a href="">Empty</a>.
-</p>
-
-
+<p>Just a <a href="/url/">URL</a>.</p>
+<p>
+<a href="/url/" title="title">URL and title</a>.</p>
+<p>
+<a href="/url/" title="title preceded by two spaces">URL and title</a>.</p>
+<p>
+<a href="/url/" title="title preceded by a tab">URL and title</a>.</p>
+<p>
+<a href="">Empty</a>.</p> \ No newline at end of file
diff --git a/tests/markdown-test/links-reference.html b/tests/markdown-test/links-reference.html
index ad6438f..338aa22 100644
--- a/tests/markdown-test/links-reference.html
+++ b/tests/markdown-test/links-reference.html
@@ -1,20 +1,12 @@
-
-<p>Foo <a href="/url/" title="Title">bar</a>.
-</p>
-<p>Foo <a href="/url/" title="Title">bar</a>.
-</p>
-<p>Foo <a href="/url/" title="Title">bar</a>.
-</p>
-<p>With <a href="/url/">embedded [brackets]</a>.
-</p>
-<p>Indented <a href="/url">once</a>.
-</p>
-<p>Indented <a href="/url">twice</a>.
-</p>
-<p>Indented <a href="/url">thrice</a>.
-</p>
-<p>Indented [four][] times.
-</p>
-<pre><code>[four]: /url
-</code></pre>
-
+<p>Foo <a href="/url/" title="Title">bar</a>.</p>
+<p>Foo <a href="/url/" title="Title">bar</a>.</p>
+<p>Foo <a href="/url/" title="Title">bar</a>.</p>
+<p>With <a href="/url/">embedded [brackets]</a>.</p>
+<p>Indented <a href="/url">once</a>.</p>
+<p>Indented <a href="/url">twice</a>.</p>
+<p>Indented <a href="/url">thrice</a>.</p>
+<p>Indented [four][] times.</p>
+<pre>
+<code>[four]: /url
+</code>
+</pre> \ No newline at end of file
diff --git a/tests/markdown-test/literal-quotes.html b/tests/markdown-test/literal-quotes.html
index 806b7f2..0342589 100644
--- a/tests/markdown-test/literal-quotes.html
+++ b/tests/markdown-test/literal-quotes.html
@@ -1,7 +1,2 @@
-
-<p>Foo <a href="/url/" title="Title with &quot;quotes&quot; inside">bar</a>.
-</p>
-<p>Foo <a href="/url/" title="Title with &quot;quotes&quot; inside">bar</a>.
-</p>
-
-
+<p>Foo <a href="/url/" title="Title with &quot;quotes&quot; inside">bar</a>.</p>
+<p>Foo <a href="/url/" title="Title with &quot;quotes&quot; inside">bar</a>.</p> \ No newline at end of file
diff --git a/tests/markdown-test/markdown-documentation-basics.html b/tests/markdown-test/markdown-documentation-basics.html
index 744e89d..6755f77 100644
--- a/tests/markdown-test/markdown-documentation-basics.html
+++ b/tests/markdown-test/markdown-documentation-basics.html
@@ -1,47 +1,39 @@
<h1>Markdown: Basics</h1>
-<ul id="ProjectSubmenu">
+<p><ul id="ProjectSubmenu">
<li><a href="/projects/markdown/" title="Markdown Project Page">Main</a></li>
<li><a class="selected" title="Markdown Basics">Basics</a></li>
<li><a href="/projects/markdown/syntax" title="Markdown Syntax Documentation">Syntax</a></li>
<li><a href="/projects/markdown/license" title="Pricing and License Information">License</a></li>
<li><a href="/projects/markdown/dingus" title="Online Markdown Web Form">Dingus</a></li>
-</ul>
-
-
+</ul></p>
<h2>Getting the Gist of Markdown's Formatting Syntax</h2>
<p>This page offers a brief overview of what it's like to use Markdown.
- The <a href="/projects/markdown/syntax" title="Markdown Syntax">syntax page</a> provides complete, detailed documentation for
- every feature, but Markdown should be very easy to pick up simply by
- looking at a few examples of it in action. The examples on this page
- are written in a before/after style, showing example syntax and the
- HTML output produced by Markdown.
-</p>
+The <a href="/projects/markdown/syntax" title="Markdown Syntax">syntax page</a> provides complete, detailed documentation for
+every feature, but Markdown should be very easy to pick up simply by
+looking at a few examples of it in action. The examples on this page
+are written in a before/after style, showing example syntax and the
+HTML output produced by Markdown.</p>
<p>It's also helpful to simply try Markdown out; the <a href="/projects/markdown/dingus" title="Markdown Dingus">Dingus</a> is a
- web application that allows you type your own Markdown-formatted text
- and translate it to XHTML.
-</p>
-<p><strong>Note:</strong> This document is itself written using Markdown; you
- can <a href="/projects/markdown/basics.text">see the source for it by adding '.text' to the URL</a>.
-</p>
-
+web application that allows you type your own Markdown-formatted text
+and translate it to XHTML.</p>
+<p>
+ <strong>Note:</strong> This document is itself written using Markdown; you
+can <a href="/projects/markdown/basics.text">see the source for it by adding '.text' to the URL</a>.</p>
<h2>Paragraphs, Headers, Blockquotes</h2>
<p>A paragraph is simply one or more consecutive lines of text, separated
- by one or more blank lines. (A blank line is any line that looks like a
- blank line -- a line containing nothing spaces or tabs is considered
- blank.) Normal paragraphs should not be intended with spaces or tabs.
-</p>
+by one or more blank lines. (A blank line is any line that looks like a
+blank line -- a line containing nothing spaces or tabs is considered
+blank.) Normal paragraphs should not be intended with spaces or tabs.</p>
<p>Markdown offers two styles of headers: <em>Setext</em> and <em>atx</em>.
- Setext-style headers for <code>&lt;h1&gt;</code> and <code>&lt;h2&gt;</code> are created by
- "underlining" with equal signs (<code>=</code>) and hyphens (<code>-</code>), respectively.
- To create an atx-style header, you put 1-6 hash marks (<code>#</code>) at the
- beginning of the line -- the number of hashes equals the resulting
- HTML header level.
-</p>
-<p>Blockquotes are indicated using email-style '<code>&gt;</code>' angle brackets.
-</p>
-<p>Markdown:
-</p>
-<pre><code>A First Level Header
+Setext-style headers for <code>&lt;h1&gt;</code> and <code>&lt;h2&gt;</code> are created by
+"underlining" with equal signs (<code>=</code>) and hyphens (<code>-</code>), respectively.
+To create an atx-style header, you put 1-6 hash marks (<code>#</code>) at the
+beginning of the line -- the number of hashes equals the resulting
+HTML header level.</p>
+<p>Blockquotes are indicated using email-style '<code>&gt;</code>' angle brackets.</p>
+<p>Markdown:</p>
+<pre>
+ <code>A First Level Header
====================
A Second Level Header
@@ -61,9 +53,11 @@ dog's back.
&gt; This is the second paragraph in the blockquote.
&gt;
&gt; ## This is an H2 in a blockquote
-</code></pre><p>Output:
-</p>
-<pre><code>&lt;h1&gt;A First Level Header&lt;/h1&gt;
+</code>
+</pre>
+<p>Output:</p>
+<pre>
+ <code>&lt;h1&gt;A First Level Header&lt;/h1&gt;
&lt;h2&gt;A Second Level Header&lt;/h2&gt;
@@ -83,179 +77,222 @@ dog's back.&lt;/p&gt;
&lt;h2&gt;This is an H2 in a blockquote&lt;/h2&gt;
&lt;/blockquote&gt;
-</code></pre>
+</code>
+</pre>
<h3>Phrase Emphasis</h3>
-<p>Markdown uses asterisks and underscores to indicate spans of emphasis.
-</p>
-<p>Markdown:
-</p>
-<pre><code>Some of these words *are emphasized*.
+<p>Markdown uses asterisks and underscores to indicate spans of emphasis.</p>
+<p>Markdown:</p>
+<pre>
+ <code>Some of these words *are emphasized*.
Some of these words _are emphasized also_.
Use two asterisks for **strong emphasis**.
Or, if you prefer, __use two underscores instead__.
-</code></pre><p>Output:
-</p>
-<pre><code>&lt;p&gt;Some of these words &lt;em&gt;are emphasized&lt;/em&gt;.
+</code>
+</pre>
+<p>Output:</p>
+<pre>
+ <code>&lt;p&gt;Some of these words &lt;em&gt;are emphasized&lt;/em&gt;.
Some of these words &lt;em&gt;are emphasized also&lt;/em&gt;.&lt;/p&gt;
&lt;p&gt;Use two asterisks for &lt;strong&gt;strong emphasis&lt;/strong&gt;.
Or, if you prefer, &lt;strong&gt;use two underscores instead&lt;/strong&gt;.&lt;/p&gt;
-</code></pre>
+</code>
+</pre>
<h2>Lists</h2>
<p>Unordered (bulleted) lists use asterisks, pluses, and hyphens (<code>*</code>,
- <code>+</code>, and <code>-</code>) as list markers. These three markers are
- interchangable; this:
-</p>
-<pre><code>* Candy.
+<code>+</code>, and <code>-</code>) as list markers. These three markers are
+interchangable; this:</p>
+<pre>
+ <code>* Candy.
* Gum.
* Booze.
-</code></pre><p>this:
-</p>
-<pre><code>+ Candy.
+</code>
+</pre>
+<p>this:</p>
+<pre>
+ <code>+ Candy.
+ Gum.
+ Booze.
-</code></pre><p>and this:
-</p>
-<pre><code>- Candy.
+</code>
+</pre>
+<p>and this:</p>
+<pre>
+ <code>- Candy.
- Gum.
- Booze.
-</code></pre><p>all produce the same output:
-</p>
-<pre><code>&lt;ul&gt;
+</code>
+</pre>
+<p>all produce the same output:</p>
+<pre>
+ <code>&lt;ul&gt;
&lt;li&gt;Candy.&lt;/li&gt;
&lt;li&gt;Gum.&lt;/li&gt;
&lt;li&gt;Booze.&lt;/li&gt;
&lt;/ul&gt;
-</code></pre><p>Ordered (numbered) lists use regular numbers, followed by periods, as
- list markers:
-</p>
-<pre><code>1. Red
+</code>
+</pre>
+<p>Ordered (numbered) lists use regular numbers, followed by periods, as
+list markers:</p>
+<pre>
+ <code>1. Red
2. Green
3. Blue
-</code></pre><p>Output:
-</p>
-<pre><code>&lt;ol&gt;
+</code>
+</pre>
+<p>Output:</p>
+<pre>
+ <code>&lt;ol&gt;
&lt;li&gt;Red&lt;/li&gt;
&lt;li&gt;Green&lt;/li&gt;
&lt;li&gt;Blue&lt;/li&gt;
&lt;/ol&gt;
-</code></pre><p>If you put blank lines between items, you'll get <code>&lt;p&gt;</code> tags for the
- list item text. You can create multi-paragraph list items by indenting
- the paragraphs by 4 spaces or 1 tab:
-</p>
-<pre><code>* A list item.
+</code>
+</pre>
+<p>If you put blank lines between items, you'll get <code>&lt;p&gt;</code> tags for the
+list item text. You can create multi-paragraph list items by indenting
+the paragraphs by 4 spaces or 1 tab:</p>
+<pre>
+ <code>* A list item.
With multiple paragraphs.
* Another item in the list.
-</code></pre><p>Output:
-</p>
-<pre><code>&lt;ul&gt;
+</code>
+</pre>
+<p>Output:</p>
+<pre>
+ <code>&lt;ul&gt;
&lt;li&gt;&lt;p&gt;A list item.&lt;/p&gt;
&lt;p&gt;With multiple paragraphs.&lt;/p&gt;&lt;/li&gt;
&lt;li&gt;&lt;p&gt;Another item in the list.&lt;/p&gt;&lt;/li&gt;
&lt;/ul&gt;
-</code></pre>
+</code>
+</pre>
<h3>Links</h3>
<p>Markdown supports two styles for creating links: <em>inline</em> and
- <em>reference</em>. With both styles, you use square brackets to delimit the
- text you want to turn into a link.
-</p>
+<em>reference</em>. With both styles, you use square brackets to delimit the
+text you want to turn into a link.</p>
<p>Inline-style links use parentheses immediately after the link text.
- For example:
-</p>
-<pre><code>This is an [example link](http://example.com/).
-</code></pre><p>Output:
-</p>
-<pre><code>&lt;p&gt;This is an &lt;a href="http://example.com/"&gt;
+For example:</p>
+<pre>
+ <code>This is an [example link](http://example.com/).
+</code>
+</pre>
+<p>Output:</p>
+<pre>
+ <code>&lt;p&gt;This is an &lt;a href="http://example.com/"&gt;
example link&lt;/a&gt;.&lt;/p&gt;
-</code></pre><p>Optionally, you may include a title attribute in the parentheses:
-</p>
-<pre><code>This is an [example link](http://example.com/ "With a Title").
-</code></pre><p>Output:
-</p>
-<pre><code>&lt;p&gt;This is an &lt;a href="http://example.com/" title="With a Title"&gt;
+</code>
+</pre>
+<p>Optionally, you may include a title attribute in the parentheses:</p>
+<pre>
+ <code>This is an [example link](http://example.com/ "With a Title").
+</code>
+</pre>
+<p>Output:</p>
+<pre>
+ <code>&lt;p&gt;This is an &lt;a href="http://example.com/" title="With a Title"&gt;
example link&lt;/a&gt;.&lt;/p&gt;
-</code></pre><p>Reference-style links allow you to refer to your links by names, which
- you define elsewhere in your document:
-</p>
-<pre><code>I get 10 times more traffic from [Google][1] than from
+</code>
+</pre>
+<p>Reference-style links allow you to refer to your links by names, which
+you define elsewhere in your document:</p>
+<pre>
+ <code>I get 10 times more traffic from [Google][1] than from
[Yahoo][2] or [MSN][3].
[1]: http://google.com/ "Google"
[2]: http://search.yahoo.com/ "Yahoo Search"
[3]: http://search.msn.com/ "MSN Search"
-</code></pre><p>Output:
-</p>
-<pre><code>&lt;p&gt;I get 10 times more traffic from &lt;a href="http://google.com/"
+</code>
+</pre>
+<p>Output:</p>
+<pre>
+ <code>&lt;p&gt;I get 10 times more traffic from &lt;a href="http://google.com/"
title="Google"&gt;Google&lt;/a&gt; than from &lt;a href="http://search.yahoo.com/"
title="Yahoo Search"&gt;Yahoo&lt;/a&gt; or &lt;a href="http://search.msn.com/"
title="MSN Search"&gt;MSN&lt;/a&gt;.&lt;/p&gt;
-</code></pre><p>The title attribute is optional. Link names may contain letters,
- numbers and spaces, but are <em>not</em> case sensitive:
-</p>
-<pre><code>I start my morning with a cup of coffee and
+</code>
+</pre>
+<p>The title attribute is optional. Link names may contain letters,
+numbers and spaces, but are <em>not</em> case sensitive:</p>
+<pre>
+ <code>I start my morning with a cup of coffee and
[The New York Times][NY Times].
[ny times]: http://www.nytimes.com/
-</code></pre><p>Output:
-</p>
-<pre><code>&lt;p&gt;I start my morning with a cup of coffee and
+</code>
+</pre>
+<p>Output:</p>
+<pre>
+ <code>&lt;p&gt;I start my morning with a cup of coffee and
&lt;a href="http://www.nytimes.com/"&gt;The New York Times&lt;/a&gt;.&lt;/p&gt;
-</code></pre>
+</code>
+</pre>
<h3>Images</h3>
-<p>Image syntax is very much like link syntax.
-</p>
-<p>Inline (titles are optional):
-</p>
-<pre><code>![alt text](/path/to/img.jpg "Title")
-</code></pre><p>Reference-style:
-</p>
-<pre><code>![alt text][id]
+<p>Image syntax is very much like link syntax.</p>
+<p>Inline (titles are optional):</p>
+<pre>
+ <code>![alt text](/path/to/img.jpg "Title")
+</code>
+</pre>
+<p>Reference-style:</p>
+<pre>
+ <code>![alt text][id]
[id]: /path/to/img.jpg "Title"
-</code></pre><p>Both of the above examples produce the same output:
-</p>
-<pre><code>&lt;img src="/path/to/img.jpg" alt="alt text" title="Title" /&gt;
-</code></pre>
+</code>
+</pre>
+<p>Both of the above examples produce the same output:</p>
+<pre>
+ <code>&lt;img src="/path/to/img.jpg" alt="alt text" title="Title" /&gt;
+</code>
+</pre>
<h3>Code</h3>
<p>In a regular paragraph, you can create code span by wrapping text in
- backtick quotes. Any ampersands (<code>&amp;</code>) and angle brackets (<code>&lt;</code> or
- <code>&gt;</code>) will automatically be translated into HTML entities. This makes
- it easy to use Markdown to write about HTML example code:
-</p>
-<pre><code>I strongly recommend against using any `&lt;blink&gt;` tags.
+backtick quotes. Any ampersands (<code>&amp;</code>) and angle brackets (<code>&lt;</code> or
+<code>&gt;</code>) will automatically be translated into HTML entities. This makes
+it easy to use Markdown to write about HTML example code:</p>
+<pre>
+ <code>I strongly recommend against using any `&lt;blink&gt;` tags.
I wish SmartyPants used named entities like `&amp;mdash;`
instead of decimal-encoded entites like `&amp;#8212;`.
-</code></pre><p>Output:
-</p>
-<pre><code>&lt;p&gt;I strongly recommend against using any
+</code>
+</pre>
+<p>Output:</p>
+<pre>
+ <code>&lt;p&gt;I strongly recommend against using any
&lt;code&gt;&amp;lt;blink&amp;gt;&lt;/code&gt; tags.&lt;/p&gt;
&lt;p&gt;I wish SmartyPants used named entities like
&lt;code&gt;&amp;amp;mdash;&lt;/code&gt; instead of decimal-encoded
entites like &lt;code&gt;&amp;amp;#8212;&lt;/code&gt;.&lt;/p&gt;
-</code></pre><p>To specify an entire block of pre-formatted code, indent every line of
- the block by 4 spaces or 1 tab. Just like with code spans, <code>&amp;</code>, <code>&lt;</code>,
- and <code>&gt;</code> characters will be escaped automatically.
-</p>
-<p>Markdown:
-</p>
-<pre><code>If you want your page to validate under XHTML 1.0 Strict,
+</code>
+</pre>
+<p>To specify an entire block of pre-formatted code, indent every line of
+the block by 4 spaces or 1 tab. Just like with code spans, <code>&amp;</code>, <code>&lt;</code>,
+and <code>&gt;</code> characters will be escaped automatically.</p>
+<p>Markdown:</p>
+<pre>
+ <code>If you want your page to validate under XHTML 1.0 Strict,
you've got to put paragraph tags in your blockquotes:
&lt;blockquote&gt;
&lt;p&gt;For example.&lt;/p&gt;
&lt;/blockquote&gt;
-</code></pre><p>Output:
-</p>
-<pre><code>&lt;p&gt;If you want your page to validate under XHTML 1.0 Strict,
+</code>
+</pre>
+<p>Output:</p>
+<pre>
+ <code>&lt;p&gt;If you want your page to validate under XHTML 1.0 Strict,
you've got to put paragraph tags in your blockquotes:&lt;/p&gt;
&lt;pre&gt;&lt;code&gt;&amp;lt;blockquote&amp;gt;
&amp;lt;p&amp;gt;For example.&amp;lt;/p&amp;gt;
&amp;lt;/blockquote&amp;gt;
&lt;/code&gt;&lt;/pre&gt;
-</code></pre> \ No newline at end of file
+</code>
+</pre>
+
diff --git a/tests/markdown-test/markdown-syntax.html b/tests/markdown-test/markdown-syntax.html
index c789d95..b3df92f 100644
--- a/tests/markdown-test/markdown-syntax.html
+++ b/tests/markdown-test/markdown-syntax.html
@@ -1,147 +1,121 @@
<h1>Markdown: Syntax</h1>
-<ul id="ProjectSubmenu">
+<p><ul id="ProjectSubmenu">
<li><a href="/projects/markdown/" title="Markdown Project Page">Main</a></li>
<li><a href="/projects/markdown/basics" title="Markdown Basics">Basics</a></li>
<li><a class="selected" title="Markdown Syntax Documentation">Syntax</a></li>
<li><a href="/projects/markdown/license" title="Pricing and License Information">License</a></li>
<li><a href="/projects/markdown/dingus" title="Online Markdown Web Form">Dingus</a></li>
-</ul>
-
+</ul></p>
<ul>
- <li>
- <a href="#overview">Overview</a><ul>
- <li>
- <a href="#philosophy">Philosophy</a>
- </li>
-
- <li>
- <a href="#html">Inline HTML</a>
- </li>
-
- <li>
- <a href="#autoescape">Automatic Escaping for Special Characters</a>
- </li>
-</ul>
-
- </li>
-
- <li>
- <a href="#block">Block Elements</a><ul>
- <li>
- <a href="#p">Paragraphs and Line Breaks</a>
- </li>
-
- <li>
- <a href="#header">Headers</a>
- </li>
-
- <li>
- <a href="#blockquote">Blockquotes</a>
- </li>
-
- <li>
- <a href="#list">Lists</a>
- </li>
-
- <li>
- <a href="#precode">Code Blocks</a>
- </li>
-
- <li>
- <a href="#hr">Horizontal Rules</a>
- </li>
-</ul>
-
- </li>
-
- <li>
- <a href="#span">Span Elements</a><ul>
- <li>
- <a href="#link">Links</a>
- </li>
-
- <li>
- <a href="#em">Emphasis</a>
- </li>
-
- <li>
- <a href="#code">Code</a>
- </li>
-
- <li>
- <a href="#img">Images</a>
- </li>
-</ul>
-
- </li>
-
- <li>
- <a href="#misc">Miscellaneous</a><ul>
- <li>
- <a href="#backslash">Backslash Escapes</a>
- </li>
-
- <li>
- <a href="#autolink">Automatic Links</a>
- </li>
-</ul>
-
- </li>
+ <li>
+ <a href="#overview">Overview</a>
+ <ul>
+ <li>
+ <a href="#philosophy">Philosophy</a>
+ </li>
+ <li>
+ <a href="#html">Inline HTML</a>
+ </li>
+ <li>
+ <a href="#autoescape">Automatic Escaping for Special Characters</a>
+ </li>
+ </ul>
+ </li>
+ <li>
+ <a href="#block">Block Elements</a>
+ <ul>
+ <li>
+ <a href="#p">Paragraphs and Line Breaks</a>
+ </li>
+ <li>
+ <a href="#header">Headers</a>
+ </li>
+ <li>
+ <a href="#blockquote">Blockquotes</a>
+ </li>
+ <li>
+ <a href="#list">Lists</a>
+ </li>
+ <li>
+ <a href="#precode">Code Blocks</a>
+ </li>
+ <li>
+ <a href="#hr">Horizontal Rules</a>
+ </li>
+ </ul>
+ </li>
+ <li>
+ <a href="#span">Span Elements</a>
+ <ul>
+ <li>
+ <a href="#link">Links</a>
+ </li>
+ <li>
+ <a href="#em">Emphasis</a>
+ </li>
+ <li>
+ <a href="#code">Code</a>
+ </li>
+ <li>
+ <a href="#img">Images</a>
+ </li>
+ </ul>
+ </li>
+ <li>
+ <a href="#misc">Miscellaneous</a>
+ <ul>
+ <li>
+ <a href="#backslash">Backslash Escapes</a>
+ </li>
+ <li>
+ <a href="#autolink">Automatic Links</a>
+ </li>
+ </ul>
+ </li>
</ul>
-<p><strong>Note:</strong> This document is itself written using Markdown; you
- can <a href="/projects/markdown/syntax.text">see the source for it by adding '.text' to the URL</a>.
-</p>
+<p>
+ <strong>Note:</strong> This document is itself written using Markdown; you
+can <a href="/projects/markdown/syntax.text">see the source for it by adding '.text' to the URL</a>.</p>
<hr />
-
-<h2 id="overview">Overview</h2>
-
-<h3 id="philosophy">Philosophy</h3>
-
-<p>Markdown is intended to be as easy-to-read and easy-to-write as is feasible.
-</p>
+<p><h2 id="overview">Overview</h2></p>
+<p><h3 id="philosophy">Philosophy</h3></p>
+<p>Markdown is intended to be as easy-to-read and easy-to-write as is feasible.</p>
<p>Readability, however, is emphasized above all else. A Markdown-formatted
- document should be publishable as-is, as plain text, without looking
- like it's been marked up with tags or formatting instructions. While
- Markdown's syntax has been influenced by several existing text-to-HTML
- filters -- including <a href="http://docutils.sourceforge.net/mirror/setext.html">Setext</a>, <a href="http://www.aaronsw.com/2002/atx/">atx</a>, <a href="http://textism.com/tools/textile/">Textile</a>, <a href="http://docutils.sourceforge.net/rst.html">reStructuredText</a>,
- <a href="http://www.triptico.com/software/grutatxt.html">Grutatext</a>, and <a href="http://ettext.taint.org/doc/">EtText</a> -- the single biggest source of
- inspiration for Markdown's syntax is the format of plain text email.
-</p>
+document should be publishable as-is, as plain text, without looking
+like it's been marked up with tags or formatting instructions. While
+Markdown's syntax has been influenced by several existing text-to-HTML
+filters -- including <a href="http://docutils.sourceforge.net/mirror/setext.html">Setext</a>, <a href="http://www.aaronsw.com/2002/atx/">atx</a>, <a href="http://textism.com/tools/textile/">Textile</a>, <a href="http://docutils.sourceforge.net/rst.html">reStructuredText</a>,
+<a href="http://www.triptico.com/software/grutatxt.html">Grutatext</a>, and <a href="http://ettext.taint.org/doc/">EtText</a> -- the single biggest source of
+inspiration for Markdown's syntax is the format of plain text email.</p>
<p>To this end, Markdown's syntax is comprised entirely of punctuation
- characters, which punctuation characters have been carefully chosen so
- as to look like what they mean. E.g., asterisks around a word actually
- look like *emphasis*. Markdown lists look like, well, lists. Even
- blockquotes look like quoted passages of text, assuming you've ever
- used email.
-</p>
-<h3 id="html">Inline HTML</h3>
-
+characters, which punctuation characters have been carefully chosen so
+as to look like what they mean. E.g., asterisks around a word actually
+look like *emphasis*. Markdown lists look like, well, lists. Even
+blockquotes look like quoted passages of text, assuming you've ever
+used email.</p>
+<p><h3 id="html">Inline HTML</h3></p>
<p>Markdown's syntax is intended for one purpose: to be used as a
- format for <em>writing</em> for the web.
-</p>
+format for <em>writing</em> for the web.</p>
<p>Markdown is not a replacement for HTML, or even close to it. Its
- syntax is very small, corresponding only to a very small subset of
- HTML tags. The idea is <em>not</em> to create a syntax that makes it easier
- to insert HTML tags. In my opinion, HTML tags are already easy to
- insert. The idea for Markdown is to make it easy to read, write, and
- edit prose. HTML is a <em>publishing</em> format; Markdown is a <em>writing</em>
- format. Thus, Markdown's formatting syntax only addresses issues that
- can be conveyed in plain text.
-</p>
+syntax is very small, corresponding only to a very small subset of
+HTML tags. The idea is <em>not</em> to create a syntax that makes it easier
+to insert HTML tags. In my opinion, HTML tags are already easy to
+insert. The idea for Markdown is to make it easy to read, write, and
+edit prose. HTML is a <em>publishing</em> format; Markdown is a <em>writing</em>
+format. Thus, Markdown's formatting syntax only addresses issues that
+can be conveyed in plain text.</p>
<p>For any markup that is not covered by Markdown's syntax, you simply
- use HTML itself. There's no need to preface it or delimit it to
- indicate that you're switching from Markdown to HTML; you just use
- the tags.
-</p>
+use HTML itself. There's no need to preface it or delimit it to
+indicate that you're switching from Markdown to HTML; you just use
+the tags.</p>
<p>The only restrictions are that block-level HTML elements -- e.g. <code>&lt;div&gt;</code>,
- <code>&lt;table&gt;</code>, <code>&lt;pre&gt;</code>, <code>&lt;p&gt;</code>, etc. -- must be separated from surrounding
- content by blank lines, and the start and end tags of the block should
- not be indented with tabs or spaces. Markdown is smart enough not
- to add extra (unwanted) <code>&lt;p&gt;</code> tags around HTML block-level tags.
-</p>
-<p>For example, to add an HTML table to a Markdown article:
-</p>
-<pre><code>This is a regular paragraph.
+<code>&lt;table&gt;</code>, <code>&lt;pre&gt;</code>, <code>&lt;p&gt;</code>, etc. -- must be separated from surrounding
+content by blank lines, and the start and end tags of the block should
+not be indented with tabs or spaces. Markdown is smart enough not
+to add extra (unwanted) <code>&lt;p&gt;</code> tags around HTML block-level tags.</p>
+<p>For example, to add an HTML table to a Markdown article:</p>
+<pre>
+ <code>This is a regular paragraph.
&lt;table&gt;
&lt;tr&gt;
@@ -150,159 +124,168 @@
&lt;/table&gt;
This is another regular paragraph.
-</code></pre><p>Note that Markdown formatting syntax is not processed within block-level
- HTML tags. E.g., you can't use Markdown-style <code>*emphasis*</code> inside an
- HTML block.
-</p>
+</code>
+</pre>
+<p>Note that Markdown formatting syntax is not processed within block-level
+HTML tags. E.g., you can't use Markdown-style <code>*emphasis*</code> inside an
+HTML block.</p>
<p>Span-level HTML tags -- e.g. <code>&lt;span&gt;</code>, <code>&lt;cite&gt;</code>, or <code>&lt;del&gt;</code> -- can be
- used anywhere in a Markdown paragraph, list item, or header. If you
- want, you can even use HTML tags instead of Markdown formatting; e.g. if
- you'd prefer to use HTML <code>&lt;a&gt;</code> or <code>&lt;img&gt;</code> tags instead of Markdown's
- link or image syntax, go right ahead.
-</p>
+used anywhere in a Markdown paragraph, list item, or header. If you
+want, you can even use HTML tags instead of Markdown formatting; e.g. if
+you'd prefer to use HTML <code>&lt;a&gt;</code> or <code>&lt;img&gt;</code> tags instead of Markdown's
+link or image syntax, go right ahead.</p>
<p>Unlike block-level HTML tags, Markdown syntax <em>is</em> processed within
- span-level tags.
-</p>
-<h3 id="autoescape">Automatic Escaping for Special Characters</h3>
-
+span-level tags.</p>
+<p><h3 id="autoescape">Automatic Escaping for Special Characters</h3></p>
<p>In HTML, there are two characters that demand special treatment: <code>&lt;</code>
- and <code>&amp;</code>. Left angle brackets are used to start tags; ampersands are
- used to denote HTML entities. If you want to use them as literal
- characters, you must escape them as entities, e.g. <code>&amp;lt;</code>, and
- <code>&amp;amp;</code>.
-</p>
+and <code>&amp;</code>. Left angle brackets are used to start tags; ampersands are
+used to denote HTML entities. If you want to use them as literal
+characters, you must escape them as entities, e.g. <code>&amp;lt;</code>, and
+<code>&amp;amp;</code>.</p>
<p>Ampersands in particular are bedeviling for web writers. If you want to
- write about 'AT&amp;T', you need to write '<code>AT&amp;amp;T</code>'. You even need to
- escape ampersands within URLs. Thus, if you want to link to:
-</p>
-<pre><code>http://images.google.com/images?num=30&amp;q=larry+bird
-</code></pre><p>you need to encode the URL as:
-</p>
-<pre><code>http://images.google.com/images?num=30&amp;amp;q=larry+bird
-</code></pre><p>in your anchor tag <code>href</code> attribute. Needless to say, this is easy to
- forget, and is probably the single most common source of HTML validation
- errors in otherwise well-marked-up web sites.
-</p>
+write about 'AT&amp;T', you need to write '<code>AT&amp;amp;T</code>'. You even need to
+escape ampersands within URLs. Thus, if you want to link to:</p>
+<pre>
+ <code>http://images.google.com/images?num=30&amp;q=larry+bird
+</code>
+</pre>
+<p>you need to encode the URL as:</p>
+<pre>
+ <code>http://images.google.com/images?num=30&amp;amp;q=larry+bird
+</code>
+</pre>
+<p>in your anchor tag <code>href</code> attribute. Needless to say, this is easy to
+forget, and is probably the single most common source of HTML validation
+errors in otherwise well-marked-up web sites.</p>
<p>Markdown allows you to use these characters naturally, taking care of
- all the necessary escaping for you. If you use an ampersand as part of
- an HTML entity, it remains unchanged; otherwise it will be translated
- into <code>&amp;amp;</code>.
-</p>
-<p>So, if you want to include a copyright symbol in your article, you can write:
-</p>
-<pre><code>&amp;copy;
-</code></pre><p>and Markdown will leave it alone. But if you write:
-</p>
-<pre><code>AT&amp;T
-</code></pre><p>Markdown will translate it to:
-</p>
-<pre><code>AT&amp;amp;T
-</code></pre><p>Similarly, because Markdown supports <a href="#html">inline HTML</a>, if you use
- angle brackets as delimiters for HTML tags, Markdown will treat them as
- such. But if you write:
-</p>
-<pre><code>4 &lt; 5
-</code></pre><p>Markdown will translate it to:
-</p>
-<pre><code>4 &amp;lt; 5
-</code></pre><p>However, inside Markdown code spans and blocks, angle brackets and
- ampersands are <em>always</em> encoded automatically. This makes it easy to use
- Markdown to write about HTML code. (As opposed to raw HTML, which is a
- terrible format for writing about HTML syntax, because every single <code>&lt;</code>
- and <code>&amp;</code> in your example code needs to be escaped.)
-</p>
+all the necessary escaping for you. If you use an ampersand as part of
+an HTML entity, it remains unchanged; otherwise it will be translated
+into <code>&amp;amp;</code>.</p>
+<p>So, if you want to include a copyright symbol in your article, you can write:</p>
+<pre>
+ <code>&amp;copy;
+</code>
+</pre>
+<p>and Markdown will leave it alone. But if you write:</p>
+<pre>
+ <code>AT&amp;T
+</code>
+</pre>
+<p>Markdown will translate it to:</p>
+<pre>
+ <code>AT&amp;amp;T
+</code>
+</pre>
+<p>Similarly, because Markdown supports <a href="#html">inline HTML</a>, if you use
+angle brackets as delimiters for HTML tags, Markdown will treat them as
+such. But if you write:</p>
+<pre>
+ <code>4 &lt; 5
+</code>
+</pre>
+<p>Markdown will translate it to:</p>
+<pre>
+ <code>4 &amp;lt; 5
+</code>
+</pre>
+<p>However, inside Markdown code spans and blocks, angle brackets and
+ampersands are <em>always</em> encoded automatically. This makes it easy to use
+Markdown to write about HTML code. (As opposed to raw HTML, which is a
+terrible format for writing about HTML syntax, because every single <code>&lt;</code>
+and <code>&amp;</code> in your example code needs to be escaped.)</p>
<hr />
-
-<h2 id="block">Block Elements</h2>
-
-<h3 id="p">Paragraphs and Line Breaks</h3>
-
+<p><h2 id="block">Block Elements</h2></p>
+<p><h3 id="p">Paragraphs and Line Breaks</h3></p>
<p>A paragraph is simply one or more consecutive lines of text, separated
- by one or more blank lines. (A blank line is any line that looks like a
- blank line -- a line containing nothing but spaces or tabs is considered
- blank.) Normal paragraphs should not be intended with spaces or tabs.
-</p>
+by one or more blank lines. (A blank line is any line that looks like a
+blank line -- a line containing nothing but spaces or tabs is considered
+blank.) Normal paragraphs should not be intended with spaces or tabs.</p>
<p>The implication of the "one or more consecutive lines of text" rule is
- that Markdown supports "hard-wrapped" text paragraphs. This differs
- significantly from most other text-to-HTML formatters (including Movable
- Type's "Convert Line Breaks" option) which translate every line break
- character in a paragraph into a <code>&lt;br /&gt;</code> tag.
-</p>
+that Markdown supports "hard-wrapped" text paragraphs. This differs
+significantly from most other text-to-HTML formatters (including Movable
+Type's "Convert Line Breaks" option) which translate every line break
+character in a paragraph into a <code>&lt;br /&gt;</code> tag.</p>
<p>When you <em>do</em> want to insert a <code>&lt;br /&gt;</code> break tag using Markdown, you
- end a line with two or more spaces, then type return.
-</p>
+end a line with two or more spaces, then type return.</p>
<p>Yes, this takes a tad more effort to create a <code>&lt;br /&gt;</code>, but a simplistic
- "every line break is a <code>&lt;br /&gt;</code>" rule wouldn't work for Markdown.
- Markdown's email-style <a href="#blockquote">blockquoting</a> and multi-paragraph <a href="#list">list items</a>
- work best -- and look better -- when you format them with hard breaks.
-</p>
-<h3 id="header">Headers</h3>
-
-<p>Markdown supports two styles of headers, <a href="http://docutils.sourceforge.net/mirror/setext.html">Setext</a> and <a href="http://www.aaronsw.com/2002/atx/">atx</a>.
-</p>
+"every line break is a <code>&lt;br /&gt;</code>" rule wouldn't work for Markdown.
+Markdown's email-style <a href="#blockquote">blockquoting</a> and multi-paragraph <a href="#list">list items</a>
+work best -- and look better -- when you format them with hard breaks.</p>
+<p><h3 id="header">Headers</h3></p>
+<p>Markdown supports two styles of headers, <a href="http://docutils.sourceforge.net/mirror/setext.html">Setext</a> and <a href="http://www.aaronsw.com/2002/atx/">atx</a>.</p>
<p>Setext-style headers are "underlined" using equal signs (for first-level
- headers) and dashes (for second-level headers). For example:
-</p>
-<pre><code>This is an H1
+headers) and dashes (for second-level headers). For example:</p>
+<pre>
+ <code>This is an H1
=============
This is an H2
-------------
-</code></pre><p>Any number of underlining <code>=</code>'s or <code>-</code>'s will work.
-</p>
+</code>
+</pre>
+<p>Any number of underlining <code>=</code>'s or <code>-</code>'s will work.</p>
<p>Atx-style headers use 1-6 hash characters at the start of the line,
- corresponding to header levels 1-6. For example:
-</p>
-<pre><code># This is an H1
+corresponding to header levels 1-6. For example:</p>
+<pre>
+ <code># This is an H1
## This is an H2
###### This is an H6
-</code></pre><p>Optionally, you may "close" atx-style headers. This is purely
- cosmetic -- you can use this if you think it looks better. The
- closing hashes don't even need to match the number of hashes
- used to open the header. (The number of opening hashes
- determines the header level.) :
-</p>
-<pre><code># This is an H1 #
+</code>
+</pre>
+<p>Optionally, you may "close" atx-style headers. This is purely
+cosmetic -- you can use this if you think it looks better. The
+closing hashes don't even need to match the number of hashes
+used to open the header. (The number of opening hashes
+determines the header level.) :</p>
+<pre>
+ <code># This is an H1 #
## This is an H2 ##
### This is an H3 ######
-</code></pre><h3 id="blockquote">Blockquotes</h3>
-
+</code>
+</pre>
+<p><h3 id="blockquote">Blockquotes</h3></p>
<p>Markdown uses email-style <code>&gt;</code> characters for blockquoting. If you're
- familiar with quoting passages of text in an email message, then you
- know how to create a blockquote in Markdown. It looks best if you hard
- wrap the text and put a <code>&gt;</code> before every line:
-</p>
-<pre><code>&gt; This is a blockquote with two paragraphs. Lorem ipsum dolor sit amet,
+familiar with quoting passages of text in an email message, then you
+know how to create a blockquote in Markdown. It looks best if you hard
+wrap the text and put a <code>&gt;</code> before every line:</p>
+<pre>
+ <code>&gt; This is a blockquote with two paragraphs. Lorem ipsum dolor sit amet,
&gt; consectetuer adipiscing elit. Aliquam hendrerit mi posuere lectus.
&gt; Vestibulum enim wisi, viverra nec, fringilla in, laoreet vitae, risus.
&gt;
&gt; Donec sit amet nisl. Aliquam semper ipsum sit amet velit. Suspendisse
&gt; id sem consectetuer libero luctus adipiscing.
-</code></pre><p>Markdown allows you to be lazy and only put the <code>&gt;</code> before the first
- line of a hard-wrapped paragraph:
-</p>
-<pre><code>&gt; This is a blockquote with two paragraphs. Lorem ipsum dolor sit amet,
+</code>
+</pre>
+<p>Markdown allows you to be lazy and only put the <code>&gt;</code> before the first
+line of a hard-wrapped paragraph:</p>
+<pre>
+ <code>&gt; This is a blockquote with two paragraphs. Lorem ipsum dolor sit amet,
consectetuer adipiscing elit. Aliquam hendrerit mi posuere lectus.
Vestibulum enim wisi, viverra nec, fringilla in, laoreet vitae, risus.
&gt; Donec sit amet nisl. Aliquam semper ipsum sit amet velit. Suspendisse
id sem consectetuer libero luctus adipiscing.
-</code></pre><p>Blockquotes can be nested (i.e. a blockquote-in-a-blockquote) by
- adding additional levels of <code>&gt;</code>:
-</p>
-<pre><code>&gt; This is the first level of quoting.
+</code>
+</pre>
+<p>Blockquotes can be nested (i.e. a blockquote-in-a-blockquote) by
+adding additional levels of <code>&gt;</code>:</p>
+<pre>
+ <code>&gt; This is the first level of quoting.
&gt;
&gt; &gt; This is nested blockquote.
&gt;
&gt; Back to the first level.
-</code></pre><p>Blockquotes can contain other Markdown elements, including headers, lists,
- and code blocks:
-</p>
-<pre><code>&gt; ## This is a header.
+</code>
+</pre>
+<p>Blockquotes can contain other Markdown elements, including headers, lists,
+and code blocks:</p>
+<pre>
+ <code>&gt; ## This is a header.
&gt;
&gt; 1. This is the first list item.
&gt; 2. This is the second list item.
@@ -310,108 +293,130 @@ id sem consectetuer libero luctus adipiscing.
&gt; Here's some example code:
&gt;
&gt; return shell_exec("echo $input | $markdown_script");
-</code></pre><p>Any decent text editor should make email-style quoting easy. For
- example, with BBEdit, you can make a selection and choose Increase
- Quote Level from the Text menu.
-</p>
-<h3 id="list">Lists</h3>
-
-<p>Markdown supports ordered (numbered) and unordered (bulleted) lists.
-</p>
+</code>
+</pre>
+<p>Any decent text editor should make email-style quoting easy. For
+example, with BBEdit, you can make a selection and choose Increase
+Quote Level from the Text menu.</p>
+<p><h3 id="list">Lists</h3></p>
+<p>Markdown supports ordered (numbered) and unordered (bulleted) lists.</p>
<p>Unordered lists use asterisks, pluses, and hyphens -- interchangably
- -- as list markers:
-</p>
-<pre><code>* Red
+-- as list markers:</p>
+<pre>
+ <code>* Red
* Green
* Blue
-</code></pre><p>is equivalent to:
-</p>
-<pre><code>+ Red
+</code>
+</pre>
+<p>is equivalent to:</p>
+<pre>
+ <code>+ Red
+ Green
+ Blue
-</code></pre><p>and:
-</p>
-<pre><code>- Red
+</code>
+</pre>
+<p>and:</p>
+<pre>
+ <code>- Red
- Green
- Blue
-</code></pre><p>Ordered lists use numbers followed by periods:
-</p>
-<pre><code>1. Bird
+</code>
+</pre>
+<p>Ordered lists use numbers followed by periods:</p>
+<pre>
+ <code>1. Bird
2. McHale
3. Parish
-</code></pre><p>It's important to note that the actual numbers you use to mark the
- list have no effect on the HTML output Markdown produces. The HTML
- Markdown produces from the above list is:
-</p>
-<pre><code>&lt;ol&gt;
+</code>
+</pre>
+<p>It's important to note that the actual numbers you use to mark the
+list have no effect on the HTML output Markdown produces. The HTML
+Markdown produces from the above list is:</p>
+<pre>
+ <code>&lt;ol&gt;
&lt;li&gt;Bird&lt;/li&gt;
&lt;li&gt;McHale&lt;/li&gt;
&lt;li&gt;Parish&lt;/li&gt;
&lt;/ol&gt;
-</code></pre><p>If you instead wrote the list in Markdown like this:
-</p>
-<pre><code>1. Bird
+</code>
+</pre>
+<p>If you instead wrote the list in Markdown like this:</p>
+<pre>
+ <code>1. Bird
1. McHale
1. Parish
-</code></pre><p>or even:
-</p>
-<pre><code>3. Bird
+</code>
+</pre>
+<p>or even:</p>
+<pre>
+ <code>3. Bird
1. McHale
8. Parish
-</code></pre><p>you'd get the exact same HTML output. The point is, if you want to,
- you can use ordinal numbers in your ordered Markdown lists, so that
- the numbers in your source match the numbers in your published HTML.
- But if you want to be lazy, you don't have to.
-</p>
+</code>
+</pre>
+<p>you'd get the exact same HTML output. The point is, if you want to,
+you can use ordinal numbers in your ordered Markdown lists, so that
+the numbers in your source match the numbers in your published HTML.
+But if you want to be lazy, you don't have to.</p>
<p>If you do use lazy list numbering, however, you should still start the
- list with the number 1. At some point in the future, Markdown may support
- starting ordered lists at an arbitrary number.
-</p>
+list with the number 1. At some point in the future, Markdown may support
+starting ordered lists at an arbitrary number.</p>
<p>List markers typically start at the left margin, but may be indented by
- up to three spaces. List markers must be followed by one or more spaces
- or a tab.
-</p>
-<p>To make lists look nice, you can wrap items with hanging indents:
-</p>
-<pre><code>* Lorem ipsum dolor sit amet, consectetuer adipiscing elit.
+up to three spaces. List markers must be followed by one or more spaces
+or a tab.</p>
+<p>To make lists look nice, you can wrap items with hanging indents:</p>
+<pre>
+ <code>* Lorem ipsum dolor sit amet, consectetuer adipiscing elit.
Aliquam hendrerit mi posuere lectus. Vestibulum enim wisi,
viverra nec, fringilla in, laoreet vitae, risus.
* Donec sit amet nisl. Aliquam semper ipsum sit amet velit.
Suspendisse id sem consectetuer libero luctus adipiscing.
-</code></pre><p>But if you want to be lazy, you don't have to:
-</p>
-<pre><code>* Lorem ipsum dolor sit amet, consectetuer adipiscing elit.
+</code>
+</pre>
+<p>But if you want to be lazy, you don't have to:</p>
+<pre>
+ <code>* Lorem ipsum dolor sit amet, consectetuer adipiscing elit.
Aliquam hendrerit mi posuere lectus. Vestibulum enim wisi,
viverra nec, fringilla in, laoreet vitae, risus.
* Donec sit amet nisl. Aliquam semper ipsum sit amet velit.
Suspendisse id sem consectetuer libero luctus adipiscing.
-</code></pre><p>If list items are separated by blank lines, Markdown will wrap the
- items in <code>&lt;p&gt;</code> tags in the HTML output. For example, this input:
-</p>
-<pre><code>* Bird
+</code>
+</pre>
+<p>If list items are separated by blank lines, Markdown will wrap the
+items in <code>&lt;p&gt;</code> tags in the HTML output. For example, this input:</p>
+<pre>
+ <code>* Bird
* Magic
-</code></pre><p>will turn into:
-</p>
-<pre><code>&lt;ul&gt;
+</code>
+</pre>
+<p>will turn into:</p>
+<pre>
+ <code>&lt;ul&gt;
&lt;li&gt;Bird&lt;/li&gt;
&lt;li&gt;Magic&lt;/li&gt;
&lt;/ul&gt;
-</code></pre><p>But this:
-</p>
-<pre><code>* Bird
+</code>
+</pre>
+<p>But this:</p>
+<pre>
+ <code>* Bird
* Magic
-</code></pre><p>will turn into:
-</p>
-<pre><code>&lt;ul&gt;
+</code>
+</pre>
+<p>will turn into:</p>
+<pre>
+ <code>&lt;ul&gt;
&lt;li&gt;&lt;p&gt;Bird&lt;/p&gt;&lt;/li&gt;
&lt;li&gt;&lt;p&gt;Magic&lt;/p&gt;&lt;/li&gt;
&lt;/ul&gt;
-</code></pre><p>List items may consist of multiple paragraphs. Each subsequent
- paragraph in a list item must be intended by either 4 spaces
- or one tab:
-</p>
-<pre><code>1. This is a list item with two paragraphs. Lorem ipsum dolor
+</code>
+</pre>
+<p>List items may consist of multiple paragraphs. Each subsequent
+paragraph in a list item must be intended by either 4 spaces
+or one tab:</p>
+<pre>
+ <code>1. This is a list item with two paragraphs. Lorem ipsum dolor
sit amet, consectetuer adipiscing elit. Aliquam hendrerit
mi posuere lectus.
@@ -420,103 +425,122 @@ Suspendisse id sem consectetuer libero luctus adipiscing.
sit amet velit.
2. Suspendisse id sem consectetuer libero luctus adipiscing.
-</code></pre><p>It looks nice if you indent every line of the subsequent
- paragraphs, but here again, Markdown will allow you to be
- lazy:
-</p>
-<pre><code>* This is a list item with two paragraphs.
+</code>
+</pre>
+<p>It looks nice if you indent every line of the subsequent
+paragraphs, but here again, Markdown will allow you to be
+lazy:</p>
+<pre>
+ <code>* This is a list item with two paragraphs.
This is the second paragraph in the list item. You're
only required to indent the first line. Lorem ipsum dolor
sit amet, consectetuer adipiscing elit.
* Another item in the same list.
-</code></pre><p>To put a blockquote within a list item, the blockquote's <code>&gt;</code>
- delimiters need to be indented:
-</p>
-<pre><code>* A list item with a blockquote:
+</code>
+</pre>
+<p>To put a blockquote within a list item, the blockquote's <code>&gt;</code>
+delimiters need to be indented:</p>
+<pre>
+ <code>* A list item with a blockquote:
&gt; This is a blockquote
&gt; inside a list item.
-</code></pre><p>To put a code block within a list item, the code block needs
- to be indented <em>twice</em> -- 8 spaces or two tabs:
-</p>
-<pre><code>* A list item with a code block:
+</code>
+</pre>
+<p>To put a code block within a list item, the code block needs
+to be indented <em>twice</em> -- 8 spaces or two tabs:</p>
+<pre>
+ <code>* A list item with a code block:
&lt;code goes here&gt;
-</code></pre><p>It's worth noting that it's possible to trigger an ordered list by
- accident, by writing something like this:
-</p>
-<pre><code>1986. What a great season.
-</code></pre><p>In other words, a <em>number-period-space</em> sequence at the beginning of a
- line. To avoid this, you can backslash-escape the period:
-</p>
-<pre><code>1986\. What a great season.
-</code></pre><h3 id="precode">Code Blocks</h3>
-
+</code>
+</pre>
+<p>It's worth noting that it's possible to trigger an ordered list by
+accident, by writing something like this:</p>
+<pre>
+ <code>1986. What a great season.
+</code>
+</pre>
+<p>In other words, a <em>number-period-space</em> sequence at the beginning of a
+line. To avoid this, you can backslash-escape the period:</p>
+<pre>
+ <code>1986\. What a great season.
+</code>
+</pre>
+<p><h3 id="precode">Code Blocks</h3></p>
<p>Pre-formatted code blocks are used for writing about programming or
- markup source code. Rather than forming normal paragraphs, the lines
- of a code block are interpreted literally. Markdown wraps a code block
- in both <code>&lt;pre&gt;</code> and <code>&lt;code&gt;</code> tags.
-</p>
+markup source code. Rather than forming normal paragraphs, the lines
+of a code block are interpreted literally. Markdown wraps a code block
+in both <code>&lt;pre&gt;</code> and <code>&lt;code&gt;</code> tags.</p>
<p>To produce a code block in Markdown, simply indent every line of the
- block by at least 4 spaces or 1 tab. For example, given this input:
-</p>
-<pre><code>This is a normal paragraph:
+block by at least 4 spaces or 1 tab. For example, given this input:</p>
+<pre>
+ <code>This is a normal paragraph:
This is a code block.
-</code></pre><p>Markdown will generate:
-</p>
-<pre><code>&lt;p&gt;This is a normal paragraph:&lt;/p&gt;
+</code>
+</pre>
+<p>Markdown will generate:</p>
+<pre>
+ <code>&lt;p&gt;This is a normal paragraph:&lt;/p&gt;
&lt;pre&gt;&lt;code&gt;This is a code block.
&lt;/code&gt;&lt;/pre&gt;
-</code></pre><p>One level of indentation -- 4 spaces or 1 tab -- is removed from each
- line of the code block. For example, this:
-</p>
-<pre><code>Here is an example of AppleScript:
+</code>
+</pre>
+<p>One level of indentation -- 4 spaces or 1 tab -- is removed from each
+line of the code block. For example, this:</p>
+<pre>
+ <code>Here is an example of AppleScript:
tell application "Foo"
beep
end tell
-</code></pre><p>will turn into:
-</p>
-<pre><code>&lt;p&gt;Here is an example of AppleScript:&lt;/p&gt;
+</code>
+</pre>
+<p>will turn into:</p>
+<pre>
+ <code>&lt;p&gt;Here is an example of AppleScript:&lt;/p&gt;
&lt;pre&gt;&lt;code&gt;tell application "Foo"
beep
end tell
&lt;/code&gt;&lt;/pre&gt;
-</code></pre><p>A code block continues until it reaches a line that is not indented
- (or the end of the article).
-</p>
+</code>
+</pre>
+<p>A code block continues until it reaches a line that is not indented
+(or the end of the article).</p>
<p>Within a code block, ampersands (<code>&amp;</code>) and angle brackets (<code>&lt;</code> and <code>&gt;</code>)
- are automatically converted into HTML entities. This makes it very
- easy to include example HTML source code using Markdown -- just paste
- it and indent it, and Markdown will handle the hassle of encoding the
- ampersands and angle brackets. For example, this:
-</p>
-<pre><code> &lt;div class="footer"&gt;
+are automatically converted into HTML entities. This makes it very
+easy to include example HTML source code using Markdown -- just paste
+it and indent it, and Markdown will handle the hassle of encoding the
+ampersands and angle brackets. For example, this:</p>
+<pre>
+ <code> &lt;div class="footer"&gt;
&amp;copy; 2004 Foo Corporation
&lt;/div&gt;
-</code></pre><p>will turn into:
-</p>
-<pre><code>&lt;pre&gt;&lt;code&gt;&amp;lt;div class="footer"&amp;gt;
+</code>
+</pre>
+<p>will turn into:</p>
+<pre>
+ <code>&lt;pre&gt;&lt;code&gt;&amp;lt;div class="footer"&amp;gt;
&amp;amp;copy; 2004 Foo Corporation
&amp;lt;/div&amp;gt;
&lt;/code&gt;&lt;/pre&gt;
-</code></pre><p>Regular Markdown syntax is not processed within code blocks. E.g.,
- asterisks are just literal asterisks within a code block. This means
- it's also easy to use Markdown to write about Markdown's own syntax.
-</p>
-<h3 id="hr">Horizontal Rules</h3>
-
+</code>
+</pre>
+<p>Regular Markdown syntax is not processed within code blocks. E.g.,
+asterisks are just literal asterisks within a code block. This means
+it's also easy to use Markdown to write about Markdown's own syntax.</p>
+<p><h3 id="hr">Horizontal Rules</h3></p>
<p>You can produce a horizontal rule tag (<code>&lt;hr /&gt;</code>) by placing three or
- more hyphens, asterisks, or underscores on a line by themselves. If you
- wish, you may use spaces between the hyphens or asterisks. Each of the
- following lines will produce a horizontal rule:
-</p>
-<pre><code>* * *
+more hyphens, asterisks, or underscores on a line by themselves. If you
+wish, you may use spaces between the hyphens or asterisks. Each of the
+following lines will produce a horizontal rule:</p>
+<pre>
+ <code>* * *
***
@@ -527,317 +551,349 @@ end tell
---------------------------------------
_ _ _
-</code></pre><hr />
-
-<h2 id="span">Span Elements</h2>
-
-<h3 id="link">Links</h3>
-
-<p>Markdown supports two style of links: <em>inline</em> and <em>reference</em>.
-</p>
-<p>In both styles, the link text is delimited by [square brackets].
-</p>
+</code>
+</pre>
+<hr />
+<p><h2 id="span">Span Elements</h2></p>
+<p><h3 id="link">Links</h3></p>
+<p>Markdown supports two style of links: <em>inline</em> and <em>reference</em>.</p>
+<p>In both styles, the link text is delimited by [square brackets].</p>
<p>To create an inline link, use a set of regular parentheses immediately
- after the link text's closing square bracket. Inside the parentheses,
- put the URL where you want the link to point, along with an <em>optional</em>
- title for the link, surrounded in quotes. For example:
-</p>
-<pre><code>This is [an example](http://example.com/ "Title") inline link.
+after the link text's closing square bracket. Inside the parentheses,
+put the URL where you want the link to point, along with an <em>optional</em>
+title for the link, surrounded in quotes. For example:</p>
+<pre>
+ <code>This is [an example](http://example.com/ "Title") inline link.
[This link](http://example.net/) has no title attribute.
-</code></pre><p>Will produce:
-</p>
-<pre><code>&lt;p&gt;This is &lt;a href="http://example.com/" title="Title"&gt;
+</code>
+</pre>
+<p>Will produce:</p>
+<pre>
+ <code>&lt;p&gt;This is &lt;a href="http://example.com/" title="Title"&gt;
an example&lt;/a&gt; inline link.&lt;/p&gt;
&lt;p&gt;&lt;a href="http://example.net/"&gt;This link&lt;/a&gt; has no
title attribute.&lt;/p&gt;
-</code></pre><p>If you're referring to a local resource on the same server, you can
- use relative paths:
-</p>
-<pre><code>See my [About](/about/) page for details.
-</code></pre><p>Reference-style links use a second set of square brackets, inside
- which you place a label of your choosing to identify the link:
-</p>
-<pre><code>This is [an example][id] reference-style link.
-</code></pre><p>You can optionally use a space to separate the sets of brackets:
-</p>
-<pre><code>This is [an example] [id] reference-style link.
-</code></pre><p>Then, anywhere in the document, you define your link label like this,
- on a line by itself:
-</p>
-<pre><code>[id]: http://example.com/ "Optional Title Here"
-</code></pre><p>That is:
-</p>
+</code>
+</pre>
+<p>If you're referring to a local resource on the same server, you can
+use relative paths:</p>
+<pre>
+ <code>See my [About](/about/) page for details.
+</code>
+</pre>
+<p>Reference-style links use a second set of square brackets, inside
+which you place a label of your choosing to identify the link:</p>
+<pre>
+ <code>This is [an example][id] reference-style link.
+</code>
+</pre>
+<p>You can optionally use a space to separate the sets of brackets:</p>
+<pre>
+ <code>This is [an example] [id] reference-style link.
+</code>
+</pre>
+<p>Then, anywhere in the document, you define your link label like this,
+on a line by itself:</p>
+<pre>
+ <code>[id]: http://example.com/ "Optional Title Here"
+</code>
+</pre>
+<p>That is:</p>
<ul>
- <li>
- Square brackets containing the link identifier (optionally
- indented from the left margin using up to three spaces);
- </li>
-
- <li>
- followed by a colon;
- </li>
-
- <li>
- followed by one or more spaces (or tabs);
- </li>
-
- <li>
- followed by the URL for the link;
- </li>
-
- <li>
- optionally followed by a title attribute for the link, enclosed
- in double or single quotes.
- </li>
+ <li>Square brackets containing the link identifier (optionally
+indented from the left margin using up to three spaces);</li>
+ <li>followed by a colon;</li>
+ <li>followed by one or more spaces (or tabs);</li>
+ <li>followed by the URL for the link;</li>
+ <li>optionally followed by a title attribute for the link, enclosed
+in double or single quotes.</li>
</ul>
-<p>The link URL may, optionally, be surrounded by angle brackets:
-</p>
-<pre><code>[id]: &lt;http://example.com/&gt; "Optional Title Here"
-</code></pre><p>You can put the title attribute on the next line and use extra spaces
- or tabs for padding, which tends to look better with longer URLs:
-</p>
-<pre><code>[id]: http://example.com/longish/path/to/resource/here
+<p>The link URL may, optionally, be surrounded by angle brackets:</p>
+<pre>
+ <code>[id]: &lt;http://example.com/&gt; "Optional Title Here"
+</code>
+</pre>
+<p>You can put the title attribute on the next line and use extra spaces
+or tabs for padding, which tends to look better with longer URLs:</p>
+<pre>
+ <code>[id]: http://example.com/longish/path/to/resource/here
"Optional Title Here"
-</code></pre><p>Link definitions are only used for creating links during Markdown
- processing, and are stripped from your document in the HTML output.
-</p>
-<p>Link definition names may constist of letters, numbers, spaces, and punctuation -- but they are <em>not</em> case sensitive. E.g. these two links:
-</p>
-<pre><code>[link text][a]
+</code>
+</pre>
+<p>Link definitions are only used for creating links during Markdown
+processing, and are stripped from your document in the HTML output.</p>
+<p>Link definition names may constist of letters, numbers, spaces, and punctuation -- but they are <em>not</em> case sensitive. E.g. these two links:</p>
+<pre>
+ <code>[link text][a]
[link text][A]
-</code></pre><p>are equivalent.
-</p>
+</code>
+</pre>
+<p>are equivalent.</p>
<p>The <em>implicit link name</em> shortcut allows you to omit the name of the
- link, in which case the link text itself is used as the name.
- Just use an empty set of square brackets -- e.g., to link the word
- "Google" to the google.com web site, you could simply write:
-</p>
-<pre><code>[Google][]
-</code></pre><p>And then define the link:
-</p>
-<pre><code>[Google]: http://google.com/
-</code></pre><p>Because link names may contain spaces, this shortcut even works for
- multiple words in the link text:
-</p>
-<pre><code>Visit [Daring Fireball][] for more information.
-</code></pre><p>And then define the link:
-</p>
-<pre><code>[Daring Fireball]: http://daringfireball.net/
-</code></pre><p>Link definitions can be placed anywhere in your Markdown document. I
- tend to put them immediately after each paragraph in which they're
- used, but if you want, you can put them all at the end of your
- document, sort of like footnotes.
-</p>
-<p>Here's an example of reference links in action:
-</p>
-<pre><code>I get 10 times more traffic from [Google] [1] than from
+link, in which case the link text itself is used as the name.
+Just use an empty set of square brackets -- e.g., to link the word
+"Google" to the google.com web site, you could simply write:</p>
+<pre>
+ <code>[Google][]
+</code>
+</pre>
+<p>And then define the link:</p>
+<pre>
+ <code>[Google]: http://google.com/
+</code>
+</pre>
+<p>Because link names may contain spaces, this shortcut even works for
+multiple words in the link text:</p>
+<pre>
+ <code>Visit [Daring Fireball][] for more information.
+</code>
+</pre>
+<p>And then define the link:</p>
+<pre>
+ <code>[Daring Fireball]: http://daringfireball.net/
+</code>
+</pre>
+<p>Link definitions can be placed anywhere in your Markdown document. I
+tend to put them immediately after each paragraph in which they're
+used, but if you want, you can put them all at the end of your
+document, sort of like footnotes.</p>
+<p>Here's an example of reference links in action:</p>
+<pre>
+ <code>I get 10 times more traffic from [Google] [1] than from
[Yahoo] [2] or [MSN] [3].
[1]: http://google.com/ "Google"
[2]: http://search.yahoo.com/ "Yahoo Search"
[3]: http://search.msn.com/ "MSN Search"
-</code></pre><p>Using the implicit link name shortcut, you could instead write:
-</p>
-<pre><code>I get 10 times more traffic from [Google][] than from
+</code>
+</pre>
+<p>Using the implicit link name shortcut, you could instead write:</p>
+<pre>
+ <code>I get 10 times more traffic from [Google][] than from
[Yahoo][] or [MSN][].
[google]: http://google.com/ "Google"
[yahoo]: http://search.yahoo.com/ "Yahoo Search"
[msn]: http://search.msn.com/ "MSN Search"
-</code></pre><p>Both of the above examples will produce the following HTML output:
-</p>
-<pre><code>&lt;p&gt;I get 10 times more traffic from &lt;a href="http://google.com/"
+</code>
+</pre>
+<p>Both of the above examples will produce the following HTML output:</p>
+<pre>
+ <code>&lt;p&gt;I get 10 times more traffic from &lt;a href="http://google.com/"
title="Google"&gt;Google&lt;/a&gt; than from
&lt;a href="http://search.yahoo.com/" title="Yahoo Search"&gt;Yahoo&lt;/a&gt;
or &lt;a href="http://search.msn.com/" title="MSN Search"&gt;MSN&lt;/a&gt;.&lt;/p&gt;
-</code></pre><p>For comparison, here is the same paragraph written using
- Markdown's inline link style:
-</p>
-<pre><code>I get 10 times more traffic from [Google](http://google.com/ "Google")
+</code>
+</pre>
+<p>For comparison, here is the same paragraph written using
+Markdown's inline link style:</p>
+<pre>
+ <code>I get 10 times more traffic from [Google](http://google.com/ "Google")
than from [Yahoo](http://search.yahoo.com/ "Yahoo Search") or
[MSN](http://search.msn.com/ "MSN Search").
-</code></pre><p>The point of reference-style links is not that they're easier to
- write. The point is that with reference-style links, your document
- source is vastly more readable. Compare the above examples: using
- reference-style links, the paragraph itself is only 81 characters
- long; with inline-style links, it's 176 characters; and as raw HTML,
- it's 234 characters. In the raw HTML, there's more markup than there
- is text.
-</p>
+</code>
+</pre>
+<p>The point of reference-style links is not that they're easier to
+write. The point is that with reference-style links, your document
+source is vastly more readable. Compare the above examples: using
+reference-style links, the paragraph itself is only 81 characters
+long; with inline-style links, it's 176 characters; and as raw HTML,
+it's 234 characters. In the raw HTML, there's more markup than there
+is text.</p>
<p>With Markdown's reference-style links, a source document much more
- closely resembles the final output, as rendered in a browser. By
- allowing you to move the markup-related metadata out of the paragraph,
- you can add links without interrupting the narrative flow of your
- prose.
-</p>
-<h3 id="em">Emphasis</h3>
-
+closely resembles the final output, as rendered in a browser. By
+allowing you to move the markup-related metadata out of the paragraph,
+you can add links without interrupting the narrative flow of your
+prose.</p>
+<p><h3 id="em">Emphasis</h3></p>
<p>Markdown treats asterisks (<code>*</code>) and underscores (<code>_</code>) as indicators of
- emphasis. Text wrapped with one <code>*</code> or <code>_</code> will be wrapped with an
- HTML <code>&lt;em&gt;</code> tag; double <code>*</code>'s or <code>_</code>'s will be wrapped with an HTML
- <code>&lt;strong&gt;</code> tag. E.g., this input:
-</p>
-<pre><code>*single asterisks*
+emphasis. Text wrapped with one <code>*</code> or <code>_</code> will be wrapped with an
+HTML <code>&lt;em&gt;</code> tag; double <code>*</code>'s or <code>_</code>'s will be wrapped with an HTML
+<code>&lt;strong&gt;</code> tag. E.g., this input:</p>
+<pre>
+ <code>*single asterisks*
_single underscores_
**double asterisks**
__double underscores__
-</code></pre><p>will produce:
-</p>
-<pre><code>&lt;em&gt;single asterisks&lt;/em&gt;
+</code>
+</pre>
+<p>will produce:</p>
+<pre>
+ <code>&lt;em&gt;single asterisks&lt;/em&gt;
&lt;em&gt;single underscores&lt;/em&gt;
&lt;strong&gt;double asterisks&lt;/strong&gt;
&lt;strong&gt;double underscores&lt;/strong&gt;
-</code></pre><p>You can use whichever style you prefer; the lone restriction is that
- the same character must be used to open and close an emphasis span.
-</p>
-<p>Emphasis can be used in the middle of a word:
-</p>
-<pre><code>un*fucking*believable
-</code></pre><p>But if you surround an <code>*</code> or <code>_</code> with spaces, it'll be treated as a
- literal asterisk or underscore.
-</p>
+</code>
+</pre>
+<p>You can use whichever style you prefer; the lone restriction is that
+the same character must be used to open and close an emphasis span.</p>
+<p>Emphasis can be used in the middle of a word:</p>
+<pre>
+ <code>un*fucking*believable
+</code>
+</pre>
+<p>But if you surround an <code>*</code> or <code>_</code> with spaces, it'll be treated as a
+literal asterisk or underscore.</p>
<p>To produce a literal asterisk or underscore at a position where it
- would otherwise be used as an emphasis delimiter, you can backslash
- escape it:
-</p>
-<pre><code>\*this text is surrounded by literal asterisks\*
-</code></pre><h3 id="code">Code</h3>
-
+would otherwise be used as an emphasis delimiter, you can backslash
+escape it:</p>
+<pre>
+ <code>\*this text is surrounded by literal asterisks\*
+</code>
+</pre>
+<p><h3 id="code">Code</h3></p>
<p>To indicate a span of code, wrap it with backtick quotes (<code>`</code>).
- Unlike a pre-formatted code block, a code span indicates code within a
- normal paragraph. For example:
-</p>
-<pre><code>Use the `printf()` function.
-</code></pre><p>will produce:
-</p>
-<pre><code>&lt;p&gt;Use the &lt;code&gt;printf()&lt;/code&gt; function.&lt;/p&gt;
-</code></pre><p>To include a literal backtick character within a code span, you can use
- multiple backticks as the opening and closing delimiters:
-</p>
-<pre><code>``There is a literal backtick (`) here.``
-</code></pre><p>which will produce this:
-</p>
-<pre><code>&lt;p&gt;&lt;code&gt;There is a literal backtick (`) here.&lt;/code&gt;&lt;/p&gt;
-</code></pre><p>The backtick delimiters surrounding a code span may include spaces --
- one after the opening, one before the closing. This allows you to place
- literal backtick characters at the beginning or end of a code span:
-</p>
-<pre><code>A single backtick in a code span: `` ` ``
+Unlike a pre-formatted code block, a code span indicates code within a
+normal paragraph. For example:</p>
+<pre>
+ <code>Use the `printf()` function.
+</code>
+</pre>
+<p>will produce:</p>
+<pre>
+ <code>&lt;p&gt;Use the &lt;code&gt;printf()&lt;/code&gt; function.&lt;/p&gt;
+</code>
+</pre>
+<p>To include a literal backtick character within a code span, you can use
+multiple backticks as the opening and closing delimiters:</p>
+<pre>
+ <code>``There is a literal backtick (`) here.``
+</code>
+</pre>
+<p>which will produce this:</p>
+<pre>
+ <code>&lt;p&gt;&lt;code&gt;There is a literal backtick (`) here.&lt;/code&gt;&lt;/p&gt;
+</code>
+</pre>
+<p>The backtick delimiters surrounding a code span may include spaces --
+one after the opening, one before the closing. This allows you to place
+literal backtick characters at the beginning or end of a code span:</p>
+<pre>
+ <code>A single backtick in a code span: `` ` ``
A backtick-delimited string in a code span: `` `foo` ``
-</code></pre><p>will produce:
-</p>
-<pre><code>&lt;p&gt;A single backtick in a code span: &lt;code&gt;`&lt;/code&gt;&lt;/p&gt;
+</code>
+</pre>
+<p>will produce:</p>
+<pre>
+ <code>&lt;p&gt;A single backtick in a code span: &lt;code&gt;`&lt;/code&gt;&lt;/p&gt;
&lt;p&gt;A backtick-delimited string in a code span: &lt;code&gt;`foo`&lt;/code&gt;&lt;/p&gt;
-</code></pre><p>With a code span, ampersands and angle brackets are encoded as HTML
- entities automatically, which makes it easy to include example HTML
- tags. Markdown will turn this:
-</p>
-<pre><code>Please don't use any `&lt;blink&gt;` tags.
-</code></pre><p>into:
-</p>
-<pre><code>&lt;p&gt;Please don't use any &lt;code&gt;&amp;lt;blink&amp;gt;&lt;/code&gt; tags.&lt;/p&gt;
-</code></pre><p>You can write this:
-</p>
-<pre><code>`&amp;#8212;` is the decimal-encoded equivalent of `&amp;mdash;`.
-</code></pre><p>to produce:
-</p>
-<pre><code>&lt;p&gt;&lt;code&gt;&amp;amp;#8212;&lt;/code&gt; is the decimal-encoded
+</code>
+</pre>
+<p>With a code span, ampersands and angle brackets are encoded as HTML
+entities automatically, which makes it easy to include example HTML
+tags. Markdown will turn this:</p>
+<pre>
+ <code>Please don't use any `&lt;blink&gt;` tags.
+</code>
+</pre>
+<p>into:</p>
+<pre>
+ <code>&lt;p&gt;Please don't use any &lt;code&gt;&amp;lt;blink&amp;gt;&lt;/code&gt; tags.&lt;/p&gt;
+</code>
+</pre>
+<p>You can write this:</p>
+<pre>
+ <code>`&amp;#8212;` is the decimal-encoded equivalent of `&amp;mdash;`.
+</code>
+</pre>
+<p>to produce:</p>
+<pre>
+ <code>&lt;p&gt;&lt;code&gt;&amp;amp;#8212;&lt;/code&gt; is the decimal-encoded
equivalent of &lt;code&gt;&amp;amp;mdash;&lt;/code&gt;.&lt;/p&gt;
-</code></pre><h3 id="img">Images</h3>
-
+</code>
+</pre>
+<p><h3 id="img">Images</h3></p>
<p>Admittedly, it's fairly difficult to devise a "natural" syntax for
- placing images into a plain text document format.
-</p>
+placing images into a plain text document format.</p>
<p>Markdown uses an image syntax that is intended to resemble the syntax
- for links, allowing for two styles: <em>inline</em> and <em>reference</em>.
-</p>
-<p>Inline image syntax looks like this:
-</p>
-<pre><code>![Alt text](/path/to/img.jpg)
+for links, allowing for two styles: <em>inline</em> and <em>reference</em>.</p>
+<p>Inline image syntax looks like this:</p>
+<pre>
+ <code>![Alt text](/path/to/img.jpg)
![Alt text](/path/to/img.jpg "Optional title")
-</code></pre><p>That is:
-</p>
+</code>
+</pre>
+<p>That is:</p>
<ul>
- <li>
- An exclamation mark: <code>!</code>;
- </li>
-
- <li>
- followed by a set of square brackets, containing the <code>alt</code>
-attribute text for the image;
- </li>
-
- <li>
- followed by a set of parentheses, containing the URL or path to
- the image, and an optional <code>title</code> attribute enclosed in double
-or single quotes.
- </li>
+ <li>An exclamation mark: <code>!</code>;</li>
+ <li>followed by a set of square brackets, containing the <code>alt</code>
+attribute text for the image;</li>
+ <li>followed by a set of parentheses, containing the URL or path to
+the image, and an optional <code>title</code> attribute enclosed in double
+or single quotes.</li>
</ul>
-<p>Reference-style image syntax looks like this:
-</p>
-<pre><code>![Alt text][id]
-</code></pre><p>Where "id" is the name of a defined image reference. Image references
- are defined using syntax identical to link references:
-</p>
-<pre><code>[id]: url/to/image "Optional title attribute"
-</code></pre><p>As of this writing, Markdown has no syntax for specifying the
- dimensions of an image; if this is important to you, you can simply
- use regular HTML <code>&lt;img&gt;</code> tags.
-</p>
+<p>Reference-style image syntax looks like this:</p>
+<pre>
+ <code>![Alt text][id]
+</code>
+</pre>
+<p>Where "id" is the name of a defined image reference. Image references
+are defined using syntax identical to link references:</p>
+<pre>
+ <code>[id]: url/to/image "Optional title attribute"
+</code>
+</pre>
+<p>As of this writing, Markdown has no syntax for specifying the
+dimensions of an image; if this is important to you, you can simply
+use regular HTML <code>&lt;img&gt;</code> tags.</p>
<hr />
-
-<h2 id="misc">Miscellaneous</h2>
-
-<h3 id="autolink">Automatic Links</h3>
-
-<p>Markdown supports a shortcut style for creating "automatic" links for URLs and email addresses: simply surround the URL or email address with angle brackets. What this means is that if you want to show the actual text of a URL or email address, and also have it be a clickable link, you can do this:
-</p>
-<pre><code>&lt;http://example.com/&gt;
-</code></pre><p>Markdown will turn this into:
-</p>
-<pre><code>&lt;a href="http://example.com/"&gt;http://example.com/&lt;/a&gt;
-</code></pre><p>Automatic links for email addresses work similarly, except that
- Markdown will also perform a bit of randomized decimal and hex
- entity-encoding to help obscure your address from address-harvesting
- spambots. For example, Markdown will turn this:
-</p>
-<pre><code>&lt;address@example.com&gt;
-</code></pre><p>into something like this:
-</p>
-<pre><code>&lt;a href="&amp;#x6D;&amp;#x61;i&amp;#x6C;&amp;#x74;&amp;#x6F;:&amp;#x61;&amp;#x64;&amp;#x64;&amp;#x72;&amp;#x65;
+<p><h2 id="misc">Miscellaneous</h2></p>
+<p><h3 id="autolink">Automatic Links</h3></p>
+<p>Markdown supports a shortcut style for creating "automatic" links for URLs and email addresses: simply surround the URL or email address with angle brackets. What this means is that if you want to show the actual text of a URL or email address, and also have it be a clickable link, you can do this:</p>
+<pre>
+ <code>&lt;http://example.com/&gt;
+</code>
+</pre>
+<p>Markdown will turn this into:</p>
+<pre>
+ <code>&lt;a href="http://example.com/"&gt;http://example.com/&lt;/a&gt;
+</code>
+</pre>
+<p>Automatic links for email addresses work similarly, except that
+Markdown will also perform a bit of randomized decimal and hex
+entity-encoding to help obscure your address from address-harvesting
+spambots. For example, Markdown will turn this:</p>
+<pre>
+ <code>&lt;address@example.com&gt;
+</code>
+</pre>
+<p>into something like this:</p>
+<pre>
+ <code>&lt;a href="&amp;#x6D;&amp;#x61;i&amp;#x6C;&amp;#x74;&amp;#x6F;:&amp;#x61;&amp;#x64;&amp;#x64;&amp;#x72;&amp;#x65;
&amp;#115;&amp;#115;&amp;#64;&amp;#101;&amp;#120;&amp;#x61;&amp;#109;&amp;#x70;&amp;#x6C;e&amp;#x2E;&amp;#99;&amp;#111;
&amp;#109;"&gt;&amp;#x61;&amp;#x64;&amp;#x64;&amp;#x72;&amp;#x65;&amp;#115;&amp;#115;&amp;#64;&amp;#101;&amp;#120;&amp;#x61;
&amp;#109;&amp;#x70;&amp;#x6C;e&amp;#x2E;&amp;#99;&amp;#111;&amp;#109;&lt;/a&gt;
-</code></pre><p>which will render in a browser as a clickable link to "address@example.com".
-</p>
+</code>
+</pre>
+<p>which will render in a browser as a clickable link to "address@example.com".</p>
<p>(This sort of entity-encoding trick will indeed fool many, if not
- most, address-harvesting bots, but it definitely won't fool all of
- them. It's better than nothing, but an address published in this way
- will probably eventually start receiving spam.)
-</p>
-<h3 id="backslash">Backslash Escapes</h3>
-
+most, address-harvesting bots, but it definitely won't fool all of
+them. It's better than nothing, but an address published in this way
+will probably eventually start receiving spam.)</p>
+<p><h3 id="backslash">Backslash Escapes</h3></p>
<p>Markdown allows you to use backslash escapes to generate literal
- characters which would otherwise have special meaning in Markdown's
- formatting syntax. For example, if you wanted to surround a word with
- literal asterisks (instead of an HTML <code>&lt;em&gt;</code> tag), you can backslashes
- before the asterisks, like this:
-</p>
-<pre><code>\*literal asterisks\*
-</code></pre><p>Markdown provides backslash escapes for the following characters:
-</p>
-<pre><code>\ backslash
+characters which would otherwise have special meaning in Markdown's
+formatting syntax. For example, if you wanted to surround a word with
+literal asterisks (instead of an HTML <code>&lt;em&gt;</code> tag), you can backslashes
+before the asterisks, like this:</p>
+<pre>
+ <code>\*literal asterisks\*
+</code>
+</pre>
+<p>Markdown provides backslash escapes for the following characters:</p>
+<pre>
+ <code>\ backslash
` backtick
* asterisk
_ underscore
@@ -849,4 +905,6 @@ _ underscore
- minus sign (hyphen)
. dot
! exclamation mark
-</code></pre> \ No newline at end of file
+</code>
+</pre>
+
diff --git a/tests/markdown-test/nested-blockquotes.html b/tests/markdown-test/nested-blockquotes.html
index 7a94df2..f1b017e 100644
--- a/tests/markdown-test/nested-blockquotes.html
+++ b/tests/markdown-test/nested-blockquotes.html
@@ -1,9 +1,7 @@
-
-<blockquote><p>foo
-</p>
-<blockquote><p>bar
-</p>
-</blockquote><p>foo
-</p>
+<blockquote>
+<p>foo</p>
+<blockquote>
+<p>bar</p>
</blockquote>
-
+<p>foo</p>
+</blockquote> \ No newline at end of file
diff --git a/tests/markdown-test/ordered-and-unordered-list.html b/tests/markdown-test/ordered-and-unordered-list.html
index 928f094..e96cdba 100644
--- a/tests/markdown-test/ordered-and-unordered-list.html
+++ b/tests/markdown-test/ordered-and-unordered-list.html
@@ -1,273 +1,146 @@
-
-
<h2>Unordered</h2>
-<p>Asterisks tight:
-</p>
+<p>Asterisks tight:</p>
<ul>
- <li>
- asterisk 1
- </li>
-
- <li>
- asterisk 2
- </li>
-
- <li>
- asterisk 3
- </li>
+<li>asterisk 1</li>
+<li>asterisk 2</li>
+<li>asterisk 3</li>
</ul>
-<p>Asterisks loose:
-</p>
+<p>Asterisks loose:</p>
<ul>
- <li><p>asterisk 1
-</p>
-
- </li>
-
- <li><p>asterisk 2
-</p>
-
- </li>
-
- <li><p>asterisk 3
-</p>
-
- </li>
+<li>
+<p>asterisk 1</p>
+</li>
+<li>
+<p>asterisk 2</p>
+</li>
+<li>
+<p>asterisk 3</p>
+</li>
</ul>
<hr />
-
-<p>Pluses tight:
-</p>
+<p>Pluses tight:</p>
<ul>
- <li>
- Plus 1
- </li>
-
- <li>
- Plus 2
- </li>
-
- <li>
- Plus 3
- </li>
+<li>Plus 1</li>
+<li>Plus 2</li>
+<li>Plus 3</li>
</ul>
-<p>Pluses loose:
-</p>
+<p>Pluses loose:</p>
<ul>
- <li><p>Plus 1
-</p>
-
- </li>
-
- <li><p>Plus 2
-</p>
-
- </li>
-
- <li><p>Plus 3
-</p>
-
- </li>
+<li>
+<p>Plus 1</p>
+</li>
+<li>
+<p>Plus 2</p>
+</li>
+<li>
+<p>Plus 3</p>
+</li>
</ul>
<hr />
-
-<p>Minuses tight:
-</p>
+<p>Minuses tight:</p>
<ul>
- <li>
- Minus 1
- </li>
-
- <li>
- Minus 2
- </li>
-
- <li>
- Minus 3
- </li>
+<li>Minus 1</li>
+<li>Minus 2</li>
+<li>Minus 3</li>
</ul>
-<p>Minuses loose:
-</p>
+<p>Minuses loose:</p>
<ul>
- <li><p>Minus 1
-</p>
-
- </li>
-
- <li><p>Minus 2
-</p>
-
- </li>
-
- <li><p>Minus 3
-</p>
-
- </li>
+<li>
+<p>Minus 1</p>
+</li>
+<li>
+<p>Minus 2</p>
+</li>
+<li>
+<p>Minus 3</p>
+</li>
</ul>
-
<h2>Ordered</h2>
-<p>Tight:
-</p>
+<p>Tight:</p>
<ol>
- <li>
- First
- </li>
-
- <li>
- Second
- </li>
-
- <li>
- Third
- </li>
+<li>First</li>
+<li>Second</li>
+<li>Third</li>
</ol>
-<p>and:
-</p>
+<p>and:</p>
<ol>
- <li>
- One
- </li>
-
- <li>
- Two
- </li>
-
- <li>
- Three
- </li>
+<li>One</li>
+<li>Two</li>
+<li>Three</li>
</ol>
-<p>Loose using tabs:
-</p>
+<p>Loose using tabs:</p>
<ol>
- <li><p>First
-</p>
-
- </li>
-
- <li><p>Second
-</p>
-
- </li>
-
- <li><p>Third
-</p>
-
- </li>
+<li>
+<p>First</p>
+</li>
+<li>
+<p>Second</p>
+</li>
+<li>
+<p>Third</p>
+</li>
</ol>
-<p>and using spaces:
-</p>
+<p>and using spaces:</p>
<ol>
- <li><p>One
-</p>
-
- </li>
-
- <li><p>Two
-</p>
-
- </li>
-
- <li><p>Three
-</p>
-
- </li>
+<li>
+<p>One</p>
+</li>
+<li>
+<p>Two</p>
+</li>
+<li>
+<p>Three</p>
+</li>
</ol>
-<p>Multiple paragraphs:
-</p>
+<p>Multiple paragraphs:</p>
<ol>
- <li><p>Item 1, graf one.
-</p>
+<li>
+<p>Item 1, graf one.</p>
<p>Item 2. graf two. The quick brown fox jumped over the lazy dog's
- back.
-</p>
-
- </li>
-
- <li><p>Item 2.
-</p>
-
- </li>
-
- <li><p>Item 3.
-</p>
-
- </li>
+back.</p>
+</li>
+<li>
+<p>Item 2.</p>
+</li>
+<li>
+<p>Item 3.</p>
+</li>
</ol>
-
<h2>Nested</h2>
<ul>
- <li>
- Tab<ul>
- <li>
- Tab<ul>
- <li>
- Tab
- </li>
+<li>Tab<ul>
+<li>Tab<ul>
+<li>Tab</li>
</ul>
-
- </li>
+</li>
</ul>
-
- </li>
+</li>
</ul>
-<p>Here's another:
-</p>
+<p>Here's another:</p>
<ol>
- <li>
- First
- </li>
-
- <li>
- Second:<ul>
- <li>
- Fee
- </li>
-
- <li>
- Fie
- </li>
-
- <li>
- Foe
- </li>
+<li>First</li>
+<li>Second:<ul>
+<li>Fee</li>
+<li>Fie</li>
+<li>Foe</li>
</ul>
-
- </li>
-
- <li>
- Third
- </li>
+</li>
+<li>Third</li>
</ol>
-<p>Same thing but with paragraphs:
-</p>
+<p>Same thing but with paragraphs:</p>
<ol>
- <li><p>First
-</p>
-
- </li>
-
- <li><p>Second:
-</p>
+<li>
+<p>First</p>
+</li>
+<li>
+<p>Second:</p>
<ul>
- <li>
- Fee
- </li>
-
- <li>
- Fie
- </li>
-
- <li>
- Foe
- </li>
+<li>Fee</li>
+<li>Fie</li>
+<li>Foe</li>
</ul>
-
- </li>
-
- <li><p>Third
-</p>
-
- </li>
+</li>
+<li>
+<p>Third</p>
+</li>
</ol>
-
-
diff --git a/tests/markdown-test/strong-and-em-together.html b/tests/markdown-test/strong-and-em-together.html
index 7bb56db..04c1850 100644
--- a/tests/markdown-test/strong-and-em-together.html
+++ b/tests/markdown-test/strong-and-em-together.html
@@ -1,11 +1,16 @@
-
-<p><strong><em>This is strong and em.</em></strong>
+<p>
+<strong>
+<em>This is strong and em.</em>
+</strong>
</p>
-<p>So is <strong><em>this</em></strong> word.
+<p>So is <strong>
+<em>this</em>
+</strong> word.</p>
+<p>
+<strong>
+<em>This is strong and em.</em>
+</strong>
</p>
-<p><strong><em>This is strong and em.</em></strong>
-</p>
-<p>So is <strong><em>this</em></strong> word.
-</p>
-
-
+<p>So is <strong>
+<em>this</em>
+</strong> word.</p>
diff --git a/tests/markdown-test/tabs.html b/tests/markdown-test/tabs.html
index 3c91b64..f90fb95 100644
--- a/tests/markdown-test/tabs.html
+++ b/tests/markdown-test/tabs.html
@@ -1,29 +1,29 @@
-
<ul>
- <li><p>this is a list item
- indented with tabs
-</p>
-
- </li>
-
- <li><p>this is a list item
- indented with spaces
-</p>
-
- </li>
+<li>
+<p>this is a list item
+indented with tabs</p>
+</li>
+<li>
+<p>this is a list item
+indented with spaces</p>
+</li>
</ul>
-<p>Code:
-</p>
-<pre><code>this code block is indented by one tab
-</code></pre><p>And:
-</p>
-<pre><code> this code block is indented by two tabs
-</code></pre><p>And:
-</p>
-<pre><code>+ this is an example list item
+<p>Code:</p>
+<pre>
+<code>this code block is indented by one tab
+</code>
+</pre>
+<p>And:</p>
+<pre>
+<code> this code block is indented by two tabs
+</code>
+</pre>
+<p>And:</p>
+<pre>
+<code>+ this is an example list item
indented with tabs
+ this is an example list item
indented with spaces
-</code></pre>
-
+</code>
+</pre>
diff --git a/tests/markdown-test/tidyness.html b/tests/markdown-test/tidyness.html
index 0431d2d..52b2eaf 100644
--- a/tests/markdown-test/tidyness.html
+++ b/tests/markdown-test/tidyness.html
@@ -1,18 +1,8 @@
-
-<blockquote><p>A list within a blockquote:
-</p>
+<blockquote>
+<p>A list within a blockquote:</p>
<ul>
- <li>
- asterisk 1
- </li>
-
- <li>
- asterisk 2
- </li>
-
- <li>
- asterisk 3
- </li>
+<li>asterisk 1</li>
+<li>asterisk 2</li>
+<li>asterisk 3</li>
</ul>
-</blockquote>
-
+</blockquote> \ No newline at end of file
diff --git a/tests/misc/CRLF_line_ends.html b/tests/misc/CRLF_line_ends.html
index 8e6849f..c650e58 100644
--- a/tests/misc/CRLF_line_ends.html
+++ b/tests/misc/CRLF_line_ends.html
@@ -1,5 +1,4 @@
-<p>foo
-</p>
-<div>
+<p>foo</p>
+<p><div>
bar
-</div> \ No newline at end of file
+</div></p> \ No newline at end of file
diff --git a/tests/misc/adjacent-headers.html b/tests/misc/adjacent-headers.html
index 43ad50c..bd171aa 100644
--- a/tests/misc/adjacent-headers.html
+++ b/tests/misc/adjacent-headers.html
@@ -1,7 +1,2 @@
-
-
<h1>this is a huge header</h1>
-
-<h2>this is a smaller header</h2>
-
-
+<h2>this is a smaller header</h2> \ No newline at end of file
diff --git a/tests/misc/amp-in-url.html b/tests/misc/amp-in-url.html
index f4c1ac5..e0ca599 100644
--- a/tests/misc/amp-in-url.html
+++ b/tests/misc/amp-in-url.html
@@ -1,5 +1,3 @@
-
-<p><a href="http://www.freewisdom.org/this&amp;that">link</a>
-</p>
-
-
+<p>
+<a href="http://www.freewisdom.org/this&amp;that">link</a>
+</p> \ No newline at end of file
diff --git a/tests/misc/ampersand.html b/tests/misc/ampersand.html
index 672ffa5..94ed80c 100644
--- a/tests/misc/ampersand.html
+++ b/tests/misc/ampersand.html
@@ -1,7 +1,2 @@
-
-<p>&amp;
-</p>
-<p>AT&amp;T
-</p>
-
-
+<p>&amp;</p>
+<p>AT&amp;T</p> \ No newline at end of file
diff --git a/tests/misc/arabic.html b/tests/misc/arabic.html
index fa27f2e..830d62a 100644
--- a/tests/misc/arabic.html
+++ b/tests/misc/arabic.html
@@ -1,18 +1,17 @@
-<h1 dir="rtl">بايثون</h1>
-<p dir="rtl"><strong>بايثون</strong> لغة برمجة حديثة بسيطة، واضحة، سريعة ، تستخدم أسلوب البرمجة الكائنية (OOP) وقابلة للتطوير بالإضافة إلى أنها مجانية و مفتوحة المصدر. صُنفت بالأساس كلغة تفسيرية ، بايثون مصممة أصلاً للأداء بعض المهام الخاصة أو المحدودة. إلا أنه يمكن استخدامها بايثون لإنجاز المشاريع الضخمه كأي لغة برمجية أخرى، غالباً ما يُنصح المبتدئين في ميدان البرمجة بتعلم هذه اللغة لأنها من بين أسهل اللغات البرمجية تعلماً.
-</p>
-<p dir="rtl">نشأت بايثون في مركز CWI (مركز العلوم والحاسب الآلي) بأمستردام على يد جويدو فان رُزوم. تم تطويرها بلغة C. أطلق فان رُزوم اسم "بايثون" على لغته تعبيرًا عن إعجابه بفِرقَة مسرحية هزلية شهيرة من بريطانيا، كانت تطلق على نفسها اسم مونتي بايثون Monty Python.
-</p>
-<p dir="rtl">تتميز بايثون بمجتمعها النشط ، كما أن لها الكثير من المكتبات البرمجية ذات الأغراض الخاصة والتي برمجها أشخاص من مجتمع هذه اللغة ، مثلاً مكتبة PyGame التي توفر مجموعه من الوظائف من اجل برمجة الالعاب. ويمكن لبايثون التعامل مع العديد من أنواع قواعد البيانات مثل MySQL وغيره.
-</p>
-
-<h2 dir="rtl">أمثلة</h2>
-<p dir="rtl">مثال Hello World!
-</p>
-<pre><code>print "Hello World!"
-</code></pre><p dir="rtl">مثال لاستخراج المضروب Factorial :
-</p>
-<pre><code>num = 1
+<h1>بايثون</h1>
+<p>
+ <strong>بايثون</strong> لغة برمجة حديثة بسيطة، واضحة، سريعة ، تستخدم أسلوب البرمجة الكائنية (OOP) وقابلة للتطوير بالإضافة إلى أنها مجانية و مفتوحة المصدر. صُنفت بالأساس كلغة تفسيرية ، بايثون مصممة أصلاً للأداء بعض المهام الخاصة أو المحدودة. إلا أنه يمكن استخدامها بايثون لإنجاز المشاريع الضخمه كأي لغة برمجية أخرى، غالباً ما يُنصح المبتدئين في ميدان البرمجة بتعلم هذه اللغة لأنها من بين أسهل اللغات البرمجية تعلماً.</p>
+<p>نشأت بايثون في مركز CWI (مركز العلوم والحاسب الآلي) بأمستردام على يد جويدو فان رُزوم. تم تطويرها بلغة C. أطلق فان رُزوم اسم "بايثون" على لغته تعبيرًا عن إعجابه بفِرقَة مسرحية هزلية شهيرة من بريطانيا، كانت تطلق على نفسها اسم مونتي بايثون Monty Python.</p>
+<p>تتميز بايثون بمجتمعها النشط ، كما أن لها الكثير من المكتبات البرمجية ذات الأغراض الخاصة والتي برمجها أشخاص من مجتمع هذه اللغة ، مثلاً مكتبة PyGame التي توفر مجموعه من الوظائف من اجل برمجة الالعاب. ويمكن لبايثون التعامل مع العديد من أنواع قواعد البيانات مثل MySQL وغيره.</p>
+<h2>أمثلة</h2>
+<p>مثال Hello World!</p>
+<pre>
+ <code>print "Hello World!"
+</code>
+</pre>
+<p>مثال لاستخراج المضروب Factorial :</p>
+<pre>
+ <code>num = 1
x = raw_input('Insert the number please ')
x = int(x)
@@ -24,12 +23,13 @@ else:
x = x-1
print num
-</code></pre>
-<h2 dir="rtl">وصلات خارجية</h2>
-<ul dir="rtl">
- <li dir="rtl">
- <a href="http://www.python.org">الموقع الرسمي للغة بايثون</a>
- </li>
+</code>
+</pre>
+<h2>وصلات خارجية</h2>
+<ul>
+ <li>
+ <a href="http://www.python.org">الموقع الرسمي للغة بايثون</a>
+ </li>
</ul>
-<p dir="rtl"> بذرة حاس
-</p> \ No newline at end of file
+<p> بذرة حاس </p>
+
diff --git a/tests/misc/attributes2.html b/tests/misc/attributes2.html
index 62cbaca..cc1ae26 100644
--- a/tests/misc/attributes2.html
+++ b/tests/misc/attributes2.html
@@ -1,14 +1,6 @@
-
-<p id="TABLE.OF.CONTENTS">
-</p>
+<p id="TABLE.OF.CONTENTS" />
<ul>
- <li id="TABLEOFCONTENTS">
-
- </li>
+<li id="TABLEOFCONTENTS" />
</ul>
-<p id="TABLEOFCONTENTS">Or in the middle of the text
-</p>
-<p id="tableofcontents">
-</p>
-
-
+<p id="TABLEOFCONTENTS">Or in the middle of the text </p>
+<p id="tableofcontents" />
diff --git a/tests/misc/backtick-escape.html b/tests/misc/backtick-escape.html
new file mode 100644
index 0000000..dd736d4
--- /dev/null
+++ b/tests/misc/backtick-escape.html
@@ -0,0 +1,3 @@
+<p>\`This should not be in code.\`
+`This also should not be in code.`
+`And finally this should not be in code.`</p>
diff --git a/tests/misc/backtick-escape.txt b/tests/misc/backtick-escape.txt
new file mode 100644
index 0000000..b4d80b2
--- /dev/null
+++ b/tests/misc/backtick-escape.txt
@@ -0,0 +1,3 @@
+\\`This should not be in code.\\`
+\`This also should not be in code.\`
+\`And finally this should not be in code.`
diff --git a/tests/misc/bidi.html b/tests/misc/bidi.html
index 74d4374..e7759f1 100644
--- a/tests/misc/bidi.html
+++ b/tests/misc/bidi.html
@@ -1,39 +1,29 @@
-<p><strong>Python</strong>(パイソン)は、<a href="http://en.wikipedia.org/wiki/Guido_van_Rossum">Guido van Rossum</a> によって作られたオープンソースのオブジェクト指向スクリプト言語。<a href="http://ja.wikipedia.org/wiki/Perl">Perl</a>とともに欧米で広く普及している。イギリスのテレビ局 BBC が製作したコメディ番組『空飛ぶモンティ・パイソン』にちなんで名付けられた。 (Pythonには、爬虫類のニシキヘビの意味があり、Python言語のマスコットやアイコンとして使われることがある。)
-</p>
-<p>|||||||||||||||||||||||||||||THIS SHOULD BE LTR|||||||||||||||||||||||||
-</p>
-<p dir="rtl">|||||||||||||||||||||||||||||THIS SHOULD BE RTL|||||||||||||||||||||||||
-</p>
-<p dir="ltr">(<strong>بايثون</strong> لغة برمجة حديثة بسيطة، واضحة، سريعة ، تستخدم أسلوب البرمجة الكائنية (THIS SHOULD BE LTR ) وقابلة للتطوير بالإضافة إلى أنها مجانية و مفتوح
-</p>
-<p dir="rtl">پایتون زبان برنامه‌نویسی تفسیری و سطح بالا ، شی‌گرا و یک زبان برنامه‌نویسی تفسیری سمت سرور قدرتمند است که توسط گیدو ون روسوم در سال ۱۹۹۰ ساخته شد. این زبان در ویژگی‌ها شبیه پرل، روبی، اسکیم، اسمال‌تاک و تی‌سی‌ال است و از مدیریت خودکار حافظه استفاده می‌کند
-</p>
-<p>Python,是一种面向对象的、直譯式的计算机程序设计语言,也是一种功能强大而完善的通用型语言,已经具有十多年的发展历史,成熟且稳定。
-</p>
-<p dir="rtl">ބްލޫ ވޭލްގެ ދޫ މަތީގައި އެއްފަހަރާ 50 މީހުންނަށް ތިބެވިދާނެވެ. ބޮޑު މަހުގެ ދުލަކީ އެހާމެ ބޮޑު އެއްޗެކެވެ.
-</p>
-<p><strong>உருது</strong> 13ஆம் நூற்றாண்டில் உருவான ஒரு இந்தோ-ஐரோப்பிய மொழியாகும். உருது, ஹிந்தியுடன் சேர்த்து "ஹிந்துஸ்தானி" என அழைக்கப்படுகின்றது. மண்டரின், ஆங்கிலம் ஆகியவற்றுக்கு அடுத்தபடியாக மூன்றாவது கூடிய அளவு மக்களால் புரிந்து கொள்ளப்படக்கூடியது ஹிந்துஸ்தானியேயாகும். தாய் மொழியாகப் பேசுபவர்கள் எண்ணிக்கையின் அடிப்படையில் உருது உலகின் 20 ஆவது பெரிய மொழியாகும். 6 கோடி மக்கள் இதனைத் தாய் மொழியாகக் கொண்டுள்ளார்கள். இரண்டாவது மொழியாகக் கொண்டுள்ளவர்கள் உட்பட 11 கோடிப் பேர் இதனைப் பேசுகிறார்கள். உருது பாகிஸ்தானின் அரசகரும மொழியாகவும், இந்தியாவின் அரசகரும மொழிகளுள் ஒன்றாகவும் விளங்குகிறது.
-</p>
-<p dir="rtl">اردو ہندوآریائی زبانوں کی ہندويورپی شاخ کی ایک زبان ہے جو تيرھويں صدی ميں بر صغير ميں پيدا ہوئی ـ اردو پاکستان کی سرکاری زبان ہے اور بھارت کی سرکاری زبانوں ميں سے ايک ہے۔ اردو بھارت ميں 5 کروڑ اور پاکستان ميں 1 کروڑ لوگوں کی مادری زبان ہے مگر اسے بھارت اور پاکستان کے تقریباً 50 کروڑ لوگ بول اور سمجھ سکتے ھیں ۔ جن میں سے تقریباً 10.5 کروڑ لوگ اسے باقاعدہ بولتے ھیں۔
-</p>
-
-<h1 dir="rtl">بايثون</h1>
-<p dir="rtl"><strong>بايثون</strong> لغة برمجة حديثة بسيطة، واضحة، سريعة ، تستخدم أسلوب البرمجة الكائنية (OOP) وقابلة للتطوير بالإضافة إلى أنها مجانية و مفتوحة المصدر. صُنفت بالأساس كلغة تفسيرية ، بايثون مصممة أصلاً للأداء بعض المهام الخاصة أو المحدودة. إلا أنه يمكن استخدامها بايثون لإنجاز المشاريع الضخمه كأي لغة برمجية أخرى، غالباً ما يُنصح المبتدئين في ميدان البرمجة بتعلم هذه اللغة لأنها من بين أسهل اللغات البرمجية تعلماً.
-</p>
-<p dir="rtl">|||||||||||||||||||||||||||||THIS SHOULD BE RTL|||||||||||||||||||||||||
-</p>
-<p dir="rtl">(نشأت بايثون في مركز CWI (مركز العلوم والحاسب الآلي) بأمستردام على يد جويدو فان رُزوم. تم تطويرها بلغة C. أطلق فان رُزوم اسم "بايثون" على لغته تعبيرًا عن إعجابه بفِرقَة مسرحية هزلية شهيرة من بريطانيا، كانت تطلق على نفسها اسم مونتي بايثون Monty Python.
-</p>
-<p dir="rtl">تتميز بايثون بمجتمعها النشط ، كما أن لها الكثير من المكتبات البرمجية ذات الأغراض الخاصة والتي برمجها أشخاص من مجتمع هذه اللغة ، مثلاً مكتبة PyGame التي توفر مجموعه من الوظائف من اجل برمجة الالعاب. ويمكن لبايثون التعامل مع العديد من أنواع قواعد البيانات مثل MySQL وغيره.
-</p>
-
-<h2 dir="rtl">أمثلة</h2>
-<p dir="rtl">مثال Hello World!
-</p>
-<pre><code>print "Hello World!"
-</code></pre><p dir="rtl">مثال لاستخراج المضروب Factorial :
-</p>
-<pre><code>num = 1
+<p>
+ <strong>Python</strong>(パイソン)は、<a href="http://en.wikipedia.org/wiki/Guido_van_Rossum">Guido van Rossum</a> によって作られたオープンソースのオブジェクト指向スクリプト言語。<a href="http://ja.wikipedia.org/wiki/Perl">Perl</a>とともに欧米で広く普及している。イギリスのテレビ局 BBC が製作したコメディ番組『空飛ぶモンティ・パイソン』にちなんで名付けられた。 (Pythonには、爬虫類のニシキヘビの意味があり、Python言語のマスコットやアイコンとして使われることがある。)</p>
+<p>|||||||||||||||||||||||||||||THIS SHOULD BE LTR|||||||||||||||||||||||||</p>
+<p dir="rtl">|||||||||||||||||||||||||||||THIS SHOULD BE RTL||||||||||||||||||||||||| </p>
+<p dir="ltr">(<strong>بايثون</strong> لغة برمجة حديثة بسيطة، واضحة، سريعة ، تستخدم أسلوب البرمجة الكائنية (THIS SHOULD BE LTR ) وقابلة للتطوير بالإضافة إلى أنها مجانية و مفتوح </p>
+<p>پایتون زبان برنامه‌نویسی تفسیری و سطح بالا ، شی‌گرا و یک زبان برنامه‌نویسی تفسیری سمت سرور قدرتمند است که توسط گیدو ون روسوم در سال ۱۹۹۰ ساخته شد. این زبان در ویژگی‌ها شبیه پرل، روبی، اسکیم، اسمال‌تاک و تی‌سی‌ال است و از مدیریت خودکار حافظه استفاده می‌کند</p>
+<p>Python,是一种面向对象的、直譯式的计算机程序设计语言,也是一种功能强大而完善的通用型语言,已经具有十多年的发展历史,成熟且稳定。</p>
+<p>ބްލޫ ވޭލްގެ ދޫ މަތީގައި އެއްފަހަރާ 50 މީހުންނަށް ތިބެވިދާނެވެ. ބޮޑު މަހުގެ ދުލަކީ އެހާމެ ބޮޑު އެއްޗެކެވެ.</p>
+<p>
+ <strong>உருது</strong> 13ஆம் நூற்றாண்டில் உருவான ஒரு இந்தோ-ஐரோப்பிய மொழியாகும். உருது, ஹிந்தியுடன் சேர்த்து "ஹிந்துஸ்தானி" என அழைக்கப்படுகின்றது. மண்டரின், ஆங்கிலம் ஆகியவற்றுக்கு அடுத்தபடியாக மூன்றாவது கூடிய அளவு மக்களால் புரிந்து கொள்ளப்படக்கூடியது ஹிந்துஸ்தானியேயாகும். தாய் மொழியாகப் பேசுபவர்கள் எண்ணிக்கையின் அடிப்படையில் உருது உலகின் 20 ஆவது பெரிய மொழியாகும். 6 கோடி மக்கள் இதனைத் தாய் மொழியாகக் கொண்டுள்ளார்கள். இரண்டாவது மொழியாகக் கொண்டுள்ளவர்கள் உட்பட 11 கோடிப் பேர் இதனைப் பேசுகிறார்கள். உருது பாகிஸ்தானின் அரசகரும மொழியாகவும், இந்தியாவின் அரசகரும மொழிகளுள் ஒன்றாகவும் விளங்குகிறது.</p>
+<p>اردو ہندوآریائی زبانوں کی ہندويورپی شاخ کی ایک زبان ہے جو تيرھويں صدی ميں بر صغير ميں پيدا ہوئی ـ اردو پاکستان کی سرکاری زبان ہے اور بھارت کی سرکاری زبانوں ميں سے ايک ہے۔ اردو بھارت ميں 5 کروڑ اور پاکستان ميں 1 کروڑ لوگوں کی مادری زبان ہے مگر اسے بھارت اور پاکستان کے تقریباً 50 کروڑ لوگ بول اور سمجھ سکتے ھیں ۔ جن میں سے تقریباً 10.5 کروڑ لوگ اسے باقاعدہ بولتے ھیں۔</p>
+<h1>بايثون</h1>
+<p>
+ <strong>بايثون</strong> لغة برمجة حديثة بسيطة، واضحة، سريعة ، تستخدم أسلوب البرمجة الكائنية (OOP) وقابلة للتطوير بالإضافة إلى أنها مجانية و مفتوحة المصدر. صُنفت بالأساس كلغة تفسيرية ، بايثون مصممة أصلاً للأداء بعض المهام الخاصة أو المحدودة. إلا أنه يمكن استخدامها بايثون لإنجاز المشاريع الضخمه كأي لغة برمجية أخرى، غالباً ما يُنصح المبتدئين في ميدان البرمجة بتعلم هذه اللغة لأنها من بين أسهل اللغات البرمجية تعلماً.</p>
+<p>|||||||||||||||||||||||||||||THIS SHOULD BE RTL|||||||||||||||||||||||||</p>
+<p>(نشأت بايثون في مركز CWI (مركز العلوم والحاسب الآلي) بأمستردام على يد جويدو فان رُزوم. تم تطويرها بلغة C. أطلق فان رُزوم اسم "بايثون" على لغته تعبيرًا عن إعجابه بفِرقَة مسرحية هزلية شهيرة من بريطانيا، كانت تطلق على نفسها اسم مونتي بايثون Monty Python.</p>
+<p>تتميز بايثون بمجتمعها النشط ، كما أن لها الكثير من المكتبات البرمجية ذات الأغراض الخاصة والتي برمجها أشخاص من مجتمع هذه اللغة ، مثلاً مكتبة PyGame التي توفر مجموعه من الوظائف من اجل برمجة الالعاب. ويمكن لبايثون التعامل مع العديد من أنواع قواعد البيانات مثل MySQL وغيره.</p>
+<h2>أمثلة</h2>
+<p>مثال Hello World!</p>
+<pre>
+ <code>print "Hello World!"
+</code>
+</pre>
+<p>مثال لاستخراج المضروب Factorial :</p>
+<pre>
+ <code>num = 1
x = raw_input('Insert the number please ')
x = int(x)
@@ -45,16 +35,17 @@ else:
x = x-1
print num
-</code></pre>
-<h2 dir="rtl">وصلات خارجية</h2>
-<ul dir="rtl">
- <li dir="rtl">
- <a href="http://www.python.org">الموقع الرسمي للغة بايثون</a>
- </li>
+</code>
+</pre>
+<h2>وصلات خارجية</h2>
+<ul>
+ <li>
+ <a href="http://www.python.org">الموقع الرسمي للغة بايثون</a>
+ </li>
</ul>
-<p dir="rtl"> بذرة حاس
-</p>
-<p><strong>Недвард «Нед» Фландерс</strong> (Nedward «Ned» Flanders) — вымышленный персонаж мультсериала «[Симпсоны][]», озвученный Гарри Ширером. Он и его семья живут по соседству от семьи Симпсонов. Набожный христианин, Нед является одним из столпов морали Спрингфилда. В эпизоде «Alone Again, Natura-Diddily» он овдовел, его жена Мод погибла в результате несчастного случая.
-</p>
+<p> بذرة حاس </p>
+<p>
+ <strong>Недвард «Нед» Фландерс</strong> (Nedward «Ned» Flanders) — вымышленный персонаж мультсериала «[Симпсоны][]», озвученный Гарри Ширером. Он и его семья живут по соседству от семьи Симпсонов. Набожный христианин, Нед является одним из столпов морали Спрингфилда. В эпизоде «Alone Again, Natura-Diddily» он овдовел, его жена Мод погибла в результате несчастного случая. </p>
<p>Нед был одним из первых персонажей в мультсериале, который не был членом семьи Симпсонов. Начиная с первых серий, он регулярно появляется в «Симпсонах». Считается, что Нед Фландерс был назван в честь улицы <em>Northeast Flanders St.</em> в <a href="http://www.portland.gov">Портленде</a>, Орегон, родном городе создателя мультсериала Мэтта Грейнинга]]. Надпись на указателе улицы <em>NE Flanders St.</em> хулиганы часто исправляли на <em>NED Flanders St.</em>
-</p> \ No newline at end of file
+</p>
+
diff --git a/tests/misc/blank-block-quote.html b/tests/misc/blank-block-quote.html
index 1544925..eae7ae0 100644
--- a/tests/misc/blank-block-quote.html
+++ b/tests/misc/blank-block-quote.html
@@ -1,7 +1,3 @@
-
-<p>aaaaaaaaaaa
-</p>
-<blockquote></blockquote><p>bbbbbbbbbbb
-</p>
-
-
+<p>aaaaaaaaaaa</p>
+<blockquote />
+<p>bbbbbbbbbbb</p>
diff --git a/tests/misc/blockquote-below-paragraph.html b/tests/misc/blockquote-below-paragraph.html
new file mode 100755
index 0000000..5757a9a
--- /dev/null
+++ b/tests/misc/blockquote-below-paragraph.html
@@ -0,0 +1,6 @@
+<p>Paragraph</p>
+<blockquote>
+ <p>Block quote
+Yep</p>
+</blockquote>
+
diff --git a/tests/misc/blockquote-below-paragraph.txt b/tests/misc/blockquote-below-paragraph.txt
new file mode 100755
index 0000000..0dd4d5c
--- /dev/null
+++ b/tests/misc/blockquote-below-paragraph.txt
@@ -0,0 +1,3 @@
+Paragraph
+>Block quote
+>Yep
diff --git a/tests/misc/blockquote-hr.html b/tests/misc/blockquote-hr.html
index 1d7af0f..e437766 100644
--- a/tests/misc/blockquote-hr.html
+++ b/tests/misc/blockquote-hr.html
@@ -1,23 +1,16 @@
-<p>This is a paragraph.
-</p>
+<p>This is a paragraph.</p>
<hr />
-
-<blockquote><p>Block quote with horizontal lines.
-</p>
+<blockquote>
+<p>Block quote with horizontal lines.</p>
<hr />
-
-<blockquote><p>Double block quote.
-</p>
+<blockquote>
+<p>Double block quote.</p>
<hr />
-
-<p>End of the double block quote.
-</p>
-</blockquote><p>A new paragraph.
- With multiple lines.
- Even a lazy line.
-</p>
+<p>End of the double block quote.</p>
+</blockquote>
+<p>A new paragraph.
+With multiple lines.
+Even a lazy line.</p>
<hr />
-
-<p>The last line.
-</p>
-</blockquote> \ No newline at end of file
+<p>The last line.</p>
+</blockquote>
diff --git a/tests/misc/blockquote.html b/tests/misc/blockquote.html
index ec16d1f..429c6d0 100644
--- a/tests/misc/blockquote.html
+++ b/tests/misc/blockquote.html
@@ -1,22 +1,25 @@
-<blockquote><p>blockquote with no whitespace before <code>&gt;</code>.
-</p>
-</blockquote><p>foo
-</p>
-<blockquote><p>blockquote with one space before the <code>&gt;</code>.
-</p>
-</blockquote><p>bar
-</p>
-<blockquote><p>blockquote with 2 spaces.
-</p>
-</blockquote><p>baz
-</p>
-<p> &gt; this has three spaces so its a paragraph.
-</p>
-<p>blah
-</p>
-<pre><code>&gt; this one had four so it's a code block.
-</code></pre><blockquote><blockquote><p>this nested blockquote has 0 on level one and 3 (one after the first <code>&gt;</code> + 2 more) on level 2.
-</p>
-</blockquote><p> &gt; and this has 4 on level 2 - another code block.
-</p>
-</blockquote> \ No newline at end of file
+<blockquote>
+ <p>blockquote with no whitespace before <code>&gt;</code>.</p>
+</blockquote>
+<p>foo</p>
+<blockquote>
+ <p>blockquote with one space before the <code>&gt;</code>.</p>
+</blockquote>
+<p>bar</p>
+<blockquote>
+ <p>blockquote with 2 spaces.</p>
+</blockquote>
+<p>baz</p>
+<p> &gt; this has three spaces so its a paragraph.</p>
+<p>blah</p>
+<pre>
+ <code>&gt; this one had four so it's a code block.
+</code>
+</pre>
+<blockquote>
+ <blockquote>
+ <p>this nested blockquote has 0 on level one and 3 (one after the first <code>&gt;</code> + 2 more) on level 2.</p>
+ </blockquote>
+ <p> &gt; and this has 4 on level 2 - another code block.</p>
+</blockquote>
+
diff --git a/tests/misc/bold_links.html b/tests/misc/bold_links.html
index 7a1fadb..55dcaa3 100644
--- a/tests/misc/bold_links.html
+++ b/tests/misc/bold_links.html
@@ -1,2 +1,4 @@
-<p><strong>bold <a href="http://example.com">link</a></strong>
-</p>
+<p>
+<strong>bold <a href="http://example.com">link</a>
+</strong>
+</p> \ No newline at end of file
diff --git a/tests/misc/br.html b/tests/misc/br.html
index 6a521e1..52b4796 100644
--- a/tests/misc/br.html
+++ b/tests/misc/br.html
@@ -1,16 +1,13 @@
-
-<p>Output:
-</p>
-<pre><code>&lt;p&gt;Some of these words &lt;em&gt;are emphasized&lt;/em&gt;.
+<p>Output:</p>
+<pre>
+<code>&lt;p&gt;Some of these words &lt;em&gt;are emphasized&lt;/em&gt;.
Some of these words &lt;em&gt;are emphasized also&lt;/em&gt;.&lt;/p&gt;
&lt;p&gt;Use two asterisks for &lt;strong&gt;strong emphasis&lt;/strong&gt;.
Or, if you prefer, &lt;strong&gt;use two underscores instead&lt;/strong&gt;.&lt;/p&gt;
-</code></pre>
+</code>
+</pre>
<h2>Lists</h2>
<p>Unordered (bulleted) lists use asterisks, pluses, and hyphens (<code>*</code>,
- <code>+</code>, and <code>-</code>) as list markers. These three markers are
- interchangable; this:
-</p>
-
-
+<code>+</code>, and <code>-</code>) as list markers. These three markers are
+interchangable; this:</p> \ No newline at end of file
diff --git a/tests/misc/bracket_re.html b/tests/misc/bracket_re.html
index 04245ad..f48a612 100644
--- a/tests/misc/bracket_re.html
+++ b/tests/misc/bracket_re.html
@@ -1,61 +1,60 @@
<p>[x
- xxx xxx xxx xxx xxx xxx xxx xxx
- xxx xxx xxx xxx xxx xxx xxx xxx
- xxx xxx xxx xxx xxx xxx xxx xxx
- xxx xxx xxx xxx xxx xxx xxx xxx
- xxx xxx xxx xxx xxx xxx xxx xxx
- xxx xxx xxx xxx xxx xxx xxx xxx
- xxx xxx xxx xxx xxx xxx xxx xxx
- xxx xxx xxx xxx xxx xxx xxx xxx
- xxx xxx xxx xxx xxx xxx xxx xxx
- xxx xxx xxx xxx xxx xxx xxx xxx
- xxx xxx xxx xxx xxx xxx xxx xxx
- xxx xxx xxx xxx xxx xxx xxx xxx
- xxx xxx xxx xxx xxx xxx xxx xxx
- xxx xxx xxx xxx xxx xxx xxx xxx
- xxx xxx xxx xxx xxx xxx xxx xxx
- xxx xxx xxx xxx xxx xxx xxx xxx
- xxx xxx xxx xxx xxx xxx xxx xxx
- xxx xxx xxx xxx xxx xxx xxx xxx
- xxx xxx xxx xxx xxx xxx xxx xxx
- xxx xxx xxx xxx xxx xxx xxx xxx
- xxx xxx xxx xxx xxx xxx xxx xxx
- xxx xxx xxx xxx xxx xxx xxx xxx
- xxx xxx xxx xxx xxx xxx xxx xxx
- xxx xxx xxx xxx xxx xxx xxx xxx
- xxx xxx xxx xxx xxx xxx xxx xxx
- xxx xxx xxx xxx xxx xxx xxx xxx
- xxx xxx xxx xxx xxx xxx xxx xxx
- xxx xxx xxx xxx xxx xxx xxx xxx
- xxx xxx xxx xxx xxx xxx xxx xxx
- xxx xxx xxx xxx xxx xxx xxx xxx
- xxx xxx xxx xxx xxx xxx xxx xxx
- xxx xxx xxx xxx xxx xxx xxx xxx
- xxx xxx xxx xxx xxx xxx xxx xxx
- xxx xxx xxx xxx xxx xxx xxx xxx
- xxx xxx xxx xxx xxx xxx xxx xxx
- xxx xxx xxx xxx xxx xxx xxx xxx
- xxx xxx xxx xxx xxx xxx xxx xxx
- xxx xxx xxx xxx xxx xxx xxx xxx
- xxx xxx xxx xxx xxx xxx xxx xxx
- xxx xxx xxx xxx xxx xxx xxx xxx
- xxx xxx xxx xxx xxx xxx xxx xxx
- xxx xxx xxx xxx xxx xxx xxx xxx
- xxx xxx xxx xxx xxx xxx xxx xxx
- xxx xxx xxx xxx xxx xxx xxx xxx
- xxx xxx xxx xxx xxx xxx xxx xxx
- xxx xxx xxx xxx xxx xxx xxx xxx
- xxx xxx xxx xxx xxx xxx xxx xxx
- xxx xxx xxx xxx xxx xxx xxx xxx
- xxx xxx xxx xxx xxx xxx xxx xxx
- xxx xxx xxx xxx xxx xxx xxx xxx
- xxx xxx xxx xxx xxx xxx xxx xxx
- xxx xxx xxx xxx xxx xxx xxx xxx
- xxx xxx xxx xxx xxx xxx xxx xxx
- xxx xxx xxx xxx xxx xxx xxx xxx
- xxx xxx xxx xxx xxx xxx xxx xxx
- xxx xxx xxx xxx xxx xxx xxx xxx
- xxx xxx xxx xxx xxx xxx xxx xxx
- xxx xxx xxx xxx xxx xxx xxx xxx
- xxx xxx xxx xxx xxx xxx xxx xxx
-</p> \ No newline at end of file
+xxx xxx xxx xxx xxx xxx xxx xxx
+xxx xxx xxx xxx xxx xxx xxx xxx
+xxx xxx xxx xxx xxx xxx xxx xxx
+xxx xxx xxx xxx xxx xxx xxx xxx
+xxx xxx xxx xxx xxx xxx xxx xxx
+xxx xxx xxx xxx xxx xxx xxx xxx
+xxx xxx xxx xxx xxx xxx xxx xxx
+xxx xxx xxx xxx xxx xxx xxx xxx
+xxx xxx xxx xxx xxx xxx xxx xxx
+xxx xxx xxx xxx xxx xxx xxx xxx
+xxx xxx xxx xxx xxx xxx xxx xxx
+xxx xxx xxx xxx xxx xxx xxx xxx
+xxx xxx xxx xxx xxx xxx xxx xxx
+xxx xxx xxx xxx xxx xxx xxx xxx
+xxx xxx xxx xxx xxx xxx xxx xxx
+xxx xxx xxx xxx xxx xxx xxx xxx
+xxx xxx xxx xxx xxx xxx xxx xxx
+xxx xxx xxx xxx xxx xxx xxx xxx
+xxx xxx xxx xxx xxx xxx xxx xxx
+xxx xxx xxx xxx xxx xxx xxx xxx
+xxx xxx xxx xxx xxx xxx xxx xxx
+xxx xxx xxx xxx xxx xxx xxx xxx
+xxx xxx xxx xxx xxx xxx xxx xxx
+xxx xxx xxx xxx xxx xxx xxx xxx
+xxx xxx xxx xxx xxx xxx xxx xxx
+xxx xxx xxx xxx xxx xxx xxx xxx
+xxx xxx xxx xxx xxx xxx xxx xxx
+xxx xxx xxx xxx xxx xxx xxx xxx
+xxx xxx xxx xxx xxx xxx xxx xxx
+xxx xxx xxx xxx xxx xxx xxx xxx
+xxx xxx xxx xxx xxx xxx xxx xxx
+xxx xxx xxx xxx xxx xxx xxx xxx
+xxx xxx xxx xxx xxx xxx xxx xxx
+xxx xxx xxx xxx xxx xxx xxx xxx
+xxx xxx xxx xxx xxx xxx xxx xxx
+xxx xxx xxx xxx xxx xxx xxx xxx
+xxx xxx xxx xxx xxx xxx xxx xxx
+xxx xxx xxx xxx xxx xxx xxx xxx
+xxx xxx xxx xxx xxx xxx xxx xxx
+xxx xxx xxx xxx xxx xxx xxx xxx
+xxx xxx xxx xxx xxx xxx xxx xxx
+xxx xxx xxx xxx xxx xxx xxx xxx
+xxx xxx xxx xxx xxx xxx xxx xxx
+xxx xxx xxx xxx xxx xxx xxx xxx
+xxx xxx xxx xxx xxx xxx xxx xxx
+xxx xxx xxx xxx xxx xxx xxx xxx
+xxx xxx xxx xxx xxx xxx xxx xxx
+xxx xxx xxx xxx xxx xxx xxx xxx
+xxx xxx xxx xxx xxx xxx xxx xxx
+xxx xxx xxx xxx xxx xxx xxx xxx
+xxx xxx xxx xxx xxx xxx xxx xxx
+xxx xxx xxx xxx xxx xxx xxx xxx
+xxx xxx xxx xxx xxx xxx xxx xxx
+xxx xxx xxx xxx xxx xxx xxx xxx
+xxx xxx xxx xxx xxx xxx xxx xxx
+xxx xxx xxx xxx xxx xxx xxx xxx
+xxx xxx xxx xxx xxx xxx xxx xxx
+xxx xxx xxx xxx xxx xxx xxx xxx
+xxx xxx xxx xxx xxx xxx xxx xxx</p> \ No newline at end of file
diff --git a/tests/misc/code-first-line.html b/tests/misc/code-first-line.html
index 172b17c..177463f 100644
--- a/tests/misc/code-first-line.html
+++ b/tests/misc/code-first-line.html
@@ -1,2 +1,4 @@
-<pre><code>print "This is a code block."
-</code></pre>
+<pre>
+<code>print "This is a code block."
+</code>
+</pre> \ No newline at end of file
diff --git a/tests/misc/comments.html b/tests/misc/comments.html
index 62b830b..7a03e53 100644
--- a/tests/misc/comments.html
+++ b/tests/misc/comments.html
@@ -1,11 +1,4 @@
-
-<p>X&lt;0
-</p>
-<p>X&gt;0
-</p>
-<!-- A comment -->
-
-<div>as if</div>
-
-
-
+<p>X&lt;0</p>
+<p>X&gt;0</p>
+<p><!-- A comment --></p>
+<p><div>as if</div></p> \ No newline at end of file
diff --git a/tests/misc/div.html b/tests/misc/div.html
index 798024d..634136c 100644
--- a/tests/misc/div.html
+++ b/tests/misc/div.html
@@ -1,9 +1,5 @@
-
-<div id="sidebar">
-
-<p> <em>foo</em>
+<p><div id="sidebar"></p>
+<p>
+<em>foo</em>
</p>
-</div>
-
-
-
+<p></div></p> \ No newline at end of file
diff --git a/tests/misc/email.html b/tests/misc/email.html
index 2fac140..d3bf521 100644
--- a/tests/misc/email.html
+++ b/tests/misc/email.html
@@ -1,6 +1,3 @@
-
<p>asdfasdfadsfasd <a href="&#109;&#97;&#105;&#108;&#116;&#111;&#58;&#121;&#117;&#114;&#105;&#64;&#102;&#114;&#101;&#101;&#119;&#105;&#115;&#100;&#111;&#109;&#46;&#111;&#114;&#103;">&#121;&#117;&#114;&#105;&#64;&#102;&#114;&#101;&#101;&#119;&#105;&#115;&#100;&#111;&#109;&#46;&#111;&#114;&#103;</a> or you can say
- instead <a href="&#109;&#97;&#105;&#108;&#116;&#111;&#58;&#121;&#117;&#114;&#105;&#64;&#102;&#114;&#101;&#101;&#119;&#105;&#115;&#100;&#111;&#109;&#46;&#111;&#114;&#103;">&#121;&#117;&#114;&#105;&#64;&#102;&#114;&#101;&#101;&#119;&#105;&#115;&#100;&#111;&#109;&#46;&#111;&#114;&#103;</a>
+instead <a href="&#109;&#97;&#105;&#108;&#116;&#111;&#58;&#121;&#117;&#114;&#105;&#64;&#102;&#114;&#101;&#101;&#119;&#105;&#115;&#100;&#111;&#109;&#46;&#111;&#114;&#103;">&#121;&#117;&#114;&#105;&#64;&#102;&#114;&#101;&#101;&#119;&#105;&#115;&#100;&#111;&#109;&#46;&#111;&#114;&#103;</a>
</p>
-
-
diff --git a/tests/misc/funky-list.html b/tests/misc/funky-list.html
index ebe8263..f489d56 100644
--- a/tests/misc/funky-list.html
+++ b/tests/misc/funky-list.html
@@ -1,31 +1,12 @@
-
<ol>
- <li>
- this starts a list <em>with</em> numbers
- </li>
-
- <li>
- this will show as number "2"
- </li>
-
- <li>
- this will show as number "3."
- </li>
-
- <li>
- any number, +, -, or * will keep the list going.
- </li>
+ <li>this starts a list <em>with</em> numbers</li>
+ <li>this will show as number "2"</li>
+ <li>this will show as number "3."</li>
+ <li>any number, +, -, or * will keep the list going.</li>
</ol>
-<p>aaaaaaaaaaaaaaa
-</p>
+<p>aaaaaaaaaaaaaaa</p>
<ul>
- <li>
- now a normal list
- </li>
-
- <li>
- and more
- </li>
+ <li>now a normal list</li>
+ <li>and more</li>
</ul>
-
diff --git a/tests/misc/h1.html b/tests/misc/h1.html
index f3110f8..fbf9b4d 100644
--- a/tests/misc/h1.html
+++ b/tests/misc/h1.html
@@ -1,9 +1,3 @@
-
-
<h2>Header</h2>
-
<h1>Header 2</h1>
-
-<h3>H3</h3>
-
-
+<h3>H3</h3> \ No newline at end of file
diff --git a/tests/misc/hash.html b/tests/misc/hash.html
index b78babc..5e8bffb 100644
--- a/tests/misc/hash.html
+++ b/tests/misc/hash.html
@@ -1,18 +1,9 @@
-
-<p>a
-</p>
-<pre>
+<p>a</p>
+<p><pre>
#!/usr/bin/python
-
-hello</pre>
-
-<p>a
-</p>
-<pre>
+hello</pre></p>
+<p>a</p>
+<p><pre>
!/usr/bin/python
-hello</pre>
-
-<p>a
-</p>
-
-
+hello</pre></p>
+<p>a</p> \ No newline at end of file
diff --git a/tests/misc/headers.html b/tests/misc/headers.html
index 7041eda..a65d7b2 100644
--- a/tests/misc/headers.html
+++ b/tests/misc/headers.html
@@ -1,12 +1,9 @@
<h3>Hello world</h3>
<p>Line 2
- Line 3
-</p>
-
+Line 3</p>
<h1>[Markdown][5]</h1>
-
-<h1><a href="http://some.link.com/">Markdown</a></h1>
-
+<h1>
+<a href="http://some.link.com/">Markdown</a>
+</h1>
<h1>[5]: http://foo.com/</h1>
-
-<h1>Issue #1: Markdown</h1>
+<h1>Issue #1: Markdown</h1> \ No newline at end of file
diff --git a/tests/misc/hline.html b/tests/misc/hline.html
index 8d33101..b18a311 100644
--- a/tests/misc/hline.html
+++ b/tests/misc/hline.html
@@ -1,7 +1,2 @@
-
-
<h1>Header</h1>
-<p>Next line
-</p>
-
-
+<p>Next line</p> \ No newline at end of file
diff --git a/tests/misc/html-comments.html b/tests/misc/html-comments.html
new file mode 100755
index 0000000..ac08082
--- /dev/null
+++ b/tests/misc/html-comments.html
@@ -0,0 +1,2 @@
+<p>Here is HTML <!-- **comment** -->
+and once more <p><!--comment--></p></p>
diff --git a/tests/misc/html-comments.txt b/tests/misc/html-comments.txt
new file mode 100755
index 0000000..cac4da5
--- /dev/null
+++ b/tests/misc/html-comments.txt
@@ -0,0 +1,2 @@
+Here is HTML <!-- **comment** -->
+and once more <p><!--comment--></p>
diff --git a/tests/misc/html.html b/tests/misc/html.html
index 1209112..81b8b62 100644
--- a/tests/misc/html.html
+++ b/tests/misc/html.html
@@ -1,10 +1,8 @@
-<h1>Block level html</h1>
-
-<p>Some inline <b>stuff<b>.<br />
+<p><h1>Block level html</h1></p>
+<p>Some inline <b>stuff<b>.<br />
</p>
-<p>Now some <arbitrary>arbitrary tags</arbitrary>.
-</p>
-<p>And of course <script>blah</script>.
-</p>
-<p><a href="script&gt;stuff&lt;/script">this <script>link</a>
+<p>Now some <arbitrary>arbitrary tags</arbitrary>.</p>
+<p>And of course <script>blah</script>.</p>
+<p>
+<a href="script&gt;stuff&lt;/script">this <script>link</a>
</p> \ No newline at end of file
diff --git a/tests/misc/image-2.html b/tests/misc/image-2.html
index ddc8f3f..bb649e7 100644
--- a/tests/misc/image-2.html
+++ b/tests/misc/image-2.html
@@ -1,7 +1,10 @@
-
-<p><a href="http://src.com/"><em>link!</em></a>
+<p>
+<a href="http://src.com/">
+<em>link!</em>
+</a>
</p>
-<p>*<a href="http://www.freewisdom.org">link</a>*
-</p>
-
-
+<p>
+<em>
+<a href="http://www.freewisdom.org">link</a>
+</em>
+</p> \ No newline at end of file
diff --git a/tests/misc/image.html b/tests/misc/image.html
index 0f449a5..e87015c 100644
--- a/tests/misc/image.html
+++ b/tests/misc/image.html
@@ -1,5 +1,3 @@
-
-<p><img src="http://humane_man.jpg" title="The most humane man." alt="Poster"/>
+<p>
+<img alt="Poster" src="http://humane_man.jpg" title="The most humane man." />
</p>
-
-
diff --git a/tests/misc/image_in_links.html b/tests/misc/image_in_links.html
index e2027b5..d3edba9 100644
--- a/tests/misc/image_in_links.html
+++ b/tests/misc/image_in_links.html
@@ -1,2 +1,5 @@
-<p><a href="path/to/image.png"><img src="path/to/img_thumb.png" alt="altname"/></a>
-</p> \ No newline at end of file
+<p>
+<a href="path/to/image.png">
+<img alt="altname" src="path/to/img_thumb.png" />
+</a>
+</p>
diff --git a/tests/misc/inside_html.html b/tests/misc/inside_html.html
index 5e128ab..673beac 100644
--- a/tests/misc/inside_html.html
+++ b/tests/misc/inside_html.html
@@ -1,5 +1 @@
-
-<p><a href="stuff"> <strong>ok</strong>? </a>
-</p>
-
-
+<p><a href="stuff"> <strong>ok</strong>? </a></p>
diff --git a/tests/misc/japanese.html b/tests/misc/japanese.html
index c23effd..ae1df5d 100644
--- a/tests/misc/japanese.html
+++ b/tests/misc/japanese.html
@@ -1,28 +1,13 @@
-
-
<h1>パイソン (Python)</h1>
-<p><strong>Python</strong>(パイソン)は、<a href="http://en.wikipedia.org/wiki/Guido_van_Rossum">Guido van Rossum</a> によって作られたオープンソースのオブジェクト指向スクリプト言語。<a href="http://ja.wikipedia.org/wiki/Perl">Perl</a>とともに欧米で広く普及している。イギリスのテレビ局 BBC が製作したコメディ番組『空飛ぶモンティ・パイソン』にちなんで名付けられた。 (Pythonには、爬虫類のニシキヘビの意味があり、Python言語のマスコットやアイコンとして使われることがある。)
-</p>
-
+<p>
+ <strong>Python</strong>(パイソン)は、<a href="http://en.wikipedia.org/wiki/Guido_van_Rossum">Guido van Rossum</a> によって作られたオープンソースのオブジェクト指向スクリプト言語。<a href="http://ja.wikipedia.org/wiki/Perl">Perl</a>とともに欧米で広く普及している。イギリスのテレビ局 BBC が製作したコメディ番組『空飛ぶモンティ・パイソン』にちなんで名付けられた。 (Pythonには、爬虫類のニシキヘビの意味があり、Python言語のマスコットやアイコンとして使われることがある。)</p>
<h2>概要</h2>
-<p>プログラミング言語 Python は初心者から専門家まで幅広いユーザ層を獲得している。利用目的は汎用で、方向性としてはJavaに近い。ただし、最初からネットワーク利用をメインとして考えられているJavaよりセキュリティについてはやや寛大である。多くのプラットフォームをサポートしており(⇒<a href="#somelink">動作するプラットフォーム</a>)、豊富なライブラリがあることから、産業界でも利用が増えつつある。また、Pythonは純粋なプログラミング言語のほかにも、多くの異なる言語で書かれたモジュールをまとめる糊言語のひとつとして位置づけることができる。実際Pythonは多くの商用アプリケーションでスクリプト言語として採用されている(⇒Pythonを使っている製品あるいはソフトウェアの一覧)。豊富なドキュメントをもち、Unicodeによる文字列操作をサポートしており、日本語処理も標準で可能である。
-</p>
-<p>Python は基本的にインタプリタ上で実行されることを念頭において設計されており、以下のような特徴をもっている:
-</p>
+<p>プログラミング言語 Python は初心者から専門家まで幅広いユーザ層を獲得している。利用目的は汎用で、方向性としてはJavaに近い。ただし、最初からネットワーク利用をメインとして考えられているJavaよりセキュリティについてはやや寛大である。多くのプラットフォームをサポートしており(⇒<a href="#somelink">動作するプラットフォーム</a>)、豊富なライブラリがあることから、産業界でも利用が増えつつある。また、Pythonは純粋なプログラミング言語のほかにも、多くの異なる言語で書かれたモジュールをまとめる糊言語のひとつとして位置づけることができる。実際Pythonは多くの商用アプリケーションでスクリプト言語として採用されている(⇒Pythonを使っている製品あるいはソフトウェアの一覧)。豊富なドキュメントをもち、Unicodeによる文字列操作をサポートしており、日本語処理も標準で可能である。</p>
+<p>Python は基本的にインタプリタ上で実行されることを念頭において設計されており、以下のような特徴をもっている:</p>
<ul>
- <li>
- 動的な型付け。
- </li>
-
- <li>
- オブジェクトのメンバに対するアクセスが制限されていない。(属性や専用のメソッドフックを実装することによって制限は可能。)
- </li>
-
- <li>
- モジュール、クラス、オブジェクト等の言語の要素が内部からアクセス可能であり、リフレクションを利用した記述が可能。
- </li>
+ <li>動的な型付け。</li>
+ <li>オブジェクトのメンバに対するアクセスが制限されていない。(属性や専用のメソッドフックを実装することによって制限は可能。)</li>
+ <li>モジュール、クラス、オブジェクト等の言語の要素が内部からアクセス可能であり、リフレクションを利用した記述が可能。</li>
</ul>
-<p>また、Pythonではインデントによりブロックを指定する構文を採用している(⇒<a href="#jklj">オフサイドルール</a>)。この構文はPythonに慣れたユーザからは称賛をもって受け入れられているが、他の言語のユーザからは批判も多い。このほかにも、大きすぎる実行ファイルや、Javaに比べて遅い処理速度などが欠点として指摘されている。しかし <strong>プロトタイピング</strong> の際にはこれらの点はさして問題とはならないことから、研究開発部門では頻繁に利用されている。
-</p>
-
+<p>また、Pythonではインデントによりブロックを指定する構文を採用している(⇒<a href="#jklj">オフサイドルール</a>)。この構文はPythonに慣れたユーザからは称賛をもって受け入れられているが、他の言語のユーザからは批判も多い。このほかにも、大きすぎる実行ファイルや、Javaに比べて遅い処理速度などが欠点として指摘されている。しかし <strong>プロトタイピング</strong> の際にはこれらの点はさして問題とはならないことから、研究開発部門では頻繁に利用されている。</p>
diff --git a/tests/misc/lazy-block-quote.html b/tests/misc/lazy-block-quote.html
index 14da987..7a88263 100644
--- a/tests/misc/lazy-block-quote.html
+++ b/tests/misc/lazy-block-quote.html
@@ -1,9 +1,6 @@
-
-<blockquote><p>Line one of lazy block quote.
- Line two of lazy block quote.
-</p>
+<blockquote>
+<p>Line one of lazy block quote.
+Line two of lazy block quote.</p>
<p>Line one of paragraph two.
- Line two of paragraph two.
-</p>
-</blockquote>
-
+Line two of paragraph two.</p>
+</blockquote> \ No newline at end of file
diff --git a/tests/misc/link-with-parenthesis.html b/tests/misc/link-with-parenthesis.html
new file mode 100644
index 0000000..5f6d1b1
--- /dev/null
+++ b/tests/misc/link-with-parenthesis.html
@@ -0,0 +1,3 @@
+<p>
+ <a href="http://en.wikipedia.org/wiki/ZIP_(file_format)" title="ZIP (file format) - Wikipedia, the free encyclopedia">ZIP archives</a>
+</p>
diff --git a/tests/misc/link-with-parenthesis.txt b/tests/misc/link-with-parenthesis.txt
new file mode 100644
index 0000000..8affc98
--- /dev/null
+++ b/tests/misc/link-with-parenthesis.txt
@@ -0,0 +1 @@
+[ZIP archives](http://en.wikipedia.org/wiki/ZIP_(file_format) "ZIP (file format) - Wikipedia, the free encyclopedia")
diff --git a/tests/misc/lists.html b/tests/misc/lists.html
index fa4be24..bf4a02b 100644
--- a/tests/misc/lists.html
+++ b/tests/misc/lists.html
@@ -1,64 +1,36 @@
-
<ul>
- <li>
- A multi-paragraph list,
- unindented.
- </li>
+<li>A multi-paragraph list,
+unindented.</li>
</ul>
-<p>Simple tight list
-</p>
+<p>Simple tight list</p>
<ul>
- <li>
- Uno
- </li>
-
- <li>
- Due
- </li>
-
- <li>
- Tri
- </li>
+<li>Uno</li>
+<li>Due</li>
+<li>Tri</li>
</ul>
-<p>A singleton tight list:
-</p>
+<p>A singleton tight list:</p>
<ul>
- <li>
- Uno
- </li>
+<li>Uno</li>
</ul>
-<p>A lose list:
-</p>
+<p>A lose list:</p>
<ul>
- <li><p>One
-</p>
-
- </li>
-
- <li><p>Two
-</p>
-
- </li>
-
- <li><p>Three
-</p>
-
- </li>
+<li>
+<p>One</p>
+</li>
+<li>
+<p>Two</p>
+</li>
+<li>
+<p>Three</p>
+</li>
</ul>
-<p>A lose list with paragraphs
-</p>
+<p>A lose list with paragraphs</p>
<ul>
- <li><p>One one one one
-</p>
-<p>one one one one
-</p>
-
- </li>
-
- <li><p>Two two two two
-</p>
-
- </li>
-</ul>
-
-
+<li>
+<p>One one one one</p>
+<p>one one one one</p>
+</li>
+<li>
+<p>Two two two two</p>
+</li>
+</ul> \ No newline at end of file
diff --git a/tests/misc/lists2.html b/tests/misc/lists2.html
index dcc2f81..991395b 100644
--- a/tests/misc/lists2.html
+++ b/tests/misc/lists2.html
@@ -1,10 +1,5 @@
-
<ul>
- <li>
- blah blah blah
- sdf asdf asdf asdf asdf
- asda asdf asdfasd
- </li>
-</ul>
-
-
+<li>blah blah blah
+sdf asdf asdf asdf asdf
+asda asdf asdfasd</li>
+</ul> \ No newline at end of file
diff --git a/tests/misc/lists3.html b/tests/misc/lists3.html
index dcc2f81..991395b 100644
--- a/tests/misc/lists3.html
+++ b/tests/misc/lists3.html
@@ -1,10 +1,5 @@
-
<ul>
- <li>
- blah blah blah
- sdf asdf asdf asdf asdf
- asda asdf asdfasd
- </li>
-</ul>
-
-
+<li>blah blah blah
+sdf asdf asdf asdf asdf
+asda asdf asdfasd</li>
+</ul> \ No newline at end of file
diff --git a/tests/misc/lists4.html b/tests/misc/lists4.html
index 399ca9c..4b6b32c 100644
--- a/tests/misc/lists4.html
+++ b/tests/misc/lists4.html
@@ -1,21 +1,8 @@
-
<ul>
- <li>
- item1
- </li>
-
- <li>
- item2<ol>
- <li>
- Number 1
- </li>
-
- <li>
- Number 2
- </li>
+<li>item1</li>
+<li>item2<ol>
+<li>Number 1</li>
+<li>Number 2</li>
</ol>
-
- </li>
-</ul>
-
-
+</li>
+</ul> \ No newline at end of file
diff --git a/tests/misc/lists5.html b/tests/misc/lists5.html
index 460a27d..c3dbda4 100644
--- a/tests/misc/lists5.html
+++ b/tests/misc/lists5.html
@@ -1,21 +1,14 @@
-
-<blockquote><p>This is a test of a block quote
- With just two lines
-</p>
-</blockquote><p>A paragraph
-</p>
-<blockquote><p>This is a more difficult case
- With a list item inside the quote
-</p>
+<blockquote>
+<p>This is a test of a block quote
+With just two lines</p>
+</blockquote>
+<p>A paragraph</p>
+<blockquote>
+<p>This is a more difficult case
+With a list item inside the quote</p>
<ul>
- <li>
- Alpha
- </li>
-
- <li>
- Beta
- Etc.
- </li>
+<li>Alpha</li>
+<li>Beta
+Etc.</li>
</ul>
-</blockquote>
-
+</blockquote> \ No newline at end of file
diff --git a/tests/misc/markup-inside-p.html b/tests/misc/markup-inside-p.html
index 6cd0547..c0478fb 100644
--- a/tests/misc/markup-inside-p.html
+++ b/tests/misc/markup-inside-p.html
@@ -1,25 +1,17 @@
-
-<p>
+<p><p>
_foo_
-</p>
-
-<p>
+</p></p>
+<p><p>
_foo_
-</p>
-
-<p>_foo_</p>
-
-<p>
+</p></p>
+<p><p>_foo_</p></p>
+<p><p>
_foo_
-</p>
-
-<p>
+</p></p>
+<p><p>
_foo_
-</p>
-
-
-
+</p></p> \ No newline at end of file
diff --git a/tests/misc/mismatched-tags.html b/tests/misc/mismatched-tags.html
index 0004a19..edeba33 100644
--- a/tests/misc/mismatched-tags.html
+++ b/tests/misc/mismatched-tags.html
@@ -1,9 +1,5 @@
-
-<p>Some text</p><div>some more text</div>
+<p><p>Some text</p><div>some more text</div>
and a bit more
-
-
-
-
+</p> \ No newline at end of file
diff --git a/tests/misc/missing-link-def.html b/tests/misc/missing-link-def.html
new file mode 100644
index 0000000..bab2a75
--- /dev/null
+++ b/tests/misc/missing-link-def.html
@@ -0,0 +1,3 @@
+<p>This is a [missing link][empty] and a <a href="http://example.com">valid</a> and [missing][again].</p>
+
+
diff --git a/tests/misc/missing-link-def.txt b/tests/misc/missing-link-def.txt
new file mode 100644
index 0000000..44bc656
--- /dev/null
+++ b/tests/misc/missing-link-def.txt
@@ -0,0 +1,4 @@
+This is a [missing link][empty] and a [valid][link] and [missing][again].
+
+[link]: http://example.com
+
diff --git a/tests/misc/more_comments.html b/tests/misc/more_comments.html
index d3107d1..d4430cd 100644
--- a/tests/misc/more_comments.html
+++ b/tests/misc/more_comments.html
@@ -1,5 +1,4 @@
-
-<!--asd@asdfd.com>
+<p><!--asd@asdfd.com>
<!asd@asdfd.com>
@@ -7,7 +6,4 @@
Test
-
-
-
-
+</p> \ No newline at end of file
diff --git a/tests/misc/multi-line-tags.html b/tests/misc/multi-line-tags.html
index d47e1e2..6d2d97f 100644
--- a/tests/misc/multi-line-tags.html
+++ b/tests/misc/multi-line-tags.html
@@ -1,9 +1,3 @@
-
-<div>
-
-<p>asdf asdfasd
-</p>
-</div>
-
-
-
+<p><div></p>
+<p>asdf asdfasd</p>
+<p></div></p> \ No newline at end of file
diff --git a/tests/misc/multi-paragraph-block-quote.html b/tests/misc/multi-paragraph-block-quote.html
index b213d84..3602405 100644
--- a/tests/misc/multi-paragraph-block-quote.html
+++ b/tests/misc/multi-paragraph-block-quote.html
@@ -1,10 +1,8 @@
-
-<blockquote><p>This is line one of paragraph one
- This is line two of paragraph one
-</p>
-<p>This is line one of paragraph two
-</p>
-</blockquote><blockquote><p>This is another blockquote.
-</p>
+<blockquote>
+<p>This is line one of paragraph one
+This is line two of paragraph one</p>
+<p>This is line one of paragraph two</p>
</blockquote>
-
+<blockquote>
+<p>This is another blockquote.</p>
+</blockquote> \ No newline at end of file
diff --git a/tests/misc/multi-test.html b/tests/misc/multi-test.html
index 5af759b..1fc7bdb 100644
--- a/tests/misc/multi-test.html
+++ b/tests/misc/multi-test.html
@@ -1,37 +1,24 @@
-
-
<h1 id="inthebeginning">Header </h1>
-<p>Now, let's try something <em class="special">inline</em>, to see if it works
-</p>
+<p>Now, let's try something <em class="special">inline</em>, to see if it works</p>
<p>Blah blah blah <a href="http://www.slashdot.org">http://www.slashdot.org</a>
</p>
<ul>
- <li>
- Basic list
- </li>
-
- <li>
- Basic list 2
- </li>
+ <li>Basic list</li>
+ <li>Basic list 2</li>
</ul>
-<p>addss
-</p>
+<p>addss</p>
<ul>
- <li>
- Lazy list
- </li>
+ <li>Lazy list</li>
</ul>
-<p>An <a href="http://example.com" title="Title">example</a> (oops)
-</p>
+<p>An <a href="http://example.com" title="Title">example</a> (oops)</p>
<p>Now, let's use a footnote[^1]. Not bad, eh?
- Let's continue.
-</p>
+Let's continue.</p>
<p> [^1]: Here is the text of the footnote
- continued on several lines.
- some more of the footnote, etc.
-</p>
-<pre><code>Actually, another paragraph too.
-</code></pre><p>And then there is a little bit of text.
-</p>
-
+ continued on several lines.
+ some more of the footnote, etc.</p>
+<pre>
+ <code>Actually, another paragraph too.
+</code>
+</pre>
+<p>And then there is a little bit of text.</p>
diff --git a/tests/misc/multiline-comments.html b/tests/misc/multiline-comments.html
index 14d2a62..429e71b 100644
--- a/tests/misc/multiline-comments.html
+++ b/tests/misc/multiline-comments.html
@@ -1,21 +1,13 @@
-
-<!--
+<p><!--
foo
--->
-
-<p>
+--></p>
+<p><p>
foo
-</p>
-
-<div>
-
-<p>foo
-</p>
-</div>
-
-
-
+</p></p>
+<p><div></p>
+<p>foo</p>
+<p></div></p> \ No newline at end of file
diff --git a/tests/misc/nested-patterns.html b/tests/misc/nested-patterns.html
new file mode 100644
index 0000000..f41bb34
--- /dev/null
+++ b/tests/misc/nested-patterns.html
@@ -0,0 +1,38 @@
+<p>
+ <strong>
+ <em>
+ <a href="http://www.freewisdom.org">link</a>
+ </em>
+ </strong>
+ <strong>
+ <em>
+ <a href="http://www.freewisdom.org">link</a>
+ </em>
+ </strong>
+ <strong>
+ <a href="http://www.freewisdom.org">
+ <em>link</em>
+ </a>
+ </strong>
+ <strong>
+ <a href="http://www.freewisdom.org">
+ <em>link</em>
+ </a>
+ </strong>
+ <strong>
+ <a href="http://www.freewisdom.org">
+ <em>link</em>
+ </a>
+ </strong>
+ <strong>
+ <a href="http://www.freewisdom.org">
+ <em>link</em>
+ </a>
+ </strong>
+ <a href="http://www.freewisdom.org">
+ <strong>
+ <em>link</em>
+ </strong>
+ </a>
+</p>
+
diff --git a/tests/misc/nested-patterns.txt b/tests/misc/nested-patterns.txt
new file mode 100644
index 0000000..3f5dc3e
--- /dev/null
+++ b/tests/misc/nested-patterns.txt
@@ -0,0 +1,7 @@
+___[link](http://www.freewisdom.org)___
+***[link](http://www.freewisdom.org)***
+**[*link*](http://www.freewisdom.org)**
+__[_link_](http://www.freewisdom.org)__
+__[*link*](http://www.freewisdom.org)__
+**[_link_](http://www.freewisdom.org)**
+[***link***](http://www.freewisdom.org)
diff --git a/tests/misc/normalize.html b/tests/misc/normalize.html
index b151cf9..a6daf8f 100644
--- a/tests/misc/normalize.html
+++ b/tests/misc/normalize.html
@@ -1,5 +1,3 @@
-
-<p><a href="http://www.stuff.com/q?x=1&amp;y=2&lt;&gt;">Link</a>
-</p>
-
-
+<p>
+<a href="http://www.stuff.com/q?x=1&amp;y=2&lt;&gt;">Link</a>
+</p> \ No newline at end of file
diff --git a/tests/misc/numeric-entity.html b/tests/misc/numeric-entity.html
index 34d4e97..720a601 100644
--- a/tests/misc/numeric-entity.html
+++ b/tests/misc/numeric-entity.html
@@ -1,7 +1,4 @@
-
-<p><a href="&#109;&#97;&#105;&#108;&#116;&#111;&#58;&#117;&#115;&#101;&#114;&#64;&#103;&#109;&#97;&#105;&#108;&#46;&#99;&#111;&#109;">&#117;&#115;&#101;&#114;&#64;&#103;&#109;&#97;&#105;&#108;&#46;&#99;&#111;&#109;</a>
+<p>
+<a href="&#109;&#97;&#105;&#108;&#116;&#111;&#58;&#117;&#115;&#101;&#114;&#64;&#103;&#109;&#97;&#105;&#108;&#46;&#99;&#111;&#109;">&#117;&#115;&#101;&#114;&#64;&#103;&#109;&#97;&#105;&#108;&#46;&#99;&#111;&#109;</a>
</p>
-<p>This is an entity: &#234;
-</p>
-
-
+<p>This is an entity: &#234; </p>
diff --git a/tests/misc/php.html b/tests/misc/php.html
index 183716f..66bc8d1 100644
--- a/tests/misc/php.html
+++ b/tests/misc/php.html
@@ -1,16 +1,9 @@
-
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
+<p><!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
"http://www.w3.org/TR/html4/strict.dtd">
<b>This should have a p tag</b>
-<!--This is a comment -->
-
-<div>This shouldn't</div>
-
-<?php echo "block_level";?>
-
-<p> &lt;?php echo "not_block_level";?&gt;
-</p>
-
-
+<!--This is a comment --></p>
+<p><div>This shouldn't</div></p>
+<p><?php echo "block_level";?></p>
+<p> &lt;?php echo "not_block_level";?&gt;</p>
diff --git a/tests/misc/pre.html b/tests/misc/pre.html
index 01f5a29..5ac9799 100644
--- a/tests/misc/pre.html
+++ b/tests/misc/pre.html
@@ -1,17 +1,12 @@
-
-<pre>
+<p><pre>
aaa
bbb
-</pre>
-
-<pre>
+</pre></p>
+<p><pre>
* and this is pre-formatted content
* and it should be printed just like this
* and not formatted as a list
-</pre>
-
-
-
+</pre></p> \ No newline at end of file
diff --git a/tests/misc/russian.html b/tests/misc/russian.html
index 01656e3..06efb99 100644
--- a/tests/misc/russian.html
+++ b/tests/misc/russian.html
@@ -1,15 +1,9 @@
-
-
<h1>Недвард «Нед» Фландерс</h1>
-<p><strong>Недвард «Нед» Фландерс</strong> (Nedward «Ned» Flanders) — вымышленный персонаж мультсериала «[Симпсоны][]», озвученный Гарри Ширером. Он и его семья живут по соседству от семьи Симпсонов. Набожный христианин, Нед является одним из столпов морали Спрингфилда. В эпизоде «Alone Again, Natura-Diddily» он овдовел, его жена Мод погибла в результате несчастного случая.
-</p>
+<p>
+ <strong>Недвард «Нед» Фландерс</strong> (Nedward «Ned» Flanders) — вымышленный персонаж мультсериала «[Симпсоны][]», озвученный Гарри Ширером. Он и его семья живут по соседству от семьи Симпсонов. Набожный христианин, Нед является одним из столпов морали Спрингфилда. В эпизоде «Alone Again, Natura-Diddily» он овдовел, его жена Мод погибла в результате несчастного случая. </p>
<p>Нед был одним из первых персонажей в мультсериале, который не был членом семьи Симпсонов. Начиная с первых серий, он регулярно появляется в «Симпсонах». Считается, что Нед Фландерс был назван в честь улицы <em>Northeast Flanders St.</em> в <a href="http://www.portland.gov">Портленде</a>, Орегон, родном городе создателя мультсериала Мэтта Грейнинга]]. Надпись на указателе улицы <em>NE Flanders St.</em> хулиганы часто исправляли на <em>NED Flanders St.</em>
</p>
-
<h2>Биография</h2>
-<p>Нед Фландерс родился в Нью-Йорке, его родители были битниками. Его отец в точности похож на взрослого Неда, только он носил козлиную бородку. Их отказ от воспитания Неда и то, что они, в общем-то, были плохими родителями («мы ничего в этом не понимаем и не знаем как начать») привело к тому, что Нед превратился в ужасного сорванца. В конце концов они согласились на экспериментальную восьмимесячную шлепологическую терапию Миннесотского Университета (воспоминания Неда в эпизоде «Hurricane Neddy»), которая научила его подавлять чувство злости. Побочным эфектом терапии стало то, что Нед стал ненавидеть своих родителей (это одна из двух вещей которые ненавидит Фландерс, вторая — отделения почты, чьи длинные очереди, суета и угрюмый персонал раздражают его).
-</p>
-<p>У Неда есть странная привычка добавлять «дидли», «дадли» и другие бессмысленные слова в свои фразы при разговоре, например: «Hi-diddly-ho, neighbor-ino» («Приветик, соседушка»). Это результат сублимации его злости, вызванной сдерживанием гнева, который не имеет никакого другого выхода.
-</p>
-
+<p>Нед Фландерс родился в Нью-Йорке, его родители были битниками. Его отец в точности похож на взрослого Неда, только он носил козлиную бородку. Их отказ от воспитания Неда и то, что они, в общем-то, были плохими родителями («мы ничего в этом не понимаем и не знаем как начать») привело к тому, что Нед превратился в ужасного сорванца. В конце концов они согласились на экспериментальную восьмимесячную шлепологическую терапию Миннесотского Университета (воспоминания Неда в эпизоде «Hurricane Neddy»), которая научила его подавлять чувство злости. Побочным эфектом терапии стало то, что Нед стал ненавидеть своих родителей (это одна из двух вещей которые ненавидит Фландерс, вторая — отделения почты, чьи длинные очереди, суета и угрюмый персонал раздражают его).</p>
+<p>У Неда есть странная привычка добавлять «дидли», «дадли» и другие бессмысленные слова в свои фразы при разговоре, например: «Hi-diddly-ho, neighbor-ino» («Приветик, соседушка»). Это результат сублимации его злости, вызванной сдерживанием гнева, который не имеет никакого другого выхода.</p>
diff --git a/tests/misc/some-test.html b/tests/misc/some-test.html
index 646482e..54a3e23 100644
--- a/tests/misc/some-test.html
+++ b/tests/misc/some-test.html
@@ -1,104 +1,72 @@
<hr />
-
<ul>
- <li><p>as if
-</p>
-
- </li>
-
- <li><p>as if2
-</p>
-
- </li>
+<li>
+<p>as if</p>
+</li>
+<li>
+<p>as if2</p>
+</li>
</ul>
<hr />
-
<ul>
- <li><p>as if
-</p>
-
- </li>
-
- <li><p>as if2
-</p>
-
- </li>
+<li>
+<p>as if</p>
+</li>
+<li>
+<p>as if2</p>
+</li>
</ul>
<hr />
-
<ul>
- <li>
- as if
- non_code
- </li>
-
- <li>
- as if2
- </li>
+<li>as if
+non_code</li>
+<li>as if2</li>
</ul>
-<p>Markdown
-</p>
+<p>Markdown</p>
<ul>
- <li><p>Python
- is ok
-</p>
+<li>
+<p>Python
+is ok</p>
<ul>
- <li>
- Therefore i am
- </li>
+<li>Therefore i am</li>
</ul>
-
- </li>
-
- <li><p>Perl sucks
- big time
-</p>
+</li>
+<li>
+<p>Perl sucks
+big time</p>
<ul>
- <li>
- But that's
- ok
- </li>
+<li>But that's
+ok</li>
</ul>
-
- </li>
-
- <li><p>Python is
- ok
- Or not?
-</p>
-
- </li>
+</li>
+<li>
+<p>Python is
+ok
+Or not?</p>
+</li>
</ul>
-<p>Here is a normal paragraph
-</p>
+<p>Here is a normal paragraph</p>
<ol>
- <li><p>Another list
- with a bunch of items
-</p>
-
- </li>
-
- <li><p>Mostly fruits
-</p>
+<li>
+<p>Another list
+with a bunch of items</p>
+</li>
+<li>
+<p>Mostly fruits</p>
<ol>
- <li>
- Apple
- </li>
-
- <li>
- Pare
- </li>
+<li>Apple</li>
+<li>Pare</li>
</ol>
-
- </li>
+</li>
</ol>
-<p>asdfasdfasd
-</p>
-<pre><code># This is a code example
+<p>asdfasdfasd</p>
+<pre>
+<code># This is a code example
import stuff
Another code example
* Lists and similar stuff
&gt; Should be ignored
-</code></pre> \ No newline at end of file
+</code>
+</pre>
diff --git a/tests/misc/span.html b/tests/misc/span.html
index 354f926..dadf5df 100644
--- a/tests/misc/span.html
+++ b/tests/misc/span.html
@@ -1,11 +1,4 @@
-
-<p><span id="someId"> Foo <em>bar</em> Baz </span>
-</p>
-<div><b>*foo*</b></div>
-
-<div id="someId"> Foo *bar* Baz </div>
-
-<p><baza id="someId"> Foo <em>bar</em> Baz </baza>
-</p>
-
-
+<p><span id="someId"> Foo <em>bar</em> Baz </span></p>
+<p><div><b>*foo*</b></div></p>
+<p><div id="someId"> Foo *bar* Baz </div></p>
+<p><baza id="someId"> Foo <em>bar</em> Baz </baza></p>
diff --git a/tests/misc/strong-with-underscores.html b/tests/misc/strong-with-underscores.html
index 0074174..cab3863 100644
--- a/tests/misc/strong-with-underscores.html
+++ b/tests/misc/strong-with-underscores.html
@@ -1,2 +1,3 @@
-<p><strong>this_is_strong</strong>
-</p>
+<p>
+<strong>this_is_strong</strong>
+</p> \ No newline at end of file
diff --git a/tests/misc/stronintags.html b/tests/misc/stronintags.html
index 8f64e04..48e4364 100644
--- a/tests/misc/stronintags.html
+++ b/tests/misc/stronintags.html
@@ -1,10 +1,10 @@
-
-<p>this is a <a href="http://example.com/"><strong>test</strong></a>
+<p>this is a <a href="http://example.com/">
+<strong>test</strong>
+</a>
</p>
-<p>this is a second <em></em><a href="http://example.com">test</a><em></em>
+<p>this is a second <strong>
+<a href="http://example.com">test</a>
+</strong>
</p>
<p>reference <strong>[test][]</strong>
- reference [<strong>test</strong>][]
-</p>
-
-
+reference [<strong>test</strong>][]</p> \ No newline at end of file
diff --git a/tests/misc/tabs-in-lists.html b/tests/misc/tabs-in-lists.html
index 7f61928..a1a92ec 100644
--- a/tests/misc/tabs-in-lists.html
+++ b/tests/misc/tabs-in-lists.html
@@ -1,67 +1,42 @@
-
-<p>First a list with a tabbed line
-</p>
+<p>First a list with a tabbed line</p>
<ul>
- <li><p>A
-</p>
-
- </li>
-
- <li><p>B
-</p>
-
- </li>
+<li>
+<p>A</p>
+</li>
+<li>
+<p>B</p>
+</li>
</ul>
-<p>Just a blank line:
-</p>
+<p>Just a blank line:</p>
<ul>
- <li><p>A
-</p>
-
- </li>
-
- <li><p>B
-</p>
-
- </li>
+<li>
+<p>A</p>
+</li>
+<li>
+<p>B</p>
+</li>
</ul>
-<p>Now a list with 4 spaces and some text:
-</p>
+<p>Now a list with 4 spaces and some text:</p>
<ul>
- <li>
- A
- abcdef
- </li>
-
- <li>
- B
- </li>
+<li>A
+abcdef</li>
+<li>B</li>
</ul>
-<p>Now with a tab and an extra space:
-</p>
+<p>Now with a tab and an extra space:</p>
<ul>
- <li><p>A
-</p>
-
- </li>
-
- <li><p>B
-</p>
-
- </li>
+<li>
+<p>A</p>
+</li>
+<li>
+<p>B</p>
+</li>
</ul>
-<p>Now a list with 4 spaces:
-</p>
+<p>Now a list with 4 spaces:</p>
<ul>
- <li><p>A
-</p>
-
- </li>
-
- <li><p>B
-</p>
-
- </li>
-</ul>
-
-
+<li>
+<p>A</p>
+</li>
+<li>
+<p>B</p>
+</li>
+</ul> \ No newline at end of file
diff --git a/tests/misc/two-spaces.html b/tests/misc/two-spaces.html
index 9d07be4..fe765de 100644
--- a/tests/misc/two-spaces.html
+++ b/tests/misc/two-spaces.html
@@ -1,31 +1,20 @@
-<p>This line has two spaces at the end<br />
-but this one has none
- but this line has three <br />
-and this is the second from last line
- in this test message
-</p>
+<p>This line has two spaces at the end<br />but this one has none
+but this line has three <br />and this is the second from last line
+in this test message</p>
<ul>
- <li><p>This list item has two spaces.<br />
-
-</p>
-
- </li>
-
- <li><p>This has none.
- This line has three. <br />
-This line has none.
- And this line two.<br />
-
-</p>
-<p>This line has none.
+<li>
+<p>This list item has two spaces.<br />
</p>
-
- </li>
-
- <li><p>This line has none.
+</li>
+<li>
+<p>This has none.
+This line has three. <br />This line has none.
+And this line two.<br />
</p>
-
- </li>
+<p>This line has none.</p>
+</li>
+<li>
+<p>This line has none.</p>
+</li>
</ul>
-<p>And this is the end.
-</p> \ No newline at end of file
+<p>And this is the end.</p> \ No newline at end of file
diff --git a/tests/misc/uche.html b/tests/misc/uche.html
index f438f3d..ddd569d 100644
--- a/tests/misc/uche.html
+++ b/tests/misc/uche.html
@@ -1,7 +1,14 @@
-<p><img src="http://fourthought.com/images/ftlogo.png" title="Fourthought logo" alt="asif"/>
+<p>
+<img alt="asif" src="http://fourthought.com/images/ftlogo.png" title="Fourthought logo" />
</p>
-<p><a href="http://fourthought.com/"><img src="http://fourthought.com/images/ftlogo.png" title="Fourthought logo" style="float: left; margin: 10px; border:
-none;" alt=""/></a>
+<p>
+<a href="http://fourthought.com/">
+<img alt="" src="http://fourthought.com/images/ftlogo.png" style="float: left; margin: 10px; border:
+none;" title="Fourthought logo" />
+</a>
+</p>
+<p>
+<a href="http://link.com/">
+<img alt="text" src="x" />
+</a>
</p>
-<p><a href="http://link.com/"><img src="x" alt="text"/></a>
-</p> \ No newline at end of file
diff --git a/tests/misc/underscores.html b/tests/misc/underscores.html
index 14ccb02..fa9e642 100644
--- a/tests/misc/underscores.html
+++ b/tests/misc/underscores.html
@@ -1,15 +1,9 @@
-
-<p>THIS_SHOULD_STAY_AS_IS
-</p>
-<p>Here is some <em>emphasis</em>, ok?
-</p>
-<p>Ok, at least <em>this</em> should work.
-</p>
-<p>THIS<strong>SHOULD</strong>STAY
-</p>
-<p>Here is some <strong>strong</strong> stuff.
-</p>
-<p>THIS<strong><em>SHOULD</em></strong>STAY?
-</p>
-
+<p>THIS_SHOULD_STAY_AS_IS</p>
+<p>Here is some <em>emphasis</em>, ok?</p>
+<p>Ok, at least <em>this</em> should work.</p>
+<p>THIS<strong>SHOULD</strong>STAY</p>
+<p>Here is some <strong>strong</strong> stuff.</p>
+<p>THIS<strong>
+ <em>SHOULD</em>
+ </strong>STAY?</p>
diff --git a/tests/misc/url_spaces.html b/tests/misc/url_spaces.html
index 78c6521..f81f55e 100644
--- a/tests/misc/url_spaces.html
+++ b/tests/misc/url_spaces.html
@@ -1,7 +1,6 @@
-
-<p><a href="http://wikipedia.org/wiki/Dawn of War">Dawn of War</a>
+<p>
+<a href="http://wikipedia.org/wiki/Dawn of War">Dawn of War</a>
</p>
-<p><a href="http://wikipedia.org/wiki/Dawn of War" title="Dawn of War">Dawn of War</a>
-</p>
-
-
+<p>
+<a href="http://wikipedia.org/wiki/Dawn of War" title="Dawn of War">Dawn of War</a>
+</p> \ No newline at end of file
diff --git a/tests/safe_mode/inline-html-advanced.html b/tests/safe_mode/inline-html-advanced.html
index bc59ab1..e9dd2ec 100644
--- a/tests/safe_mode/inline-html-advanced.html
+++ b/tests/safe_mode/inline-html-advanced.html
@@ -1,14 +1,11 @@
-<p>Simple block on one line:
-</p>
-&lt;div&gt;foo&lt;/div&gt;
-
-<p>And nested without indentation:
-</p>
-&lt;div&gt;
+<p>Simple block on one line:</p>
+<p>&lt;div&gt;foo&lt;/div&gt;</p>
+<p>And nested without indentation:</p>
+<p>&lt;div&gt;
&lt;div&gt;
&lt;div&gt;
foo
&lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;bar&lt;/div&gt;
-&lt;/div&gt; \ No newline at end of file
+&lt;/div&gt;</p> \ No newline at end of file
diff --git a/tests/safe_mode/inline-html-comments.html b/tests/safe_mode/inline-html-comments.html
index 8deccba..0f1e417 100644
--- a/tests/safe_mode/inline-html-comments.html
+++ b/tests/safe_mode/inline-html-comments.html
@@ -1,14 +1,8 @@
-<p>Paragraph one.
-</p>
-&lt;!-- This is a simple comment --&gt;
-
-&lt;!--
+<p>Paragraph one.</p>
+<p>&lt;!-- This is a simple comment --&gt;</p>
+<p>&lt;!--
This is another comment.
---&gt;
-
-<p>Paragraph two.
-</p>
-&lt;!-- one comment block -- -- with two comments --&gt;
-
-<p>The end.
-</p> \ No newline at end of file
+--&gt;</p>
+<p>Paragraph two.</p>
+<p>&lt;!-- one comment block -- -- with two comments --&gt;</p>
+<p>The end.</p> \ No newline at end of file
diff --git a/tests/safe_mode/inline-html-simple.html b/tests/safe_mode/inline-html-simple.html
index 24d16ba..ceaaa90 100644
--- a/tests/safe_mode/inline-html-simple.html
+++ b/tests/safe_mode/inline-html-simple.html
@@ -1,64 +1,53 @@
-<p>Here's a simple block:
-</p>
-&lt;div&gt;
+<p>Here's a simple block:</p>
+<p>&lt;div&gt;
foo
-&lt;/div&gt;
-
-<p>This should be a code block, though:
-</p>
-<pre><code>&lt;div&gt;
+&lt;/div&gt;</p>
+<p>This should be a code block, though:</p>
+<pre>
+<code>&lt;div&gt;
foo
&lt;/div&gt;
-</code></pre><p>As should this:
-</p>
-<pre><code>&lt;div&gt;foo&lt;/div&gt;
-</code></pre><p>Now, nested:
-</p>
-&lt;div&gt;
+</code>
+</pre>
+<p>As should this:</p>
+<pre>
+<code>&lt;div&gt;foo&lt;/div&gt;
+</code>
+</pre>
+<p>Now, nested:</p>
+<p>&lt;div&gt;
&lt;div&gt;
&lt;div&gt;
foo
&lt;/div&gt;
&lt;/div&gt;
-&lt;/div&gt;
-
-<p>This should just be an HTML comment:
-</p>
-&lt;!-- Comment --&gt;
-
-<p>Multiline:
-</p>
-&lt;!--
+&lt;/div&gt;</p>
+<p>This should just be an HTML comment:</p>
+<p>&lt;!-- Comment --&gt;</p>
+<p>Multiline:</p>
+<p>&lt;!--
Blah
Blah
---&gt;
-
-<p>Code block:
-</p>
-<pre><code>&lt;!-- Comment --&gt;
-</code></pre><p>Just plain comment, with trailing spaces on the line:
-</p>
-&lt;!-- foo --&gt;
-
-<p>Code:
-</p>
-<pre><code>&lt;hr /&gt;
-</code></pre><p>Hr's:
-</p>
-&lt;hr&gt;
-
-&lt;hr/&gt;
-
-&lt;hr /&gt;
-
-&lt;hr&gt;
-
-&lt;hr/&gt;
-
-&lt;hr /&gt;
-
-&lt;hr class=&quot;foo&quot; id=&quot;bar&quot; /&gt;
-
-&lt;hr class=&quot;foo&quot; id=&quot;bar&quot;/&gt;
-
-&lt;hr class=&quot;foo&quot; id=&quot;bar&quot; &gt; \ No newline at end of file
+--&gt;</p>
+<p>Code block:</p>
+<pre>
+<code>&lt;!-- Comment --&gt;
+</code>
+</pre>
+<p>Just plain comment, with trailing spaces on the line:</p>
+<p>&lt;!-- foo --&gt;</p>
+<p>Code:</p>
+<pre>
+<code>&lt;hr /&gt;
+</code>
+</pre>
+<p>Hr's:</p>
+<p>&lt;hr&gt;</p>
+<p>&lt;hr/&gt;</p>
+<p>&lt;hr /&gt;</p>
+<p>&lt;hr&gt;</p>
+<p>&lt;hr/&gt;</p>
+<p>&lt;hr /&gt;</p>
+<p>&lt;hr class=&quot;foo&quot; id=&quot;bar&quot; /&gt;</p>
+<p>&lt;hr class=&quot;foo&quot; id=&quot;bar&quot;/&gt;</p>
+<p>&lt;hr class=&quot;foo&quot; id=&quot;bar&quot; &gt;</p> \ No newline at end of file
diff --git a/tests/safe_mode/script_tags.html b/tests/safe_mode/script_tags.html
index cb2abfa..940329f 100644
--- a/tests/safe_mode/script_tags.html
+++ b/tests/safe_mode/script_tags.html
@@ -1,26 +1,20 @@
-<p>This should be stripped/escaped in safe_mode.
-</p>
-&lt;script&gt;
+<p>This should be stripped/escaped in safe_mode.</p>
+<p>&lt;script&gt;
alert(&quot;Hello world!&quot;)
-&lt;/script&gt;
-
-<p>With blank lines.
-</p>
-&lt;script&gt;
+&lt;/script&gt;</p>
+<p>With blank lines.</p>
+<p>&lt;script&gt;
alert(&quot;Hello world!&quot;)
-&lt;/script&gt;
-
-<p>Now with some weirdness
-</p>
-<p><code>&lt;script &lt;!--
+&lt;/script&gt;</p>
+<p>Now with some weirdness</p>
+<p>
+<code>&lt;script &lt;!--
alert("Hello world!")
-&lt;/script &lt;&gt;</code>`
-</p>
-<p>Try another way.
-</p>
-&lt;script &lt;!--
+&lt;/script &lt;&gt;</code> `</p>
+<p>Try another way.</p>
+<p>&lt;script &lt;!--
alert(&quot;Hello world!&quot;)
&lt;/script &lt;&gt;
@@ -30,4 +24,6 @@ This time with blank lines.
alert(&quot;Hello world!&quot;)
-&lt;/script &lt;&gt; \ No newline at end of file
+&lt;/script &lt;&gt;
+
+</p>
diff --git a/tests/safe_mode/script_tags.txt b/tests/safe_mode/script_tags.txt
index 55c60e9..44041c2 100644
--- a/tests/safe_mode/script_tags.txt
+++ b/tests/safe_mode/script_tags.txt
@@ -16,7 +16,7 @@ Now with some weirdness
``<script <!--
alert("Hello world!")
-</script <>```
+</script <>`` `
Try another way.
diff --git a/tests/safe_mode/unsafe_urls.html b/tests/safe_mode/unsafe_urls.html
index 8eda30d..6dabe64 100644
--- a/tests/safe_mode/unsafe_urls.html
+++ b/tests/safe_mode/unsafe_urls.html
@@ -1,24 +1,26 @@
-<p>These links should be unsafe and not allowed in safe_mode
+<p>These links should be unsafe and not allowed in safe_mode</p>
+<p>
+<a href="">link</a>
+<a href="">link</a>
+<a href="">link</a>
+<a href="">link</a>
+<a href="">link</a>
+<a href="">link</a>
+<a href="">link</a>
+<a href="">link</a>
+<a href="">link</a>
+<a href="">link</a>
+<a href="">link</a>
</p>
-<p><a href="">link</a>
- <a href="">link</a>
- <a href="">link</a>
- <a href="">link</a>
- <a href="">link</a>
- <a href="">link</a>
- <a href="">link</a>
- <a href="">link</a>
- <a href="">link</a>
- <a href="">link</a>
- <a href="">link</a>
+<p>
+<img alt="img" src="" />
+<a href="">ref</a>
+<img alt="imgref" src="" />
</p>
-<p><img src="" alt="img"/><a href="">ref</a>
- <img src="" alt="imgref"/>
+<p>These should work regardless:</p>
+<p>
+<a href="relative/url.html">relative</a>
+<a href="mailto:foo@bar.com">email</a>
+<a href="news:some.news.group.com">news scheme</a>
+<a href="http://example.com">http link</a>
</p>
-<p>These should work regardless:
-</p>
-<p><a href="relative/url.html">relative</a>
- <a href="mailto:foo@bar.com">email</a>
- <a href="news:some.news.group.com">news scheme</a>
- <a href="http://example.com">http link</a>
-</p> \ No newline at end of file