# Copyright (c) 2007-2008 ActiveState Corp.
# License: MIT (http://www.opensource.org/licenses/mit-license.php)
#
-# I used version 1.0.1.12, but deleted:
+# I used version 1.0.1.15, but deleted:
# * file-vars (emacs-style settings inside the file)
# * Standardize line endings
# * call to _do_links()
from hashlib import md5
except ImportError:
from md5 import md5
-from random import random
+from random import random, randint
DEFAULT_TAB_WIDTH = 4
-# Table of hash values for escaped characters:
-def _escape_hash(s):
- # Lame attempt to avoid possible collision with someone actually
- # using the MD5 hexdigest of one of these chars in there text.
- # Other ideas: random.random(), uuid.uuid()
+
+try:
+ import uuid
+except ImportError:
+ SECRET_SALT = str(randint(0, 1000000))  # fallback when uuid cannot be imported: decimal string of an int in [0, 1000000]
+else:
+ SECRET_SALT = str(uuid.uuid4())  # random UUID string; prepended to the input of _hash_ascii/_hash_text
+def _hash_ascii(s):  # salted hash key for `s`; unlike _hash_text, `s` is hashed without an encode step
#return md5(s).hexdigest() # Markdown.pl effectively does this.
- return 'md5-'+md5(s).hexdigest()
-g_escape_table = dict([(ch, _escape_hash(ch)) for ch in '\\`*_{}[]()>#+-.!'])
+ return 'md5-' + md5(SECRET_SALT + s).hexdigest()  # 'md5-' prefix + hex MD5 digest of (salt + s)
+def _hash_text(s):  # same scheme as _hash_ascii, but `s` is UTF-8 encoded before being salted and hashed
+ return 'md5-' + md5(SECRET_SALT + s.encode("utf-8")).hexdigest()  # 'md5-' prefix + hex MD5 digest
+
+# Table of hash values for escaped characters, keyed by the character itself
+# (each value is _hash_ascii(ch), so it depends on SECRET_SALT):
+g_escape_table = dict([(ch, _hash_ascii(ch))
+ for ch in '\\`*_{}[]()>#+-.!'])
text = self._run_block_gamut(text)
- text = self._unescape_special_chars(text)
-
if "footnotes" in self.extras:
text = self._add_footnotes(text)
+ text = self._unescape_special_chars(text)
+
if self.safe_mode:
text = self._unhash_html_spans(text)
Markdown.pl because of the lack of atomic matching support in
Python's regex engine used in $g_nested_brackets.
"""
- MAX_LINK_TEXT_SENTINEL = 300
+ MAX_LINK_TEXT_SENTINEL = 3000 # markdown2 issue 24
# `anchor_allowed_pos` is used to support img links inside
# anchors, but not anchors inside anchors. An anchor's start
title_str = ''
if is_img:
result = '<img src="%s" alt="%s"%s%s' \
- % (url, link_text.replace('"', '"'),
+ % (url.replace('"', '"'),
+ link_text.replace('"', '"'),
title_str, self.empty_element_suffix)
curr_pos = start_idx + len(result)
text = text[:start_idx] + result + text[match.end():]
title_str = ''
if is_img:
result = '<img src="%s" alt="%s"%s%s' \
- % (url, link_text.replace('"', '"'),
+ % (url.replace('"', '"'),
+ link_text.replace('"', '"'),
title_str, self.empty_element_suffix)
curr_pos = start_idx + len(result)
text = text[:start_idx] + result + text[match.end():]
.replace('*', g_escape_table['*'])
.replace('_', g_escape_table['_']))
link = '<a href="%s">%s</a>' % (escaped_href, text[start:end])
- hash = md5(link).hexdigest()
+ hash = _hash_text(link)
link_from_hash[hash] = link
text = text[:start] + hash + text[end:]
for hash, link in link_from_hash.items():
r = random()
# Roughly 10% raw, 45% hex, 45% dec.
# '@' *must* be encoded. I [John Gruber] insist.
- if r > 0.9 and ch != "@":
+ # Issue 26: '_' must be encoded.
+ if r > 0.9 and ch not in "@_":
return ch
elif r < 0.45:
# The [1:] is to drop leading '0': 0x63 -> x63
else:
return '&#%s;' % ord(ch)
-def _hash_text(text):
- return 'md5:'+md5(text.encode("utf-8")).hexdigest()