read_markdown.py: upgrade to 1.0.1.15

[webber.git] / plugins / read_markdown.py
diff --git a/plugins/read_markdown.py b/plugins/read_markdown.py

index c8d4fad18e63349fb4249d1efccd9fa9ffa3beaa..35b8a46675441bee462e42419614f470b8c873ff 100644 (file)
--- a/plugins/read_markdown.py
+++ b/plugins/read_markdown.py
@@ -5,7 +5,7 @@ from webber import *
  # Copyright (c) 2007-2008 ActiveState Corp.
  # License: MIT (http://www.opensource.org/licenses/mit-license.php)
  #
-# I used version 1.0.1.12, but deleted:
+# I used version 1.0.1.15, but deleted:
  #      * file-vars (emacs-style settings inside the file)
  #      * Standardize line endings
  #      * call to _do_links()
@@ -18,7 +18,7 @@ try:
      from hashlib import md5
  except ImportError:
      from md5 import md5
-from random import random
+from random import random, randint
  
  
  
@@ -42,14 +42,22 @@ DEBUG = False
  
  DEFAULT_TAB_WIDTH = 4
  
-# Table of hash values for escaped characters:
-def _escape_hash(s):
-    # Lame attempt to avoid possible collision with someone actually
-    # using the MD5 hexdigest of one of these chars in there text.
-    # Other ideas: random.random(), uuid.uuid()
+
+try:
+    import uuid
+except ImportError:
+    SECRET_SALT = str(randint(0, 1000000))
+else:
+    SECRET_SALT = str(uuid.uuid4())
+def _hash_ascii(s):
      #return md5(s).hexdigest()   # Markdown.pl effectively does this.
-    return 'md5-'+md5(s).hexdigest()
-g_escape_table = dict([(ch, _escape_hash(ch)) for ch in '\\`*_{}[]()>#+-.!'])
+    return 'md5-' + md5(SECRET_SALT + s).hexdigest()
+def _hash_text(s):
+    return 'md5-' + md5(SECRET_SALT + s.encode("utf-8")).hexdigest()
+
+# Table of hash values for escaped characters:
+g_escape_table = dict([(ch, _hash_ascii(ch))
+                       for ch in '\\`*_{}[]()>#+-.!'])
  
  
  
@@ -179,11 +187,11 @@ class Markdown(object):
  
          text = self._run_block_gamut(text)
  
-        text = self._unescape_special_chars(text)
-
          if "footnotes" in self.extras:
              text = self._add_footnotes(text)
  
+        text = self._unescape_special_chars(text)
+
          if self.safe_mode:
              text = self._unhash_html_spans(text)
  
@@ -647,7 +655,7 @@ class Markdown(object):
          Markdown.pl because of the lack of atomic matching support in
          Python's regex engine used in $g_nested_brackets.
          """
-        MAX_LINK_TEXT_SENTINEL = 300
+        MAX_LINK_TEXT_SENTINEL = 3000  # markdown2 issue 24
  
          # `anchor_allowed_pos` is used to support img links inside
          # anchors, but not anchors inside anchors. An anchor's start
@@ -743,7 +751,8 @@ class Markdown(object):
                          title_str = ''
                      if is_img:
                          result = '<img src="%s" alt="%s"%s%s' \
-                            % (url, link_text.replace('"', '&quot;'),
+                            % (url.replace('"', '&quot;'),
+                               link_text.replace('"', '&quot;'),
                                 title_str, self.empty_element_suffix)
                          curr_pos = start_idx + len(result)
                          text = text[:start_idx] + result + text[match.end():]
@@ -786,7 +795,8 @@ class Markdown(object):
                              title_str = ''
                          if is_img:
                              result = '<img src="%s" alt="%s"%s%s' \
-                                % (url, link_text.replace('"', '&quot;'),
+                                % (url.replace('"', '&quot;'),
+                                   link_text.replace('"', '&quot;'),
                                     title_str, self.empty_element_suffix)
                              curr_pos = start_idx + len(result)
                              text = text[:start_idx] + result + text[match.end():]
@@ -1314,7 +1324,7 @@ class Markdown(object):
                          .replace('*', g_escape_table['*'])
                          .replace('_', g_escape_table['_']))
                  link = '<a href="%s">%s</a>' % (escaped_href, text[start:end])
-                hash = md5(link).hexdigest()
+                hash = _hash_text(link)
                  link_from_hash[hash] = link
                  text = text[:start] + hash + text[end:]
          for hash, link in link_from_hash.items():
@@ -1544,7 +1554,8 @@ def _xml_encode_email_char_at_random(ch):
      r = random()
      # Roughly 10% raw, 45% hex, 45% dec.
      # '@' *must* be encoded. I [John Gruber] insist.
-    if r > 0.9 and ch != "@":
+    # Issue 26: '_' must be encoded.
+    if r > 0.9 and ch not in "@_":
          return ch
      elif r < 0.45:
          # The [1:] is to drop leading '0': 0x63 -> x63
@@ -1552,8 +1563,6 @@ def _xml_encode_email_char_at_random(ch):
      else:
          return '&#%s;' % ord(ch)
  
-def _hash_text(text):
-    return 'md5:'+md5(text.encode("utf-8")).hexdigest()