From 5b24964cda642526f6331573980b2cd6f254780b Mon Sep 17 00:00:00 2001 From: Holger Schurig Date: Thu, 25 Jun 2009 23:44:58 +0200 Subject: [PATCH] read_*, webber: fix loading of files with "Umlaut" characters You cannot do some f.readline() actions and then read the rest via f.read(), at least not if the files aren't pure ASCII. --- plugins/read_html.py | 3 +- plugins/read_markdown.py | 3 +- plugins/read_rst.py | 4 +-- webber.py | 73 ++++++++++++++++++++++------------------ 4 files changed, 44 insertions(+), 39 deletions(-) diff --git a/plugins/read_html.py b/plugins/read_html.py index 0ad6a8b..94ad640 100644 --- a/plugins/read_html.py +++ b/plugins/read_html.py @@ -7,8 +7,7 @@ def read(params): file = params.file if file.rel_path.endswith(".html"): file.render = "html" - f = file.read_keywords() - return f.read() + file.read() @set_hook("htmlize") diff --git a/plugins/read_markdown.py b/plugins/read_markdown.py index 65f8c06..06ed2ad 100644 --- a/plugins/read_markdown.py +++ b/plugins/read_markdown.py @@ -1580,8 +1580,7 @@ def read(params): file = params.file if file.rel_path.endswith(".md"): file.render = "html" - f = file.read_keywords() - return f.read() + file.read() _markdown = None diff --git a/plugins/read_rst.py b/plugins/read_rst.py index e8e3a36..a9968ac 100644 --- a/plugins/read_rst.py +++ b/plugins/read_rst.py @@ -9,9 +9,7 @@ def read(params): file = params.file if file.rel_path.endswith(".rst"): file.render = "html" - f = file.read_keywords() - return f.read() - + file.read() class WebHTMLTranslator(html4css1.HTMLTranslator): diff --git a/webber.py b/webber.py index db3ea7e..194ce5c 100644 --- a/webber.py +++ b/webber.py @@ -1,5 +1,5 @@ # -*- coding: iso-8859-1 -*- -import sys, os, optparse, fnmatch, stat, re, time, types +import sys, os, optparse, fnmatch, stat, re, time, codecs from config import Holder @@ -69,36 +69,45 @@ class File(Holder): #print self.keys() reKeywords = re.compile(r'(\S+)\s*:\s*(.*)') - #reIsoDate = 
re.compile(r'(\d\d\d\d)-(\d\d)-(\d\d)') - - def read_keywords(self, terminate_line=""): - """Opens the file and reads "key: value" pairs on the top of it. Returns - the open file handle for further processing by some plugins/read_*.py code.""" - f = open(self.path) - while True: - s = f.readline().strip() - if s==terminate_line: - break - m = self.reKeywords.match(s) - if not m: - warning("%s: wrong 'key: value' line '%s'" % (self.rel_path, s)) - break - key = m.group(1).lower() - val = m.group(2) - - if key == "mtime": - val = iso_to_time(val) - - if key == "ctime": - val = iso_to_time(val) - - if key == "title": - if not self.has_key("linktitle"): - self["linktitle"] = val - - #print self.rel_path, key, val - self[key] = val - return f + + def read(self, terminate_line=""): + f = codecs.open(self.path, "r", self.input_encoding) + + # Read keywords + read_keywords = True + txt = [] + for s in f.readlines(): + if read_keywords: + s = s.strip() + #print "kwd:", s + if s==terminate_line: + read_keywords = False + continue + + m = self.reKeywords.match(s) + if not m: + warning("%s: wrong 'key: value' line '%s'" % (self.rel_path, s)) + break + key = m.group(1).lower() + val = m.group(2) + + if key == "mtime": + val = iso_to_time(val) + + if key == "ctime": + val = iso_to_time(val) + + if key == "title": + if not self.has_key("linktitle"): + self["linktitle"] = val + + #print self.rel_path, key, val + self[key] = val + + continue + #print "txt:", s.rstrip().encode("iso-8859-1") + txt.append(s) + self.contents = "".join(txt) _get_file_for_cache = {} @@ -567,7 +576,7 @@ def run_macros(file, contents): kw["file"] = file f = macros[name] s = f(kw) - if type(s) == types.UnicodeType: + if isinstance(s, unicode): s = s.encode("utf-8") return s else: -- 2.39.2