From 5b24964cda642526f6331573980b2cd6f254780b Mon Sep 17 00:00:00 2001
From: Holger Schurig <hs4233@mail.mn-solutions.de>
Date: Thu, 25 Jun 2009 23:44:58 +0200
Subject: [PATCH] read_*, webber: fix loading of files with "Umlaut" characters

You cannot do some f.readline() actions and then read the rest via f.read(),
at least not if the files aren't pure ASCII.
---
 plugins/read_html.py     |  3 +-
 plugins/read_markdown.py |  3 +-
 plugins/read_rst.py      |  4 +--
 webber.py                | 73 ++++++++++++++++++++++------------------
 4 files changed, 44 insertions(+), 39 deletions(-)

diff --git a/plugins/read_html.py b/plugins/read_html.py
index 0ad6a8b..94ad640 100644
--- a/plugins/read_html.py
+++ b/plugins/read_html.py
@@ -7,8 +7,7 @@ def read(params):
 	file = params.file
 	if file.rel_path.endswith(".html"):
 		file.render = "html"
-		f = file.read_keywords()
-		return f.read()
+		file.read()
 
 
 @set_hook("htmlize")
diff --git a/plugins/read_markdown.py b/plugins/read_markdown.py
index 65f8c06..06ed2ad 100644
--- a/plugins/read_markdown.py
+++ b/plugins/read_markdown.py
@@ -1580,8 +1580,7 @@ def read(params):
 	file = params.file
 	if file.rel_path.endswith(".md"):
 		file.render = "html"
-		f = file.read_keywords()
-		return f.read()
+		file.read()
 
 
 _markdown = None
diff --git a/plugins/read_rst.py b/plugins/read_rst.py
index e8e3a36..a9968ac 100644
--- a/plugins/read_rst.py
+++ b/plugins/read_rst.py
@@ -9,9 +9,7 @@ def read(params):
 	file = params.file
 	if file.rel_path.endswith(".rst"):
 		file.render = "html"
-		f = file.read_keywords()
-		return f.read()
-
+		file.read()
 
 
 class WebHTMLTranslator(html4css1.HTMLTranslator):
diff --git a/webber.py b/webber.py
index db3ea7e..194ce5c 100644
--- a/webber.py
+++ b/webber.py
@@ -1,5 +1,5 @@
 # -*- coding: iso-8859-1 -*-
-import sys, os, optparse, fnmatch, stat, re, time, types
+import sys, os, optparse, fnmatch, stat, re, time, codecs
 from config import Holder
 
 
@@ -69,36 +69,45 @@ class File(Holder):
 		#print self.keys()
 
 	reKeywords = re.compile(r'(\S+)\s*:\s*(.*)')
-	#reIsoDate = re.compile(r'(\d\d\d\d)-(\d\d)-(\d\d)')
-
-	def read_keywords(self, terminate_line=""):
-		"""Opens the file and reads "key: value" pairs on the top of it. Returns
-		the open file handle for further processing by some plugins/read_*.py code."""
-		f = open(self.path)
-		while True:
-			s = f.readline().strip()
-			if s==terminate_line:
-				break
-			m = self.reKeywords.match(s)
-			if not m:
-				warning("%s: wrong 'key: value' line '%s'" % (self.rel_path, s))
-				break
-			key = m.group(1).lower()
-			val = m.group(2)
-
-			if key == "mtime":
-				val = iso_to_time(val)
-
-			if key == "ctime":
-				val = iso_to_time(val)
-
-			if key == "title":
-				if not self.has_key("linktitle"):
-					self["linktitle"] = val
-
-			#print self.rel_path, key, val
-			self[key] = val
-		return f
+
+	def read(self, terminate_line=""):
+		f = codecs.open(self.path, "r", self.input_encoding)
+
+		# Read keywords
+		read_keywords = True
+		txt = []
+		for s in f.readlines():
+			if read_keywords:
+				s = s.strip()
+				#print "kwd:", s
+				if s==terminate_line:
+					read_keywords = False
+					continue
+
+				m = self.reKeywords.match(s)
+				if not m:
+					warning("%s: wrong 'key: value' line '%s'" % (self.rel_path, s))
+					break
+				key = m.group(1).lower()
+				val = m.group(2)
+
+				if key == "mtime":
+					val = iso_to_time(val)
+
+				if key == "ctime":
+					val = iso_to_time(val)
+
+				if key == "title":
+					if not self.has_key("linktitle"):
+						self["linktitle"] = val
+
+				#print self.rel_path, key, val
+				self[key] = val
+
+				continue
+			#print "txt:", s.rstrip().encode("iso-8859-1")
+			txt.append(s)
+		self.contents = "".join(txt)
 
 
 _get_file_for_cache = {}
@@ -567,7 +576,7 @@ def run_macros(file, contents):
 			kw["file"] = file
 			f = macros[name]
 			s = f(kw)
-			if type(s) == types.UnicodeType:
+			if isinstance(s, unicode):
 				s = s.encode("utf-8")
 			return s
 		else:
-- 
2.39.2