From: Holger Schurig Date: Wed, 24 Jun 2009 15:20:12 +0000 (+0200) Subject: Initial commit X-Git-Url: https://oss.titaniummirror.com/gitweb?a=commitdiff_plain;h=5b4e747f947e1a4c757fbcaed1f6d977374b208e;p=webber.git Initial commit --- 5b4e747f947e1a4c757fbcaed1f6d977374b208e diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..0bc3bff --- /dev/null +++ b/.gitignore @@ -0,0 +1,10 @@ +/webber.conf +? +*.pyc +*.pyo +/out +/cache.tmp +/sitemap.xml +/old.* +/newbuild.prof +/in/files diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..2e69fb2 --- /dev/null +++ b/Makefile @@ -0,0 +1,18 @@ + +all: webber.conf + ./webber + +profile: + ./webber --profile + +clean: + rm -f *.pyc plugins/*.pyc + +realclean: clean + rm -rf out + +# Automatically create webber.conf: +ifeq ($(wildcard webber.conf),) +webber.conf: in/webber.conf + ln -s in/webber.conf +endif diff --git a/config.py b/config.py new file mode 100755 index 0000000..a816d50 --- /dev/null +++ b/config.py @@ -0,0 +1,608 @@ +import sys, os, codecs, types + + +try: + import encodings.utf_32 + has_utf32 = True +except: + has_utf32 = False + +class ConfigInputStream(object): + """ + An input stream which can read either ANSI files with default encoding + or Unicode files with BOMs. + + Handles UTF-8, UTF-16LE, UTF-16BE. Could handle UTF-32 if Python had + built-in support. + """ + def __init__(self, stream): + """ + Initialize an instance. + + @param stream: The underlying stream to be read. Should be seekable. + @type stream: A stream (file-like object). 
+ """ + encoding = None + signature = stream.read(4) + used = -1 + if has_utf32: + if signature == codecs.BOM_UTF32_LE: + encoding = 'utf-32le' + elif signature == codecs.BOM_UTF32_BE: + encoding = 'utf-32be' + if encoding is None: + if signature[:3] == codecs.BOM_UTF8: + used = 3 + encoding = 'utf-8' + elif signature[:2] == codecs.BOM_UTF16_LE: + used = 2 + encoding = 'utf-16le' + elif signature[:2] == codecs.BOM_UTF16_BE: + used = 2 + encoding = 'utf-16be' + else: + used = 0 + if used >= 0: + stream.seek(used) + if encoding: + reader = codecs.getreader(encoding) + stream = reader(stream) + self.stream = stream + self.encoding = encoding + + def read(self, size): + if (size == 0) or (self.encoding is None): + rv = self.stream.read(size) + else: + rv = u'' + while size > 0: + rv += self.stream.read(1) + size -= 1 + return rv + + def close(self): + self.stream.close() + + def readline(self): + if self.encoding is None: + line = '' + else: + line = u'' + while True: + c = self.stream.read(1) + if c: + line += c + if c == '\n': + break + return line + + +WORD = 'a' +NUMBER = '9' +STRING = '"' +EOF = '' +LCURLY = '{' +RCURLY = '}' +LBRACK = '[' +LBRACK2 = 'a[' +RBRACK = ']' +COMMA = ',' +COLON = ':' +MINUS = '-' +TRUE = 'True' +FALSE = 'False' +NONE = 'None' + +WORDCHARS = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_" + +if sys.platform == 'win32': + NEWLINE = '\r\n' +elif os.name == 'mac': + NEWLINE = '\r' +else: + NEWLINE = '\n' + +class ConfigError(Exception): + """ + This is the base class of exceptions raised by this module. + """ + pass + +class ConfigFormatError(ConfigError): + """ + This is the base class of exceptions raised due to syntax errors in + configurations. + """ + pass + +class ConfigReader(object): + """ + This internal class implements a parser for configurations. 
+ + >>> conf = Holder() + >>> cr = ConfigReader(conf) + >>> cr.load("webber.conf") + >>> print conf.lang + de + >>> import cStringIO + >>> s = cStringIO.StringIO("num: 1\\nlang: 'us'") + >>> cr.load(s) + >>> print conf.lang + us + >>> print conf.num + 1 + >>> s = cStringIO.StringIO("arr: [1,2]") + >>> cr.load(s) + >>> print conf.arr + [1, 2] + >>> s = cStringIO.StringIO("assoc: {a:11, b:22}") + >>> cr.load(s) + >>> print conf.assoc["a"], conf.assoc["b"] + 11 22 + """ + + def __init__(self, config): + self.filename = None + self.config = config + self.lineno = 0 + self.colno = 0 + self.lastc = None + self.last_token = None + self.whitespace = ' \t\r\n' + self.quotes = '\'"' + self.punct = ':-+*/%,.{}[]()@`$' + self.digits = '0123456789' + self.wordchars = '%s' % WORDCHARS # make a copy + self.identchars = self.wordchars + self.digits + self.pbchars = [] + self.pbtokens = [] + + def location(self): + """ + Return the current location (filename, line, column) in the stream + as a string. + + Used when printing error messages, + + @return: A string representing a location in the stream being read. + @rtype: str + """ + return "%s(%d,%d)" % (self.filename, self.lineno, self.colno) + + def getChar(self): + """ + Get the next char from the stream. Update line and column numbers + appropriately. + + @return: The next character from the stream. + @rtype: str + """ + if self.pbchars: + c = self.pbchars.pop() + else: + c = self.stream.read(1) + self.colno += 1 + if c == '\n': + self.lineno += 1 + self.colno = 1 + return c + + def __repr__(self): + return "" % id(self) + + __str__ = __repr__ + + def getToken(self): + """ + Get a token from the stream. String values are returned in a form + where you need to eval() the returned value to get the actual + string. The return value is (token_type, token_value). + + Multiline string tokenizing is thanks to David Janes (BlogMatrix) + + @return: The next token. + @rtype: A token tuple. 
+ """ + if self.pbtokens: + return self.pbtokens.pop() + stream = self.stream + token = '' + tt = EOF + while True: + c = self.getChar() + if not c: + break + if c in self.quotes: + token = c + quote = c + tt = STRING + escaped = False + multiline = False + c1 = self.getChar() + if c1 == quote: + c2 = self.getChar() + if c2 == quote: + multiline = True + token += quote + token += quote + else: + self.pbchars.append(c2) + self.pbchars.append(c1) + else: + self.pbchars.append(c1) + while True: + c = self.getChar() + if not c: + break + token += c + if (c == quote) and not escaped: + if not multiline or (len(token) >= 6 and token.endswith(token[:3]) and token[-4] != '\\'): + break + if c == '\\': + escaped = not escaped + else: + escaped = False + if not c: + raise ConfigFormatError('%s: Unterminated quoted string: %r, %r' % (self.location(), token, c)) + break + if c in self.whitespace: + self.lastc = c + continue + elif c in self.punct: + token = c + tt = c + if (self.lastc == ']') or (self.lastc in self.identchars): + if c == '[': + tt = LBRACK2 + break + elif c in self.digits: + token = c + tt = NUMBER + while True: + c = self.getChar() + if not c: + break + if c in self.digits: + token += c + elif (c == '.') and token.find('.') < 0: + token += c + else: + if c and (c not in self.whitespace): + self.pbchars.append(c) + break + break + elif c in self.wordchars: + token = c + tt = WORD + c = self.getChar() + while c and (c in self.identchars): + token += c + c = self.getChar() + if c: # and c not in self.whitespace: + self.pbchars.append(c) + if token == "True": + tt = TRUE + elif token == "False": + tt = FALSE + elif token == "None": + tt = NONE + break + else: + raise ConfigFormatError('%s: Unexpected character: %r' % (self.location(), c)) + if token: + self.lastc = token[-1] + else: + self.lastc = None + self.last_token = tt + return (tt, token) + + def load(self, stream): + """ + Load the configuration from the specified stream. 
+ + @param stream: A stream from which to load the configuration. + @type stream: A stream (file-like object). + @param suffix: The suffix of this configuration in the parent + configuration. Should be specified whenever the parent is not None. + @raise ConfigError: If parent is specified but suffix is not. + @raise ConfigFormatError: If there are syntax errors in the stream. + """ + + if type(stream) == types.StringType: + stream = ConfigInputStream(file(stream, 'rb')) + + self.setStream(stream) + self.token = self.getToken() + self.parseMappingBody(self.config) + if self.token[0] != EOF: + raise ConfigFormatError('%s: expecting EOF, found %r' % (self.location(), self.token[1])) + + def setStream(self, stream): + """ + Set the stream to the specified value, and prepare to read from it. + + @param stream: A stream from which to load the configuration. + @type stream: A stream (file-like object). + """ + self.stream = stream + if hasattr(stream, 'name'): + filename = stream.name + else: + filename = '?' + self.filename = filename + self.lineno = 1 + self.colno = 1 + + def match(self, t): + """ + Ensure that the current token type matches the specified value, and + advance to the next token. + + @param t: The token type to match. + @type t: A valid token type. + @return: The token which was last read from the stream before this + function is called. + @rtype: a token tuple - see L{getToken}. + @raise ConfigFormatError: If the token does not match what's expected. + """ + if self.token[0] != t: + raise ConfigFormatError("%s: expecting %s, found %r" % (self.location(), t, self.token[1])) + rv = self.token + self.token = self.getToken() + return rv + + def parseMappingBody(self, parent): + """ + Parse the internals of a mapping, and add entries to the provided + L{Mapping}. + + @param parent: The mapping to add entries to. + @type parent: A L{Mapping} instance. 
+ """ + while self.token[0] in [WORD, STRING]: + self.parseKeyValuePair(parent) + + def parseKeyValuePair(self, parent): + """ + Parse a key-value pair, and add it to the provided L{Mapping}. + + @param parent: The mapping to add entries to. + @type parent: A L{Mapping} instance. + @raise ConfigFormatError: if a syntax error is found. + """ + tt, tv = self.token + if tt == WORD: + key = tv + suffix = tv + elif tt == STRING: + key = eval(tv) + suffix = '[%s]' % tv + else: + msg = "%s: expecting word or string, found %r" + raise ConfigFormatError(msg % (self.location(), tv)) + self.token = self.getToken() + # for now, we allow key on its own as a short form of key : True + if self.token[0] == COLON: + self.token = self.getToken() + value = self.parseValue(parent, suffix) + else: + value = True + try: + parent[key] = value + except Exception, e: + raise ConfigFormatError("%s: %s, %r" % (self.location(), e, + self.token[1])) + tt = self.token[0] + if tt not in [EOF, WORD, STRING, RCURLY, COMMA]: + msg = "%s: expecting one of EOF, WORD, STRING, RCURLY, COMMA, found %r" + raise ConfigFormatError(msg % (self.location(), self.token[1])) + if tt == COMMA: + self.token = self.getToken() + + def parseValue(self, parent, suffix): + """ + Parse a value. + + @param parent: The container to which the value will be added. + @type parent: A L{Container} instance. + @param suffix: The suffix for the value. + @type suffix: str + @return: The value + @rtype: any + @raise ConfigFormatError: if a syntax error is found. + """ + tt = self.token[0] + if tt in [STRING, WORD, NUMBER, TRUE, FALSE, NONE, MINUS]: + rv = self.parseScalar() + elif tt == LBRACK: + rv = self.parseSequence(parent, suffix) + elif tt in [LCURLY]: + rv = self.parseMapping(parent, suffix) + else: + raise ConfigFormatError("%s: unexpected input: %r" % (self.location(), self.token[1])) + return rv + + def parseSequence(self, parent, suffix): + """ + Parse a sequence. 
+ + @param parent: The container to which the sequence will be added. + @type parent: A L{Container} instance. + @param suffix: The suffix for the value. + @type suffix: str + @return: a L{Sequence} instance representing the sequence. + @rtype: L{Sequence} + @raise ConfigFormatError: if a syntax error is found. + """ + rv = [] + self.match(LBRACK) + tt = self.token[0] + while tt in [STRING, WORD, NUMBER, LCURLY, LBRACK, TRUE, FALSE, NONE]: + suffix = '[%d]' % len(rv) + value = self.parseValue(parent, suffix) + rv.append(value) + tt = self.token[0] + if tt == COMMA: + self.match(COMMA) + tt = self.token[0] + continue + self.match(RBRACK) + return rv + + def parseMapping(self, parent, suffix): + """ + Parse a mapping. + + @param parent: The container to which the mapping will be added. + @type parent: A L{Container} instance. + @param suffix: The suffix for the value. + @type suffix: str + @return: a L{Mapping} instance representing the mapping. + @rtype: L{Mapping} + @raise ConfigFormatError: if a syntax error is found. + """ + if self.token[0] == LCURLY: + self.match(LCURLY) + rv = {} + self.parseMappingBody(rv) + self.match(RCURLY) + return rv + + def parseScalar(self): + """ + Parse a scalar - a terminal value such as a string or number, or + an L{Expression} or L{Reference}. + + @return: the parsed scalar + @rtype: any scalar + @raise ConfigFormatError: if a syntax error is found. 
+ """ + tt = self.token[0] + if tt in [NUMBER, WORD, STRING, TRUE, FALSE, NONE]: + rv = self.token[1] + if tt != WORD: + rv = eval(rv) + self.match(tt) + elif tt == MINUS: + self.match(MINUS) + rv = -self.parseScalar() + else: + raise ConfigFormatError("%s: unexpected input: %r" % + (self.location(), self.token[1])) + #print "parseScalar: '%s'" % rv + return rv + + + +class Holder(object): + """This is a simple wrapper class so that you can write + + h = Holder(bar=1, baz="test") + + instead of writing + + foo["bar"] = 1 + baz["bar"] = "test" + + Holder will be the base class for all configurations and objects. + """ + + def __init__(self, **kw): + """Creates a new folder object: + + >>> h = Holder(bar=1, baz="test") + >>> print h.bar + 1 + >>> print h.baz + test + """ + self.__dict__.update(kw) + self._inherit_from = [] + + def keys(self): + """Return list of stored variables. + + >>> h = Holder(bar=1, baz="test") + >>> print sorted(h.keys()) + ['bar', 'baz'] + """ + return filter(lambda x: x[0] != '_', self.__dict__.keys()) + + def has_key(self, key): + return self.__dict__.has_key(key) + + def setDefault(self, key, value): + if not self.__dict__.has_key(key): + self.__dict__[key] = value + + def __getitem__(self, key): + """Allows access to the variables via obj[name] syntax. + + >>> h = Holder() + >>> h.foo = "Hello" + >>> print h["foo"] + Hello + """ + try: + return self.__dict__[key] + except: + pass + for inh in self._inherit_from: + try: + return inh[key] + except: + pass + raise KeyError(key) + + __getattr__ = __getitem__ + + def __setitem__(self,key,val): + """Allows access to the variables via obj[name] syntax. + + >>> h = Holder() + >>> h["foo"] = "Hello" + >>> print h.foo + Hello + """ + self.__dict__[key] = val + + def inheritFrom(self, holder): + """ + This allows on Holder to inherit settings from another holder. 
+ + >>> h1 = Holder(a=1, b=2) + >>> h2 = Holder(c=3) + >>> h2.inheritFrom(h1) + >>> print h2.c + 3 + >>> print h2["b"] + 2 + >>> print h2.a + 1 + """ + self._inherit_from.append(holder) + + def load(self, stream): + """ + >>> conf = Holder() + >>> cr = ConfigReader(conf) + >>> cr.load("webber.conf") + >>> print conf.lang + de + """ + cr = ConfigReader(self) + cr.load(stream) + + def __repr__(self): + return "<%s object: " % self.__class__.__name__ + ",".join(self.keys()) + ">" + + + + + +def _test(): + import doctest + doctest.testmod() + +if __name__ == "__main__": + _test() diff --git a/in/commandline.md b/in/commandline.md new file mode 100644 index 0000000..e404040 --- /dev/null +++ b/in/commandline.md @@ -0,0 +1,53 @@ +title: Command line options +linktitle: Cmdline +parent: Configuration +ctime: 2009-06-26 + += Help = + +As usualy, you can get command line help with "`-h`" or "`--help`": + + usage: webber [options] + + options: + -h, --help show this help message and exit + -i DIR, --in=DIR input directory + -o DIR, --out=DIR output directory + --style-dir=STYLE directory with style sheets + -v, --verbose print status messages to stdout + -k, --keepgoing keep going past errors if possible + -V, --test_verbose print status messages to stdout + += Input directory = + +"`-i`" or "`--in`" defaults to "`in`" and specifies where webber +search for source files. + +You can access this via "`cfg.in_dir`" (or "`file.in_dir`", see +[[inheritance]]). + += Output directory = + +"`-o`" or "`--out`" defaults to "`out`" and specifies where webber +writes the output files. + + += Template (Style) = + +You can define the style of the generated website via HTML templates. If +you have more of them, you switch between different ones via "`--style-dir`". +The default is "`in/style`". + + += Verbosity = + +A common option is "`-v`" (or "`--verbose`") to increase the verbosity. Repeat +to increase even more. 
+ + += Continue on errors = + +With "`-k`" or "`--keepgoing`" you can tell webber to ignore errors in one +page and continue with the next page. + + diff --git a/in/configuration.md b/in/configuration.md new file mode 100644 index 0000000..ed39557 --- /dev/null +++ b/in/configuration.md @@ -0,0 +1,80 @@ +title: Configuration +parent: Webber +ctime: 2009-06-26 + +Configuration happens either the [[commandline]] or +via the configuration file (described below). All Configurations are +[[inherited|inheritance]] and or overwritable on a per-directory and +per-file basis. + +The main means for configuration is the config file: + += Format = + +Webber expects a `webber.conf` file in it's root directory. It should look like this: + + template: "default" + date_format: "%d.%m.%Y" + input_encoding: "iso-8859-1" + output_encoding: "iso-8859-1" + plugins: [ + "skeleton", + "hierarchy", + "link", + "read_rst", + "read_html", + "read_copyonly", + "read_markdown", + "template_mako", + ] + exclude_dir: [ + "webber.conf", + "*.tmpl", + ] + exclude_files: [ + ] + +You could also some options with are normally defined by [[commandline]]. +This saves you from specifying them on ever run of webber: + + in_dir: "in" + out_dir: "out" + style_dir: "in/style" + verbose: 5 + +Beside those entries, you can specify any additional entries that will then +be available in your plugins or templates. + += Meaning = + +== template == + +Used by the [[template_mako]] to select the template. + +== date_format == + +Used in `format_date()`. + +== input_encoding == + +Encoding ('utf-8', 'iso-8859-1' etc) used for reading files. + +== output_encoding == + +Encoding ('utf-8', 'iso-8859-1' etc) used when writing the final HTML pages. + +== plugins == + +List of [[plugins]] to load. + +== exclude_dirs == + +List of directories below `cfg.in_dir` to skip. + +== exclude_files == + +List of files below `cfg.in_dir` to skip. + +== in_dir, out_dir, style_dir == + +See [[commandline]]. 
diff --git a/in/functions.md b/in/functions.md new file mode 100644 index 0000000..79d51ae --- /dev/null +++ b/in/functions.md @@ -0,0 +1,74 @@ +title: Functions +parent: Home +ctime: 2009-06-26 + += Calling functions = + +You can call functions only from [[template_mako]]. An example: + + Modified ${format_date(mtime)} + +Here's list of functions defined by webber and it's default plugins: + +== format_date == + +Takes a timestamp (seconds since 1st January 1970) and converts it into +a string, using to `cfg.date_format`. + +Defined in `webber.py`. + +== get_breadcrumbs == + +Returns the breadcrumbs as "`(linktitle, link)`" tuples. + +Defined in [[hierarchy.py|hierarchy]], where you find an example. + +== get_current_file == + +Returns the current `class File` object. + +Defined in `webber.py`. + +== get_recently == + +Returns a list of up to 10 pages below the current page. For each +page, you'll get a "`(mtime, ctime, title, link)`" tuple back. + +Defined in [[hierarchy.py|hierarchy]], where you find an example. + + +== get_sidemenu == + +Returns a menu for the current page. For each page in this menu you'll +get back a "`(level, part_of_path, is_current, title, link)`" tuple. + +Defined in [[hierarchy.py|hierarchy]], where you find an example. + + +== func == + +A sample function in the [[skeleton.py|skeleton]]. See below. + + += Writing functions = + +A function is a simply python function which returns HTML. The function needs +to be decorated with "`@set_function(name)`". There's an example in +[[skeleton.py|skeleton]], which looks like: + + :::python + @set_function("func") + def sample_func(): + if cfg.test_verbose: + print "in macro skeleton.sample_func" + return "{ output from sample function }" + +Inside your template, you can call the function without parameters or +with arbitrary parameters, like this: + + ${func(a=1, b="test")} + +Inside your function you can access this as ... 
+ +* "`params.a`" which contains the integer "`1`" +* "`params.b`" which contains the string "`test`" diff --git a/in/hierarchy.md b/in/hierarchy.md new file mode 100644 index 0000000..15b47a9 --- /dev/null +++ b/in/hierarchy.md @@ -0,0 +1,138 @@ +title: Generate hierarchy +linktitly: Hierarchy +parent: Plugins +ctime: 2009-06-26 + +This is one of the more complex plugins, used to generate menus and +breadcrumbs. For this, it reads certain keyword from the +[[pageformat]], built an internal parent-child representation. + +This is later used for by the functions "`get_breadcrumbs()`" and +"`get_sidemenu()`", which you call from the [[template_mako]]. + += Page attributes = + +At the "`scan`" [[hook|hooks]], the plugin looks for entries like: + + parent: Home + +or + + childs: Cmdline, Inheritance + +Here's an example of five pages with different attributes: + +--- + + title: Homepage + linktitle: Home + +--- + + title: Impressum + parent: Home + +--- + + title: Job + parent: Home + +--- + + title: CV + parent: Job + +--- + + title: Knowledge + parent: Job + +--- + += Internal representation = + +the plugin would populate the variables "`_childs`" and "`_parent`" like this: + + _parent = { + 'Impressum': 'Home', + 'CV': 'Job', + 'Knowledge': 'Job', + 'Job': 'Home' + } + + _childs = { + 'Home': [(100, 'Job'), + (100, 'Impressum')], + 'Job': [(100, 'CV'), + (100, 'Knowledge')]} + +That's all you need to generate a sidemap, breadcrumbs or a side-menu. + +The pages are first ordered by some number, then by the "`linktitle`". If +a page has no "`linktitle:`" attribute, then the normal title will be used +instead. + +If you want to modify the sort-order, simply specify a "`order: 200`" in the +page itself. + += Generation of breadcrumbs = + +This is done via a suitable [[template_mako]]. The +template uses the function "`get_breadcrumbs(linktitle)`" and returns +(linktitle, link) tuples. As a bonus: all the links are always relative to +the calling page. 
+ +Here's a sample Mako template excerpt: + + \ + += Generation of a side-menu = + +This again is done via a suitable [[template_mako]]. The +template uses the function "`get_sidemenu(linktitle)`" and returns (level, +part_of_path, is_current, title, link) tuples. Again all links are relative +to the calling page. + +* "`level`" is the indendation level, starting with 0. You can use this for + CSS "`id=`" or "`class`" attributes +* "`part_of_path`" is a flag telling you if the mentioned page is part + of your path, i.e. if the specified page is in the breadcrumbs. +* "`is_current`" is a flag marking the current page. +* "`title`" is the full title for the page +* "`link`" is the relative URL to the page + +Here's a sample Mako template excerpt that converts this into a HTML menu: + + + += Generate a list of recently changed pages = + +To get a list of recently changed pages, do this: + + <% + history = get_recently(get_current_file()) + %> + % if len(history)>1: +

Recent changed

+ % for mtime,ctime,title,link in history: + % if mtime > ctime: + Modified ${format_date(mtime)}\ + % else: + Created ${format_date(ctime)}\ + % endif + : ${title | entity}
+ % endfor + % endif diff --git a/in/hooks.md b/in/hooks.md new file mode 100644 index 0000000..bcf86c0 --- /dev/null +++ b/in/hooks.md @@ -0,0 +1,189 @@ +title: Hooks +parent: Webber +ctime: 2009-06-26 + += At Startup = + +== addoptions == + +Can be used by plugins to add their own command line options. + +"`params.parser`" contains the "`optparse`" based parser. + +Example: + + :::python + @set_hook("addoptions") + def test_addoptions(params): + params.parser.add_option("-V", "--test_verbose", action="count", + dest="test_verbose", default=0, + help="print status messages to stdout") + +== checkconfig == + +After the command-line options have been processed and incorporated into +config object, this hook is called. Here each plugin can check if the +specified configurations are sane. + +* "`params`" is empty, you should use "`cfg`" directly: + +Example: + + :::python + @set_hook("checkconfig") + def test_checkconfig(params): + if cfg.test_verbose: + print "WARNING: I'll be now much more noisy" + # I could also directly modify the configuration: + cfg.foo = "mooh" + +== start == + +This hook is called just before walking the directory tree. + +* "`params`" is empty: + +Example: + + :::python + @set_hook("start") + def test_start(params): + print "in start hook" + + += While reading source files = + +== read == + +Now webber walks the directory tree specified in "`cfg.in_dir"`, excluding +anything from "`cfg.exclude_dir`" and "`cfg.exclude_file"`. For each of the +remaining files this hook is called. + +Usually the the "`read_*`" plugins implement this hook. And usually they look +at the file-extension and decide if they the can procecess this file or not. +If they do, the plugin should also set "`file.render`" is normally "`html"`. +However, it can be something else. In this case "`file.render`" specifies a +hook that get's called for this file. + +The first hook that returns contents wins, no other hooks will be called. 
+ +* "`params.direc`" contains a "`class Directory`" object +* "`params.file`" contains a "`class File`" object + +Example: + + :::python + @set_hook("read") + def read(params): + file = params.file + if file.rel_path.endswith(".html"): + file.render = "html" + f = file.read_keywords() + return f.read() + +== filter == + +Any file that got read will be filtered. At this stage the text is still in the +original format. + +Currently no webber-supplied plugin implements this. + +* "`params.direc`" contains a "`class Directory`" object +* "`params.file`" contains a "`class File`" object +* "`params.contents`" contains the text + +Example: + + :::python + @set_hook("filter") + def filter(params): + params.content = params.content.replace("e", "EEEEE") + + += After reading files = + +At this stage all pages and their meta-information has been read. Now we can +generate additional data, e.g. page hierarchy, tag-clouds, lists of recently +changed files, etc. This is done via the following two hooks. + +The webber-supplied plugin [[hierarchy]] uses this +mechanism. + +== scan == + +This hook is called once per file with contents: + +* "`params.direc`" contains a "`class Directory`" object +* "`params.file`" has a "`class File`" object +* "`params.file.contents`" contains the text + +== scan_done == + +Finally one "`scan_done`" hook is called. The plugin [[hierarchy]] +uses this to sort links. + +* "`params`" is empty. + += While rendering files = + +The following hooks are called for each file that has a rendered in +"`file.render`" set. See the "`read"`-hook in how to set it. + +If "`file.render`" is "`html"`, then the hooks "`htmlize"`, "`linkify`" and +"`pagetemplate`" are run in this order. Otherwise the hook specified +in "`file.render`" is called. + +== htmlize == + +This hook converts contents into HTML. + +The first hook that returns HTML, no other hooks will be called. 
+ +* "`params.direc`" contains a "`class Directory`" object +* "`params.file`" has a "`class File`" object +* "`params.file.contents`" contains the text + +== linkify == + +This hook should contain any link to html. Implemented by the plugin +[[link]]. + +* "`params.direc`" contains a "`class Directory`" object +* "`params.file`" has a "`class File`" object +* "`params.file.contents`" contains the HTML for the body text of the page + +== pagetemplate == + +The implementation for this is responsible to generate the final html page, +ready to be written. Implemented by [[template_mako]] plugin. + +The first hook that returns a finished HTML page, no other hooks will be +called. + +* "`params.direc`" contains a "`class Directory`" object +* "`params.file`" has a "`class File`" object +* "`params.file.contents`" contains the HTML for the body text of the page + +== copyfile == + +This is one local hook, run instead of the "`htmlize"`, "`linkify`" and +"`pagetemplate`" hooks. It's defined and implemented by the plugin +[[read_copyonly]]. + +The first hook that returs anything wins, no other hooks will be called. + +* "`params.direc`" contains a "`class Directory`" object +* "`params.file`" has a "`class File`" object + += At the end = + +Now everythings has been converted to HTML and written out. And we're just +one hook away from finishing webber: + +== finish == + +This hook is called at the end of webber's execution. No webber-supplied +plugin uses it currently, but you could use this to save local state into some +file. + +* "`params`" is empty diff --git a/in/index.md b/in/index.md new file mode 100644 index 0000000..a266b4f --- /dev/null +++ b/in/index.md @@ -0,0 +1,21 @@ +title: Webber +ctime: 2009-06-26 + +* [[overview]] +* [[pageformat]] +* [[configuration]] + * [[commandline]] + * [[inheritance]] +* [[plugins]] + * [[read_rst]] + * [[read_markdown]] + * [[read_html]] + * [[read_copyonly]] (e.g. 
images) + * [[link]] + * [[hierarchy]] + * [[template_mako]] + * [[skeleton]] +* [[hooks]] +* [[functions]] +* [[macros]] +* [[templates]] diff --git a/in/inheritance.md b/in/inheritance.md new file mode 100644 index 0000000..9e5194f --- /dev/null +++ b/in/inheritance.md @@ -0,0 +1,32 @@ +title: Configuration inheritance +linktitle: Inheritance +parent: Webber +ctime: 2009-06-26 + += Overview = + +Internally, webber uses a bunch of `Holder` classes to store information +(command-line options, config file options, parameters for a directory, +parameters for a file). + +Each `Holder` "inherits" configuration entries from the layer above: + +* `options` for command-line options +* `cfg` for entries from the command line +* `direc` for information about a directory +* `file` (either directly or via `get_current_file()` for data about the + currently rendered file + += Example = + +Due to parsing the [[command line|commandline]] there will exist an entry +`options.style_dir`. + +However, you can also access this same value via `cfg.style_dir`, +`direc.style_dir` and `file.style_dir`. Any one of them however could +over-write the settings that originally was in `options`. + +Quite often you'll use this for the page template. In `webber.conf`, you +specify `template: "default"`, which will be used for most pages. Any +page that needs a different template will get `template: history` entry +in it's header. diff --git a/in/link.md b/in/link.md new file mode 100644 index 0000000..f545011 --- /dev/null +++ b/in/link.md @@ -0,0 +1,47 @@ +title: Create HTML links +linktitle: Linkify +parent: Plugins +ctime: 2009-06-26 + +This plugin converts strings in the form + +
[[url]]
+[[text|url]]
+[[#anchor]]
+[[text|url#anchor]]
+
+ +into HTML `` tags. + += Automatic linkification = + +Instead of an URL you can also specify the following things: + +* the page title +* the short link title +* the basename of the file (filename without extension and directory name) + +In this case the link plugin will search throught all pages and take the +first match. + +Example: + +Suppose you've two file "`testfile.md`" and "`testfile2.md`" which looks like this: + + title: Foo + linktitle: bar + +--- + + title: Test2 + +then the following two links + +
[[Foo]]
+[[bar]]
+[[testfile2]]
+
+ +will produce two links to the first file and one link to the second file. +All text part of the HTML link will be substituted with the title of the +referred pages, except you specify a text by yourself. diff --git a/in/macros.md b/in/macros.md new file mode 100644 index 0000000..dfa5da8 --- /dev/null +++ b/in/macros.md @@ -0,0 +1,36 @@ +title: Macros +parent: Home +ctime: 2009-06-26 + += Calling macros = + +Macros are executed whenever the sequence +"[[!name]]" or +"[[!name args]]" is in the source-file. + +Webber itself doesn't define any macros. + + += Defining macros = + +A macro is a simply python function which returns HTML. The function needs +to be decorated with "`@set_macro(name)`". There's an example in +[[skeleton.py|skeleton]], which looks like: + + :::python + @set_macro("sample") + def sample_macro(params): + if cfg.test_verbose: + print "in macro skeleton.sample_macro, params:", params + return "{ output of sample macro }" + +If you call this macro, you'll see the output "[[!sample]]". + +* "`params.name`" contains the name of the macro +* "`params.file`" contains the current "`class File`" object + +You can submit additional string arguments, e.g. "[[!sample +arg1="string"]]". This will yield + +* "`params.arg1`" contains "`string`" + diff --git a/in/overview.md b/in/overview.md new file mode 100644 index 0000000..0fc4fe2 --- /dev/null +++ b/in/overview.md @@ -0,0 +1,8 @@ +title: Overview +parent: Webber +ctime: 2009-06-26 + +*Webber* is a static web-site generation tool, loosely based on ideas +from IkiWiki and my own likings. + +TODO diff --git a/in/pageformat.md b/in/pageformat.md new file mode 100644 index 0000000..f1bfa50 --- /dev/null +++ b/in/pageformat.md @@ -0,0 +1,108 @@ +title: Page format +parent: Home +ctime: 2009-06-26 + +Every page contains a header, then a blank line, and then the text that +should show up in the web page. + +The header consists of several keywords, followed by a color and a space, +and the the value. 
+ +Here's an example: + + title: Impressum + + Hi, I'm Mario and I won't tell you more about me :-) + + += Your own keywords = + +Inside the template, functions and macros you can access all entries +by "`file.XXXX`" and you're free to invent your own keywords: + + title: Impressum + subtitle: What you should know about this web-site + + Hi, I'm Mario and I won't tell you more about me :-) + +Now you can access "`${file.subtitle}`" in your template and as +"`params.file.subtitle`" in your own [[macros|macros]] or +[[functions|functions]]. + + += Overriding configuration = + +As "`file`" inherits all configuration from "`cfg`" (see [[inheritance]]), +you can also specify a different template on a per-file basis: + + title: Impressum + template: boring_bg + + Hi, I'm Mario and I won't tell you more about me :-) + + += Webber's keywords = + +== title == + +Full (long) title for the page. End's up in +"`...`". + +Very mandatory. Extremely important. You cannot have a page without a title. +Never. Forget. The. Title. + +Depending on your template it will also be set inside "`

...

`" at the +start of your web page. + + +== linktitle == + +Sometimes the title is simply too long, e.g. for breadcrumbs. Therefore you +can specify a link-title, which will be used by [[hierarchy]] when generating +breadcrumbs and a side-menu. + + +== parent == + +Used by [[hierarchy]] to indicate relationship. + + +== order == + +All pages with the same "`parent`" will end up below the parent on the +side-menu (see [[hierarchy]] for details). They will be alphabetically sorted. + +If you don't want this default sort order, you can specify your own ordering. + +The default order is 100, so anything with a lower order will show up on the +top, anything higher at the bottom of the side-menu. + +== links == + +Used by [[hierarchy]] to indicate relationship. Usually it's better to use +"`parent`" instead. + + +== ctime == + +Here you can specify an ISO formatted date and or time specifier, which contains +the document creation date/time. Examples: + + ctime: 2009-06-29 + ctime: 2009-06-29 14:33 + +If you don't specify this, then the documents "`mtime`" will be used instead. + + +== mtime + +Here you can specify an ISO formatted date and or time specifier, which contains +the document modification date/time. Examples: + + mtime: 2009-06-29 + mtime: 2009-06-29 14:33 + +If you don't specify this, then the "last-modified"-time from the file-system +will be used instead. + + diff --git a/in/plugins.md b/in/plugins.md new file mode 100644 index 0000000..588e653 --- /dev/null +++ b/in/plugins.md @@ -0,0 +1,25 @@ +title: Plugins +parent: Webber +ctime: 2009-06-26 + +Webber doesn't do much on it's own. Almost all the real work is delegated +to plugins. Those plugins do: + +* Read files and generate HTML snippets ([[read_rst.py|read_rst]], + [[read_markdown.py|read_markdown]], [[read_html.py|read_html]]) + or copy files verbatim, e.g. 
for graphics + ([[read_copyonly.py|read_copyonly]]) +* Update internal state or modify HTML snippets + ([[hierarchy.py|hierarchy]], [[link.py|link]]) +* Create HTML pages ([[template_mako.py|template_mako]]) + +There's another plugin there ([[skeleton.py|skeleton]], which is +is just a demo for plugin-programmers. + +Plugins simply reside in the "`plugins/`" directory. However, webber +doesn't load all of them automatically. Instead you specify in the +configuration file [[webber.conf|configuration]] which +plugins you want. + +Once plugins are loaded, webber orchestrates the work of itself and +all plugins via [[hooks]]. diff --git a/in/read_copyonly.md b/in/read_copyonly.md new file mode 100644 index 0000000..bd4a5dc --- /dev/null +++ b/in/read_copyonly.md @@ -0,0 +1,18 @@ +title: Read and copy binary files +linktitle: Read binaries +parent: Plugins +ctime: 2009-06-26 + +This plugin copies files (e.g. graphics files) into the destination +folder. + +To configure which files should be copied you modify +[[webber.conf|configuration.html]]. An example snippet migth be: + + copy_files: [ + "*.png", + "*.jpg", + "*.gif", + "*.css", + "robots.txt", + ] diff --git a/in/read_html.md b/in/read_html.md new file mode 100644 index 0000000..7fac0e7 --- /dev/null +++ b/in/read_html.md @@ -0,0 +1,21 @@ +title: Read HTML +parent: Plugins +ctime: 2009-06-26 + +This plugin reads HTML snippets from "`*.html`" files. + +Please note that currently the plugin assumes that this is a HTML snippet. +That means: the snippes should only contain what is inside "``" and +"``", but without those tags themselfes. + +A sample "`test.html`" document looks like this: + + title: Job + parent: Home + ctime: 2008-10-01 + +

What I did in the past:

+ + +You'll find more about "`title:`", "`parent:`" and "`ctime:`" in the +[[page format|pageformat.html]] description. diff --git a/in/read_markdown.md b/in/read_markdown.md new file mode 100644 index 0000000..a23955e --- /dev/null +++ b/in/read_markdown.md @@ -0,0 +1,39 @@ +title: Read Markdown +parent: Plugins +ctime: 2009-06-26 + +This plugin reads "`*.md`" files and converts them to HTML. + +"[[Markdown|http://daringfireball.net/projects/markdown/]]" is a wiki-like +text format. The plugin "`read_markdown.py`" doesn't use the +standard Python module "`markdown`", but instead the faster and simpler +[[markdown2|http://code.google.com/p/python-markdown2/]] modoule. + +A sample "`test.md`" document looks like this: + + title: Impressum + parent: Home + ctime: 2008-10-01 + + # Address + + Mario Marionetti + 10, Mariott St + Marioland 1007 + + Don't send me spam, *ever*! + +You'll find more about "`title:`", "`parent:`" and "`ctime:`" in the +[[page format|pageformat.html]] description. + += Modifications = + +This implementation is based on python-markdown2 version 1.0.1.12, but has been +changed this way: + +* file-vars (emacs-style settings inside the file) have been disabled +* "Standardize line endings" removed +* call to _do_links() removed (we have the [[linkify|link.html]] pass for + this) +* logging removed +* allow "= Header =" in addition to "# Header #" diff --git a/in/read_rst.md b/in/read_rst.md new file mode 100644 index 0000000..43edc7a --- /dev/null +++ b/in/read_rst.md @@ -0,0 +1,27 @@ +title: Read RST +parent: Plugins +ctime: 2009-06-26 + +This plugin reads "`*.rst`" files and converts them to HTML. + +"RST" is the abbreviation for +[[reStructuredText|http://docutils.sourceforge.net/rst.html]], a format +common for many python programmers. The plugin "`read_rst.py`" uses the +standard Python module "`docutils`" to convert RST into HTML. 
A sample +"`test.rst`" document looks like this: + + title: Impressum + parent: Home + ctime: 2008-10-01 + + Address + ======= + + |Mario Marionetti + |10, Mariott St + |Marioland 1007 + + Don't send me spam, *ever*! + +You'll find more about "`title:`", "`parent:`" and "`ctime:`" in the +[[page format|pageformat.html]] description. \ No newline at end of file diff --git a/in/skeleton.md b/in/skeleton.md new file mode 100644 index 0000000..daed88b --- /dev/null +++ b/in/skeleton.md @@ -0,0 +1,6 @@ +title: Sample plugin skeleton +linktitly: Skeleton +parent: Plugins +ctime: 2009-06-26 + +TODO diff --git a/in/style/default.tmpl b/in/style/default.tmpl new file mode 100644 index 0000000..fedb172 --- /dev/null +++ b/in/style/default.tmpl @@ -0,0 +1,22 @@ +<%def name="contents()">\ +${body} +\ +####################################################################### + + + +${file.title | entity} + +% if len(keywords): + +% endif +% if len(description): + +% endif + + +

${file.title | entity}

+${self.contents()} + + diff --git a/in/style/history.tmpl b/in/style/history.tmpl new file mode 100644 index 0000000..089f576 --- /dev/null +++ b/in/style/history.tmpl @@ -0,0 +1,19 @@ +<%inherit file="default.tmpl"/> +####################################################################### +<%def name="contents()">\ +${body} +<% + history = get_recently(get_current_file()) +%> +% if len(history)>1: +

What's new?

+% for mtime,ctime,title,link in history: +% if mtime > ctime: + Modified ${format_date(mtime)}\ +% else: + Created ${format_date(ctime)}\ +% endif +:
${title | entity}
+% endfor +% endif +\ diff --git a/in/template_mako.md b/in/template_mako.md new file mode 100644 index 0000000..0b1b1d8 --- /dev/null +++ b/in/template_mako.md @@ -0,0 +1,6 @@ +title: Web page template +linktitly: HTML Template +parent: Plugins +ctime: 2009-06-26 + +TODO diff --git a/in/templates.md b/in/templates.md new file mode 100644 index 0000000..41bd4de --- /dev/null +++ b/in/templates.md @@ -0,0 +1,5 @@ +title: Templates +parent: Home +ctime: 2009-06-26 + +TODO diff --git a/in/webber.conf b/in/webber.conf new file mode 100644 index 0000000..225a2a3 --- /dev/null +++ b/in/webber.conf @@ -0,0 +1,29 @@ +template: "history" +subtitle: "Webber" +date_format: "%Y-%m.%d" +input_encoding: "iso-8859-1" +output_encoding: "iso-8859-1" +plugins: [ + "skeleton", + "hierarchy", + "link", + "read_rst", + "read_html", + "read_copyonly", + "read_markdown", + "template_mako", + ] +exclude_dir: [ + ] +exclude_files: [ + "webber.conf", + "*.tmpl", + ] +copy_files: [ + "*.png", + "*.jpg", + "*.gif", + "*.mpg", + "*.css", + "robots.txt", +] diff --git a/plugins/__init__.py b/plugins/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/plugins/hierarchy.py b/plugins/hierarchy.py new file mode 100644 index 0000000..6aa2f3e --- /dev/null +++ b/plugins/hierarchy.py @@ -0,0 +1,195 @@ +# -*- coding: iso-8859-1 -*- +from webber import * +import re + +reSPLIT = re.compile(r',\s*') + +_childs = {} +_parent = {} + +def memorize_links(title, links): + global _childs + if not links: + return + order = 100 + for link in reSPLIT.split(links): + #print title, link + if not _childs.has_key(title): + _childs[title] = [] + _childs[title].append( (order,link)) + order += 100 + _parent[link] = title + + +def memorize_parent(title, parent, order): + #print "memorize_parent:", title, parent + #print " parent:", _parent + #print " childs:", _childs + if order==-1: + order = 0 + if _childs.has_key(parent): + for c in _childs: + for o,n in _childs[c]: + if o > order: + order = o + else: 
+ _childs[parent] = [] + order += 100 + #print "order:", title, order + if not _childs.has_key(parent): + _childs[parent] = [] + _childs[parent].append( (order, title) ) + _parent[title] = parent + + +# +# The "scan" plugins should scan for meta-data, mostly for links. +# +# params.direc contains the "class Directory" object +# params.file has the "class File" object +# params.file.contents contains the text +# +@set_hook("scan") +def scan(params): + file = params["file"] + if file.has_key("links"): + memorize_links(file.linktitle, file.links) + if file.has_key("parent"): + if file.has_key("order"): + order = int(file.order) + else: + order = -1 + memorize_parent(file.linktitle, file.parent, order) + + +@set_hook("scan_done") +def scan_done(params): + """After every file has been scanned, we sort the list of childs-per-page + in ascending order.""" + + for c in _childs: + _childs[c].sort() + return + + print "_parent:" + for c in _parent: + print " ", c, _parent[c] + print "_childs:" + for c in _childs: print " ", c,_childs[c] + + +@set_function("get_breadcrumbs") +def get_breadcrumbs(orig_page): + """Returns something like ['Home', 'Beruf', 'Werdegang']. This can + be easyly used to generate breadcrumbs HTML code.""" + res = [(orig_page, get_link_from(orig_page, orig_page))] + page = orig_page + #print "orig_page:", orig_page + while _parent.has_key(page): + page = _parent[page] + link = get_link_from(orig_page, page) + #print " page, link:", page, link + res.insert(0, (page, link)) + return res + + + +@set_function("get_sidemenu") +def get_sidemenu(page): + """Returns an array with a side-menu. Everything from the current + page upwards is shown, as well as one level below the current + position. 
The array has the following items: + + level part-of-path current-page title + + Example: + 0 1 0 Home + 0 1 0 Beruf + 1 0 0 Kenntnisse + 1 1 0 Werdegang + 2 0 1 Alte + 0 0 0 Haus + """ + # Determine root page: + bread = get_breadcrumbs(page) + #print "Menu for:", page + #print "Bread:", bread + + root = "Home" #TODO + res = [(0, 1, int(root==page), root, get_link_from(page, root))] + + def do_menu(pg, level): + #print "pg, has_key:", pg, _childs.has_key(pg) + if _childs.has_key(pg): + for p in _childs[pg]: + subpage = p[1] + in_bread = False + for b in bread: + if b[0] == subpage: + in_bread = True + break + + go_deeper = in_bread or (subpage==page) + #print "subpage:", subpage, "in bread:", in_bread, "go deeper:", go_deeper + link = get_link_from(page, subpage) + res.append((level, int(subpage in bread), int(subpage==page), subpage, link)) + if go_deeper: + do_menu(subpage, level+1) + + # TODO: make this configurable, e.g. cfg.rootpage, otherwise a page + # that is outside of the menu won't show a menu + do_menu("Home", 0) + return res + + + + +@set_function("get_recently") +def get_recently(file): + #file = get_current_file() + #print "XXXXXX:", file.linktitle + pg = [] + + max_n = 10 # TODO: configurable? 
+ orig_page = file.linktitle + + def addPage(pg, title): + #print "addPage", title + for f in files: + file = files[f] + #print file + if file.has_key("linktitle") and file.linktitle == title: + pg.append( (file.mtime, file.ctime, file.title, get_link_from(orig_page, file.linktitle)) ) + if _childs.has_key(file.linktitle): + for c in _childs[file.linktitle]: + #print "c:", c + addPage(pg, c[1]) + if len(pg) == max_n: + return + addPage(pg, orig_page) + pg.sort(reverse=True) + #for p in pg: print p + return pg + + + + + +if __name__ == "__main__": + # You can call this test-code this way: + # + # PYTHONPATH=`pwd` python plugins/hierarchy.py + # + memorize_parent("Impressum", "Home", 99999) + memorize_parent("Beruf", "Home", 100) + memorize_parent("Werdegang", "Beruf", 100) + memorize_parent("Kenntnisse", "Beruf", 200) + scan_done(None) + + #print get_breadcrumbs("Home") + #print get_breadcrumbs("Beruf") + #print get_breadcrumbs("Werdegang") + #print get_breadcrumbs("Kenntnisse") + #for t in get_sidemenu("Home"): print t + #for t in get_sidemenu("Beruf"): print t + for t in get_sidemenu("Kenntnisse"): print t diff --git a/plugins/link.py b/plugins/link.py new file mode 100644 index 0000000..ae86ca0 --- /dev/null +++ b/plugins/link.py @@ -0,0 +1,80 @@ +# -*- coding: iso-8859-1 -*- +from webber import * +import os, re + +# To understand this beast, read /usr/share/doc/python2.5-doc/html/lib/module-re.html :-) + +reLink = re.compile(r''' + \[\[ # Begin of link + (?=[^!]) # Don't fire for macros + (?: + ([^\]\|]+) # 1: link text + \| # followed by '|' + )? # optional + ([^\n\r\]#]+) # 2: page to link to + ( + \# # '#', beginning of anchor + [^\s\]]+ # 3: anchor text, doesn't contain spaces or ']' + )? 
# optional + \]\] # end of link + ''', re.VERBOSE) + +def do_link(m): + """Used in re.sub() to substitute link with HTML""" + text = m.group(1) or "" + text = text.replace("\n", " ") + link = m.group(2).replace("\n", " ") + anchor = m.group(3) or "" + if link.find(".") == -1: + #link = link.tolower() + for f in files: + file = files[f] + if file.title == link or \ + file.linktitle == link or \ + os.path.splitext(os.path.basename(file.path))[0] == link: + #print "LINK: '%s' '%s' -> '%s'" % (text, link, file.linktitle) + if not text: + text = file.title + link = get_link_from(get_current_file().linktitle, file.linktitle) + #print "LINK: '%s' '%s'" % (text, link) + break + # TODO: validate link + return '%s' % (link, anchor, text) + + +def test_link(): + for s in ( + 'Before [[!macro]] after', + 'Before [[link]] after', + 'Before [[Text|link]] after', + 'Before [[Text|link#anchor]] after' + ): + m = reLink.search(s) + if m: + print "link:", s + print " name:", m.group(1) + print " link:", m.group(2) + print " anchor:", m.group(3) + else: + print "No link:", s + +def test_sub(): + for s in ( + 'Before [[!macro]] after', + 'Before [[link]] after', + 'Before [[Text|link]] after', + 'Before [[Text|link#anchor]] after' + ): + print s + res = reLink.sub(do_link, s) + print "", res + +#test_link() +#test_sub() + + + + +@set_hook("linkify") +def linkify(params): + return reLink.sub(do_link, params.file.contents) diff --git a/plugins/read_copyonly.py b/plugins/read_copyonly.py new file mode 100644 index 0000000..3e6b1f2 --- /dev/null +++ b/plugins/read_copyonly.py @@ -0,0 +1,31 @@ +# -*- coding: iso-8859-1 -*- +from webber import * +import os, fnmatch + + +@set_hook("read") +def read(params): + file = params.file + #print "file:", file.rel_path + for c in cfg.copy_files: + if fnmatch.fnmatchcase(file.rel_path, c): + #print "Copy:", file.rel_path + file.render = "copyfile" + file.contents = "" + +@set_hook("copyfile") +def copyfile(params): + file = params.file + log("copying 
file %s" % file.rel_path, level=7) + out_path = os.path.join(cfg.out_dir, file.rel_path) + out_dir = os.path.split(out_path)[0] + try: + os.makedirs(out_dir) + except OSError: + pass + cmd = "cp -l %s %s" % ( + os.path.join(cfg.in_dir, file.rel_path), + out_path + ) + #print cmd + os.system(cmd) diff --git a/plugins/read_html.py b/plugins/read_html.py new file mode 100644 index 0000000..0ad6a8b --- /dev/null +++ b/plugins/read_html.py @@ -0,0 +1,22 @@ +# -*- coding: iso-8859-1 -*- +from webber import * + + +@set_hook("read") +def read(params): + file = params.file + if file.rel_path.endswith(".html"): + file.render = "html" + f = file.read_keywords() + return f.read() + + +@set_hook("htmlize") +def htmlize(params): + """Parse HTML and "convert" it to HTML :-)""" + + file = params.file + if not file.rel_path.endswith(".html"): + return + + return file.contents diff --git a/plugins/read_markdown.py b/plugins/read_markdown.py new file mode 100644 index 0000000..65f8c06 --- /dev/null +++ b/plugins/read_markdown.py @@ -0,0 +1,1607 @@ +# -*- coding: iso-8859-1 -*- +from webber import * + +# based on code from http://code.google.com/p/python-markdown2/ +# Copyright (c) 2007-2008 ActiveState Corp. 
+# License: MIT (http://www.opensource.org/licenses/mit-license.php) +# +# I used version 1.0.1.12, but deleted: +# * file-vars (emacs-style settings inside the file) +# * Standardize line endings +# * call to _do_links() +# * logging +# * allow "= Header =" in addition to "# Header #" +# + +import os, sys, re, codecs +try: + from hashlib import md5 +except ImportError: + from md5 import md5 +from random import random + + + +#---- Python version compat + +if sys.version_info[:2] < (2,4): + from sets import Set as set + def reversed(sequence): + for i in sequence[::-1]: + yield i + def _unicode_decode(s, encoding, errors='xmlcharrefreplace'): + return unicode(s, encoding, errors) +else: + def _unicode_decode(s, encoding, errors='strict'): + return s.decode(encoding, errors) + + +#---- globals + +DEBUG = False + +DEFAULT_TAB_WIDTH = 4 + +# Table of hash values for escaped characters: +def _escape_hash(s): + # Lame attempt to avoid possible collision with someone actually + # using the MD5 hexdigest of one of these chars in there text. + # Other ideas: random.random(), uuid.uuid() + #return md5(s).hexdigest() # Markdown.pl effectively does this. + return 'md5-'+md5(s).hexdigest() +g_escape_table = dict([(ch, _escape_hash(ch)) for ch in '\\`*_{}[]()>#+-.!']) + + + +#---- exceptions + +class MarkdownError(Exception): + pass + + + +#---- public api + +def markdown(text, html4tags=False, tab_width=DEFAULT_TAB_WIDTH, + safe_mode=None, extras=None, link_patterns=None): + return Markdown(html4tags=html4tags, tab_width=tab_width, + safe_mode=safe_mode, extras=extras, + link_patterns=link_patterns).convert(text) + +class Markdown(object): + # The dict of "extras" to enable in processing -- a mapping of + # extra name to argument for the extra. Most extras do not have an + # argument, in which case the value is None. + # + # This can be set via (a) subclassing and (b) the constructor + # "extras" argument. 
+ extras = None + + urls = None + titles = None + html_blocks = None + html_spans = None + html_removed_text = "[HTML_REMOVED]" # for compat with markdown.py + + # Used to track when we're inside an ordered or unordered list + # (see _ProcessListItems() for details): + list_level = 0 + + _ws_only_line_re = re.compile(r"^[ \t]+$", re.M) + + def __init__(self, html4tags=False, tab_width=4, safe_mode=None, + extras=None, link_patterns=None): + if html4tags: + self.empty_element_suffix = ">" + else: + self.empty_element_suffix = " />" + self.tab_width = tab_width + + # For compatibility with earlier markdown2.py and with + # markdown.py's safe_mode being a boolean, + # safe_mode == True -> "replace" + if safe_mode is True: + self.safe_mode = "replace" + else: + self.safe_mode = safe_mode + + if self.extras is None: + self.extras = {} + elif not isinstance(self.extras, dict): + self.extras = dict([(e, None) for e in self.extras]) + if extras: + if not isinstance(extras, dict): + extras = dict([(e, None) for e in extras]) + self.extras.update(extras) + assert isinstance(self.extras, dict) + self._instance_extras = self.extras.copy() + self.link_patterns = link_patterns + self._outdent_re = re.compile(r'^(\t|[ ]{1,%d})' % tab_width, re.M) + + def reset(self): + self.urls = {} + self.titles = {} + self.html_blocks = {} + self.html_spans = {} + self.list_level = 0 + self.extras = self._instance_extras.copy() + self.encoding = 'utf-8' + if "footnotes" in self.extras: + self.footnotes = {} + self.footnote_ids = [] + + def convert(self, text, encoding=None): + """Convert the given text.""" + # Main function. The order in which other subs are called here is + # essential. Link and image substitutions need to happen before + # _EscapeSpecialChars(), so that any *'s or _'s in the + # and tags get encoded. + + # Clear the global hashes. If we don't clear these, you get conflicts + # from other articles when generating a page which contains more than + # one article (e.g. 
an index page that shows the N most recent + # articles): + self.reset() + if encoding: + self.encoding = encoding + + if not isinstance(text, unicode): + text = unicode(text, self.encoding) + + # Standardize line endings: + #text = re.sub("\r\n|\r", "\n", text) + + # Make sure $text ends with a couple of newlines: + text += "\n\n" + + # Convert all tabs to spaces. + text = self._detab(text) + + # Strip any lines consisting only of spaces and tabs. + # This makes subsequent regexen easier to write, because we can + # match consecutive blank lines with /\n+/ instead of something + # contorted like /[ \t]*\n+/ . + text = self._ws_only_line_re.sub("", text) + + if self.safe_mode: + text = self._hash_html_spans(text) + + # Turn block-level HTML blocks into hash entries + text = self._hash_html_blocks(text, raw=True) + + # Strip link definitions, store in hashes. + if "footnotes" in self.extras: + # Must do footnotes first because an unlucky footnote defn + # looks like a link defn: + # [^4]: this "looks like a link defn" + text = self._strip_footnote_definitions(text) + text = self._strip_link_definitions(text) + + text = self._run_block_gamut(text) + + text = self._unescape_special_chars(text) + + if "footnotes" in self.extras: + text = self._add_footnotes(text) + + if self.safe_mode: + text = self._unhash_html_spans(text) + + text += "\n" + return text + + # Cribbed from a post by Bart Lateur: + # + _detab_re = re.compile(r'(.*?)\t', re.M) + def _detab_sub(self, match): + g1 = match.group(1) + return g1 + (' ' * (self.tab_width - len(g1) % self.tab_width)) + def _detab(self, text): + r"""Remove (leading?) tabs from a file. 
+ + >>> m = Markdown() + >>> m._detab("\tfoo") + ' foo' + >>> m._detab(" \tfoo") + ' foo' + >>> m._detab("\t foo") + ' foo' + >>> m._detab(" foo") + ' foo' + >>> m._detab(" foo\n\tbar\tblam") + ' foo\n bar blam' + """ + if '\t' not in text: + return text + return self._detab_re.subn(self._detab_sub, text)[0] + + _block_tags_a = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math|ins|del' + _strict_tag_block_re = re.compile(r""" + ( # save in \1 + ^ # start of line (with re.M) + <(%s) # start tag = \2 + \b # word break + (.*\n)*? # any number of lines, minimally matching + # the matching end tag + [ \t]* # trailing spaces/tabs + (?=\n+|\Z) # followed by a newline or end of document + ) + """ % _block_tags_a, + re.X | re.M) + + _block_tags_b = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math' + _liberal_tag_block_re = re.compile(r""" + ( # save in \1 + ^ # start of line (with re.M) + <(%s) # start tag = \2 + \b # word break + (.*\n)*? # any number of lines, minimally matching + .* # the matching end tag + [ \t]* # trailing spaces/tabs + (?=\n+|\Z) # followed by a newline or end of document + ) + """ % _block_tags_b, + re.X | re.M) + + def _hash_html_block_sub(self, match, raw=False): + html = match.group(1) + if raw and self.safe_mode: + html = self._sanitize_html(html) + key = _hash_text(html) + self.html_blocks[key] = html + return "\n\n" + key + "\n\n" + + def _hash_html_blocks(self, text, raw=False): + """Hashify HTML blocks + + We only want to do this for block-level HTML tags, such as headers, + lists, and tables. That's because we still want to wrap

s around + "paragraphs" that are wrapped in non-block-level tags, such as anchors, + phrase emphasis, and spans. The list of tags we're looking for is + hard-coded. + + @param raw {boolean} indicates if these are raw HTML blocks in + the original source. It makes a difference in "safe" mode. + """ + if '<' not in text: + return text + + # Pass `raw` value into our calls to self._hash_html_block_sub. + hash_html_block_sub = _curry(self._hash_html_block_sub, raw=raw) + + # First, look for nested blocks, e.g.: + #

+ #
+ # tags for inner block must be indented. + #
+ #
+ # + # The outermost tags must start at the left margin for this to match, and + # the inner nested divs must be indented. + # We need to do this before the next, more liberal match, because the next + # match will start at the first `
` and stop at the first `
`. + text = self._strict_tag_block_re.sub(hash_html_block_sub, text) + + # Now match more liberally, simply from `\n` to `\n` + text = self._liberal_tag_block_re.sub(hash_html_block_sub, text) + + # Special case just for
. It was easier to make a special + # case than to make the other regex more complicated. + if "", start_idx) + 3 + except ValueError, ex: + break + + # Start position for next comment block search. + start = end_idx + + # Validate whitespace before comment. + if start_idx: + # - Up to `tab_width - 1` spaces before start_idx. + for i in range(self.tab_width - 1): + if text[start_idx - 1] != ' ': + break + start_idx -= 1 + if start_idx == 0: + break + # - Must be preceded by 2 newlines or hit the start of + # the document. + if start_idx == 0: + pass + elif start_idx == 1 and text[0] == '\n': + start_idx = 0 # to match minute detail of Markdown.pl regex + elif text[start_idx-2:start_idx] == '\n\n': + pass + else: + break + + # Validate whitespace after comment. + # - Any number of spaces and tabs. + while end_idx < len(text): + if text[end_idx] not in ' \t': + break + end_idx += 1 + # - Must be following by 2 newlines or hit end of text. + if text[end_idx:end_idx+2] not in ('', '\n', '\n\n'): + continue + + # Escape and hash (must match `_hash_html_block_sub`). + html = text[start_idx:end_idx] + if raw and self.safe_mode: + html = self._sanitize_html(html) + key = _hash_text(html) + self.html_blocks[key] = html + text = text[:start_idx] + "\n\n" + key + "\n\n" + text[end_idx:] + + if "xml" in self.extras: + # Treat XML processing instructions and namespaced one-liner + # tags as if they were block HTML tags. E.g., if standalone + # (i.e. are their own paragraph), the following do not get + # wrapped in a

tag: + # + # + # + _xml_oneliner_re = _xml_oneliner_re_from_tab_width(self.tab_width) + text = _xml_oneliner_re.sub(hash_html_block_sub, text) + + return text + + def _strip_link_definitions(self, text): + # Strips link definitions from text, stores the URLs and titles in + # hash references. + less_than_tab = self.tab_width - 1 + + # Link defs are in the form: + # [id]: url "optional title" + _link_def_re = re.compile(r""" + ^[ ]{0,%d}\[(.+)\]: # id = \1 + [ \t]* + \n? # maybe *one* newline + [ \t]* + ? # url = \2 + [ \t]* + (?: + \n? # maybe one newline + [ \t]* + (?<=\s) # lookbehind for whitespace + ['"(] + ([^\n]*) # title = \3 + ['")] + [ \t]* + )? # title is optional + (?:\n+|\Z) + """ % less_than_tab, re.X | re.M | re.U) + return _link_def_re.sub(self._extract_link_def_sub, text) + + def _extract_link_def_sub(self, match): + id, url, title = match.groups() + key = id.lower() # Link IDs are case-insensitive + self.urls[key] = self._encode_amps_and_angles(url) + if title: + self.titles[key] = title.replace('"', '"') + return "" + + def _extract_footnote_def_sub(self, match): + id, text = match.groups() + text = _dedent(text, skip_first_line=not text.startswith('\n')).strip() + normed_id = re.sub(r'\W', '-', id) + # Ensure footnote text ends with a couple newlines (for some + # block gamut matches). + self.footnotes[normed_id] = text + "\n\n" + return "" + + def _strip_footnote_definitions(self, text): + """A footnote definition looks like this: + + [^note-id]: Text of the note. + + May include one or more indented paragraphs. + + Where, + - The 'note-id' can be pretty much anything, though typically it + is the number of the footnote. + - The first paragraph may start on the next line, like so: + + [^note-id]: + Text of the note. + """ + less_than_tab = self.tab_width - 1 + footnote_def_re = re.compile(r''' + ^[ ]{0,%d}\[\^(.+)\]: # id = \1 + [ \t]* + ( # footnote text = \2 + # First line need not start with the spaces. 
+ (?:\s*.*\n+) + (?: + (?:[ ]{%d} | \t) # Subsequent lines must be indented. + .*\n+ + )* + ) + # Lookahead for non-space at line-start, or end of doc. + (?:(?=^[ ]{0,%d}\S)|\Z) + ''' % (less_than_tab, self.tab_width, self.tab_width), + re.X | re.M) + return footnote_def_re.sub(self._extract_footnote_def_sub, text) + + + _hr_res = [ + re.compile(r"^[ ]{0,2}([ ]?\*[ ]?){3,}[ \t]*$", re.M), + re.compile(r"^[ ]{0,2}([ ]?\-[ ]?){3,}[ \t]*$", re.M), + re.compile(r"^[ ]{0,2}([ ]?\_[ ]?){3,}[ \t]*$", re.M), + ] + + def _run_block_gamut(self, text): + # These are all the transformations that form block-level + # tags like paragraphs, headers, and list items. + + text = self._do_headers(text) + + # Do Horizontal Rules: + hr = "\n tags around block-level tags. + text = self._hash_html_blocks(text) + + text = self._form_paragraphs(text) + + return text + + def _pyshell_block_sub(self, match): + lines = match.group(0).splitlines(0) + _dedentlines(lines) + indent = ' ' * self.tab_width + s = ('\n' # separate from possible cuddled paragraph + + indent + ('\n'+indent).join(lines) + + '\n\n') + return s + + def _prepare_pyshell_blocks(self, text): + """Ensure that Python interactive shell sessions are put in + code blocks -- even if not properly indented. + """ + if ">>>" not in text: + return text + + less_than_tab = self.tab_width - 1 + _pyshell_block_re = re.compile(r""" + ^([ ]{0,%d})>>>[ ].*\n # first line + ^(\1.*\S+.*\n)* # any number of subsequent lines + ^\n # ends with a blank line + """ % less_than_tab, re.M | re.X) + + return _pyshell_block_re.sub(self._pyshell_block_sub, text) + + def _run_span_gamut(self, text): + # These are all the transformations that occur *within* block-level + # tags like paragraphs, headers, and list items. + + text = self._do_code_spans(text) + + text = self._escape_special_chars(text) + + # Process anchor and image tags. 
+ #text = self._do_links(text) + + # Make links out of things like `` + # Must come after _do_links(), because you can use < and > + # delimiters in inline links like [this](). + text = self._do_auto_links(text) + + if "link-patterns" in self.extras: + text = self._do_link_patterns(text) + + text = self._encode_amps_and_angles(text) + + text = self._do_italics_and_bold(text) + + # Do hard breaks: + text = re.sub(r" {2,}\n", " + | + # auto-link (e.g., ) + <\w+[^>]*> + | + # comment + | + <\?.*?\?> # processing instruction + ) + """, re.X) + + def _escape_special_chars(self, text): + # Python markdown note: the HTML tokenization here differs from + # that in Markdown.pl, hence the behaviour for subtle cases can + # differ (I believe the tokenizer here does a better job because + # it isn't susceptible to unmatched '<' and '>' in HTML tags). + # Note, however, that '>' is not allowed in an auto-link URL + # here. + escaped = [] + is_html_markup = False + for token in self._sorta_html_tokenize_re.split(text): + if is_html_markup: + # Within tags/HTML-comments/auto-links, encode * and _ + # so they don't conflict with their use in Markdown for + # italics and strong. We're replacing each such + # character with its corresponding MD5 checksum value; + # this is likely overkill, but it should prevent us from + # colliding with the escape values by accident. + escaped.append(token.replace('*', g_escape_table['*']) + .replace('_', g_escape_table['_'])) + else: + escaped.append(self._encode_backslash_escapes(token)) + is_html_markup = not is_html_markup + return ''.join(escaped) + + def _hash_html_spans(self, text): + # Used for safe_mode. 
+ + def _is_auto_link(s): + if ':' in s and self._auto_link_re.match(s): + return True + elif '@' in s and self._auto_email_link_re.match(s): + return True + return False + + tokens = [] + is_html_markup = False + for token in self._sorta_html_tokenize_re.split(text): + if is_html_markup and not _is_auto_link(token): + sanitized = self._sanitize_html(token) + key = _hash_text(sanitized) + self.html_spans[key] = sanitized + tokens.append(key) + else: + tokens.append(token) + is_html_markup = not is_html_markup + return ''.join(tokens) + + def _unhash_html_spans(self, text): + for key, sanitized in self.html_spans.items(): + text = text.replace(key, sanitized) + return text + + def _sanitize_html(self, s): + if self.safe_mode == "replace": + return self.html_removed_text + elif self.safe_mode == "escape": + replacements = [ + ('&', '&'), + ('<', '<'), + ('>', '>'), + ] + for before, after in replacements: + s = s.replace(before, after) + return s + else: + raise MarkdownError("invalid value for 'safe_mode': %r (must be " + "'escape' or 'replace')" % self.safe_mode) + + _tail_of_inline_link_re = re.compile(r''' + # Match tail of: [text](/url/) or [text](/url/ "title") + \( # literal paren + [ \t]* + (?P # \1 + <.*?> + | + .*? + ) + [ \t]* + ( # \2 + (['"]) # quote char = \3 + (?P.*?) + \3 # matching quote + )? # title is optional + \) + ''', re.X | re.S) + _tail_of_reference_link_re = re.compile(r''' + # Match tail of: [text][id] + [ ]? # one optional space + (?:\n[ ]*)? # one optional newline followed by spaces + \[ + (?P<id>.*?) + \] + ''', re.X | re.S) + + def _do_links(self, text): + """Turn Markdown link shortcuts into XHTML <a> and <img> tags. + + This is a combination of Markdown.pl's _DoAnchors() and + _DoImages(). They are done together because that simplified the + approach. It was necessary to use a different approach than + Markdown.pl because of the lack of atomic matching support in + Python's regex engine used in $g_nested_brackets. 
+ """ + MAX_LINK_TEXT_SENTINEL = 300 + + # `anchor_allowed_pos` is used to support img links inside + # anchors, but not anchors inside anchors. An anchor's start + # pos must be `>= anchor_allowed_pos`. + anchor_allowed_pos = 0 + + curr_pos = 0 + while True: # Handle the next link. + # The next '[' is the start of: + # - an inline anchor: [text](url "title") + # - a reference anchor: [text][id] + # - an inline img: ![text](url "title") + # - a reference img: ![text][id] + # - a footnote ref: [^id] + # (Only if 'footnotes' extra enabled) + # - a footnote defn: [^id]: ... + # (Only if 'footnotes' extra enabled) These have already + # been stripped in _strip_footnote_definitions() so no + # need to watch for them. + # - a link definition: [id]: url "title" + # These have already been stripped in + # _strip_link_definitions() so no need to watch for them. + # - not markup: [...anything else... + try: + start_idx = text.index('[', curr_pos) + except ValueError: + break + text_length = len(text) + + # Find the matching closing ']'. + # Markdown.pl allows *matching* brackets in link text so we + # will here too. Markdown.pl *doesn't* currently allow + # matching brackets in img alt text -- we'll differ in that + # regard. + bracket_depth = 0 + for p in range(start_idx+1, min(start_idx+MAX_LINK_TEXT_SENTINEL, + text_length)): + ch = text[p] + if ch == ']': + bracket_depth -= 1 + if bracket_depth < 0: + break + elif ch == '[': + bracket_depth += 1 + else: + # Closing bracket not found within sentinel length. + # This isn't markup. + curr_pos = start_idx + 1 + continue + link_text = text[start_idx+1:p] + + # Possibly a footnote ref? 
+ if "footnotes" in self.extras and link_text.startswith("^"): + normed_id = re.sub(r'\W', '-', link_text[1:]) + if normed_id in self.footnotes: + self.footnote_ids.append(normed_id) + result = '<sup class="footnote-ref" id="fnref-%s">' \ + '<a href="#fn-%s">%s</a></sup>' \ + % (normed_id, normed_id, len(self.footnote_ids)) + text = text[:start_idx] + result + text[p+1:] + else: + # This id isn't defined, leave the markup alone. + curr_pos = p+1 + continue + + # Now determine what this is by the remainder. + p += 1 + if p == text_length: + return text + + # Inline anchor or img? + if text[p] == '(': # attempt at perf improvement + match = self._tail_of_inline_link_re.match(text, p) + if match: + # Handle an inline anchor or img. + is_img = start_idx > 0 and text[start_idx-1] == "!" + if is_img: + start_idx -= 1 + + url, title = match.group("url"), match.group("title") + if url and url[0] == '<': + url = url[1:-1] # '<url>' -> 'url' + # We've got to encode these to avoid conflicting + # with italics/bold. + url = url.replace('*', g_escape_table['*']) \ + .replace('_', g_escape_table['_']) + if title: + title_str = ' title="%s"' \ + % title.replace('*', g_escape_table['*']) \ + .replace('_', g_escape_table['_']) \ + .replace('"', '"') + else: + title_str = '' + if is_img: + result = '<img src="%s" alt="%s"%s%s' \ + % (url, link_text.replace('"', '"'), + title_str, self.empty_element_suffix) + curr_pos = start_idx + len(result) + text = text[:start_idx] + result + text[match.end():] + elif start_idx >= anchor_allowed_pos: + result_head = '<a href="%s"%s>' % (url, title_str) + result = '%s%s</a>' % (result_head, link_text) + # <img> allowed from curr_pos on, <a> from + # anchor_allowed_pos on. + curr_pos = start_idx + len(result_head) + anchor_allowed_pos = start_idx + len(result) + text = text[:start_idx] + result + text[match.end():] + else: + # Anchor not allowed here. + curr_pos = start_idx + 1 + continue + + # Reference anchor or img? 
+ else: + match = self._tail_of_reference_link_re.match(text, p) + if match: + # Handle a reference-style anchor or img. + is_img = start_idx > 0 and text[start_idx-1] == "!" + if is_img: + start_idx -= 1 + link_id = match.group("id").lower() + if not link_id: + link_id = link_text.lower() # for links like [this][] + if link_id in self.urls: + url = self.urls[link_id] + # We've got to encode these to avoid conflicting + # with italics/bold. + url = url.replace('*', g_escape_table['*']) \ + .replace('_', g_escape_table['_']) + title = self.titles.get(link_id) + if title: + title = title.replace('*', g_escape_table['*']) \ + .replace('_', g_escape_table['_']) + title_str = ' title="%s"' % title + else: + title_str = '' + if is_img: + result = '<img src="%s" alt="%s"%s%s' \ + % (url, link_text.replace('"', '"'), + title_str, self.empty_element_suffix) + curr_pos = start_idx + len(result) + text = text[:start_idx] + result + text[match.end():] + elif start_idx >= anchor_allowed_pos: + result = '<a href="%s"%s>%s</a>' \ + % (url, title_str, link_text) + result_head = '<a href="%s"%s>' % (url, title_str) + result = '%s%s</a>' % (result_head, link_text) + # <img> allowed from curr_pos on, <a> from + # anchor_allowed_pos on. + curr_pos = start_idx + len(result_head) + anchor_allowed_pos = start_idx + len(result) + text = text[:start_idx] + result + text[match.end():] + else: + # Anchor not allowed here. + curr_pos = start_idx + 1 + else: + # This id isn't defined, leave the markup alone. + curr_pos = match.end() + continue + + # Otherwise, it isn't markup. 
+ curr_pos = start_idx + 1 + + return text + + + _setext_h_re = re.compile(r'^(.+)[ \t]*\n(=+|-+)[ \t]*\n+', re.M) + def _setext_h_sub(self, match): + n = {"=": 1, "-": 2}[match.group(2)[0]] + demote_headers = self.extras.get("demote-headers") + if demote_headers: + n = min(n + demote_headers, 6) + return "<h%d>%s</h%d>\n\n" \ + % (n, self._run_span_gamut(match.group(1)), n) + + _atx_h_re = re.compile(r''' + ^([\#=]{1,6}) # \1 = string of #'s + [ \t]* + (.+?) # \2 = Header text + [ \t]* + (?<!\\) # ensure not an escaped trailing '#' + [\#=]* # optional closing #'s (not counted) + \n+ + ''', re.X | re.M) + def _atx_h_sub(self, match): + n = len(match.group(1)) + demote_headers = self.extras.get("demote-headers") + if demote_headers: + n = min(n + demote_headers, 6) + return "<h%d>%s</h%d>\n\n" \ + % (n, self._run_span_gamut(match.group(2)), n) + + def _do_headers(self, text): + # Setext-style headers: + # Header 1 + # ======== + # + # Header 2 + # -------- + text = self._setext_h_re.sub(self._setext_h_sub, text) + + # atx-style headers: + # # Header 1 + # ## Header 2 + # ## Header 2 with closing hashes ## + # ... + # ###### Header 6 + text = self._atx_h_re.sub(self._atx_h_sub, text) + + return text + + + _marker_ul_chars = '*+-' + _marker_any = r'(?:[%s]|\d+\.)' % _marker_ul_chars + _marker_ul = '(?:[%s])' % _marker_ul_chars + _marker_ol = r'(?:\d+\.)' + + def _list_sub(self, match): + lst = match.group(1) + lst_type = match.group(3) in self._marker_ul_chars and "ul" or "ol" + result = self._process_list_items(lst) + if self.list_level: + return "<%s>\n%s</%s>\n" % (lst_type, result, lst_type) + else: + return "<%s>\n%s</%s>\n\n" % (lst_type, result, lst_type) + + def _do_lists(self, text): + # Form HTML ordered (numbered) and unordered (bulleted) lists. 
+ + for marker_pat in (self._marker_ul, self._marker_ol): + # Re-usable pattern to match any entire ul or ol list: + less_than_tab = self.tab_width - 1 + whole_list = r''' + ( # \1 = whole list + ( # \2 + [ ]{0,%d} + (%s) # \3 = first list item marker + [ \t]+ + ) + (?:.+?) + ( # \4 + \Z + | + \n{2,} + (?=\S) + (?! # Negative lookahead for another list item marker + [ \t]* + %s[ \t]+ + ) + ) + ) + ''' % (less_than_tab, marker_pat, marker_pat) + + # We use a different prefix before nested lists than top-level lists. + # See extended comment in _process_list_items(). + # + # Note: There's a bit of duplication here. My original implementation + # created a scalar regex pattern as the conditional result of the test on + # $g_list_level, and then only ran the $text =~ s{...}{...}egmx + # substitution once, using the scalar as the pattern. This worked, + # everywhere except when running under MT on my hosting account at Pair + # Networks. There, this caused all rebuilds to be killed by the reaper (or + # perhaps they crashed, but that seems incredibly unlikely given that the + # same script on the same server ran fine *except* under MT. I've spent + # more time trying to figure out why this is happening than I'd like to + # admit. My only guess, backed up by the fact that this workaround works, + # is that Perl optimizes the substition when it can figure out that the + # pattern will never change, and when this optimization isn't on, we run + # afoul of the reaper. Thus, the slightly redundant code to that uses two + # static s/// patterns rather than one conditional pattern. + + if self.list_level: + sub_list_re = re.compile("^"+whole_list, re.X | re.M | re.S) + text = sub_list_re.sub(self._list_sub, text) + else: + list_re = re.compile(r"(?:(?<=\n\n)|\A\n?)"+whole_list, + re.X | re.M | re.S) + text = list_re.sub(self._list_sub, text) + + return text + + _list_item_re = re.compile(r''' + (\n)? 
# leading line = \1 + (^[ \t]*) # leading whitespace = \2 + (%s) [ \t]+ # list marker = \3 + ((?:.+?) # list item text = \4 + (\n{1,2})) # eols = \5 + (?= \n* (\Z | \2 (%s) [ \t]+)) + ''' % (_marker_any, _marker_any), + re.M | re.X | re.S) + + _last_li_endswith_two_eols = False + def _list_item_sub(self, match): + item = match.group(4) + leading_line = match.group(1) + leading_space = match.group(2) + if leading_line or "\n\n" in item or self._last_li_endswith_two_eols: + item = self._run_block_gamut(self._outdent(item)) + else: + # Recursion for sub-lists: + item = self._do_lists(self._outdent(item)) + if item.endswith('\n'): + item = item[:-1] + item = self._run_span_gamut(item) + self._last_li_endswith_two_eols = (len(match.group(5)) == 2) + return "<li>%s</li>\n" % item + + def _process_list_items(self, list_str): + # Process the contents of a single ordered or unordered list, + # splitting it into individual list items. + + # The $g_list_level global keeps track of when we're inside a list. + # Each time we enter a list, we increment it; when we leave a list, + # we decrement. If it's zero, we're not in a list anymore. + # + # We do this because when we're not inside a list, we want to treat + # something like this: + # + # I recommend upgrading to version + # 8. Oops, now this line is treated + # as a sub-list. + # + # As a single paragraph, despite the fact that the second line starts + # with a digit-period-space sequence. + # + # Whereas when we're inside a list (or sub-list), that line will be + # treated as the start of a sub-list. What a kludge, huh? This is + # an aspect of Markdown's syntax that's hard to parse perfectly + # without resorting to mind-reading. Perhaps the solution is to + # change the syntax rules such that sub-lists must start with a + # starting cardinal number; e.g. "1." or "a.". 
+ self.list_level += 1 + self._last_li_endswith_two_eols = False + list_str = list_str.rstrip('\n') + '\n' + list_str = self._list_item_re.sub(self._list_item_sub, list_str) + self.list_level -= 1 + return list_str + + def _get_pygments_lexer(self, lexer_name): + try: + from pygments import lexers, util + except ImportError: + return None + try: + return lexers.get_lexer_by_name(lexer_name) + except util.ClassNotFound: + return None + + def _color_with_pygments(self, codeblock, lexer, **formatter_opts): + import pygments + import pygments.formatters + + class HtmlCodeFormatter(pygments.formatters.HtmlFormatter): + def _wrap_code(self, inner): + """A function for use in a Pygments Formatter which + wraps in <code> tags. + """ + yield 0, "<code>" + for tup in inner: + yield tup + yield 0, "</code>" + + def wrap(self, source, outfile): + """Return the source with a code, pre, and div.""" + return self._wrap_div(self._wrap_pre(self._wrap_code(source))) + + formatter = HtmlCodeFormatter(cssclass="codehilite", **formatter_opts) + return pygments.highlight(codeblock, lexer, formatter) + + def _code_block_sub(self, match): + codeblock = match.group(1) + codeblock = self._outdent(codeblock) + codeblock = self._detab(codeblock) + codeblock = codeblock.lstrip('\n') # trim leading newlines + codeblock = codeblock.rstrip() # trim trailing whitespace + + if "code-color" in self.extras and codeblock.startswith(":::"): + lexer_name, rest = codeblock.split('\n', 1) + lexer_name = lexer_name[3:].strip() + lexer = self._get_pygments_lexer(lexer_name) + codeblock = rest.lstrip("\n") # Remove lexer declaration line. 
+ if lexer: + formatter_opts = self.extras['code-color'] or {} + colored = self._color_with_pygments(codeblock, lexer, + **formatter_opts) + return "\n\n%s\n\n" % colored + + codeblock = self._encode_code(codeblock) + return "\n\n<pre><code>%s\n</code></pre>\n\n" % codeblock + + def _do_code_blocks(self, text): + """Process Markdown `<pre><code>` blocks.""" + code_block_re = re.compile(r''' + (?:\n\n|\A) + ( # $1 = the code block -- one or more lines, starting with a space/tab + (?: + (?:[ ]{%d} | \t) # Lines must start with a tab or a tab-width of spaces + .*\n+ + )+ + ) + ((?=^[ ]{0,%d}\S)|\Z) # Lookahead for non-space at line-start, or end of doc + ''' % (self.tab_width, self.tab_width), + re.M | re.X) + + return code_block_re.sub(self._code_block_sub, text) + + + # Rules for a code span: + # - backslash escapes are not interpreted in a code span + # - to include one or or a run of more backticks the delimiters must + # be a longer run of backticks + # - cannot start or end a code span with a backtick; pad with a + # space and that space will be removed in the emitted HTML + # See `test/tm-cases/escapes.text` for a number of edge-case + # examples. + _code_span_re = re.compile(r''' + (?<!\\) + (`+) # \1 = Opening run of ` + (?!`) # See Note A test/tm-cases/escapes.text + (.+?) # \2 = The code block + (?<!`) + \1 # Matching closer + (?!`) + ''', re.X | re.S) + + def _code_span_sub(self, match): + c = match.group(2).strip(" \t") + c = self._encode_code(c) + return "<code>%s</code>" % c + + def _do_code_spans(self, text): + # * Backtick quotes are used for <code></code> spans. + # + # * You can use multiple backticks as the delimiters if you want to + # include literal backticks in the code span. So, this input: + # + # Just type ``foo `bar` baz`` at the prompt. + # + # Will translate to: + # + # <p>Just type <code>foo `bar` baz</code> at the prompt.</p> + # + # There's no arbitrary limit to the number of backticks you + # can use as delimters. 
If you need three consecutive backticks + # in your code, use four for delimiters, etc. + # + # * You can use spaces to get literal backticks at the edges: + # + # ... type `` `bar` `` ... + # + # Turns to: + # + # ... type <code>`bar`</code> ... + return self._code_span_re.sub(self._code_span_sub, text) + + def _encode_code(self, text): + """Encode/escape certain characters inside Markdown code runs. + The point is that in code, these characters are literals, + and lose their special Markdown meanings. + """ + replacements = [ + # Encode all ampersands; HTML entities are not + # entities within a Markdown code span. + ('&', '&'), + # Do the angle bracket song and dance: + ('<', '<'), + ('>', '>'), + # Now, escape characters that are magic in Markdown: + ('*', g_escape_table['*']), + ('_', g_escape_table['_']), + ('{', g_escape_table['{']), + ('}', g_escape_table['}']), + ('[', g_escape_table['[']), + (']', g_escape_table[']']), + ('\\', g_escape_table['\\']), + ] + for before, after in replacements: + text = text.replace(before, after) + return text + + _strong_re = re.compile(r"(\*\*|__)(?=\S)(.+?[*_]*)(?<=\S)\1", re.S) + _em_re = re.compile(r"(\*|_)(?=\S)(.+?)(?<=\S)\1", re.S) + _code_friendly_strong_re = re.compile(r"\*\*(?=\S)(.+?[*_]*)(?<=\S)\*\*", re.S) + _code_friendly_em_re = re.compile(r"\*(?=\S)(.+?)(?<=\S)\*", re.S) + def _do_italics_and_bold(self, text): + # <strong> must go first: + if "code-friendly" in self.extras: + text = self._code_friendly_strong_re.sub(r"<strong>\1</strong>", text) + text = self._code_friendly_em_re.sub(r"<em>\1</em>", text) + else: + text = self._strong_re.sub(r"<strong>\2</strong>", text) + text = self._em_re.sub(r"<em>\2</em>", text) + return text + + + _block_quote_re = re.compile(r''' + ( # Wrap whole match in \1 + ( + ^[ \t]*>[ \t]? 
# '>' at the start of a line + .+\n # rest of the first line + (.+\n)* # subsequent consecutive lines + \n* # blanks + )+ + ) + ''', re.M | re.X) + _bq_one_level_re = re.compile('^[ \t]*>[ \t]?', re.M); + + _html_pre_block_re = re.compile(r'(\s*<pre>.+?</pre>)', re.S) + def _dedent_two_spaces_sub(self, match): + return re.sub(r'(?m)^ ', '', match.group(1)) + + def _block_quote_sub(self, match): + bq = match.group(1) + bq = self._bq_one_level_re.sub('', bq) # trim one level of quoting + bq = self._ws_only_line_re.sub('', bq) # trim whitespace-only lines + bq = self._run_block_gamut(bq) # recurse + + bq = re.sub('(?m)^', ' ', bq) + # These leading spaces screw with <pre> content, so we need to fix that: + bq = self._html_pre_block_re.sub(self._dedent_two_spaces_sub, bq) + + return "<blockquote>\n%s\n</blockquote>\n\n" % bq + + def _do_block_quotes(self, text): + if '>' not in text: + return text + return self._block_quote_re.sub(self._block_quote_sub, text) + + def _form_paragraphs(self, text): + # Strip leading and trailing lines: + text = text.strip('\n') + + # Wrap <p> tags. + grafs = re.split(r"\n{2,}", text) + for i, graf in enumerate(grafs): + if graf in self.html_blocks: + # Unhashify HTML blocks + grafs[i] = self.html_blocks[graf] + else: + # Wrap <p> tags. 
+ graf = self._run_span_gamut(graf) + grafs[i] = "<p>" + graf.lstrip(" \t") + "</p>" + + return "\n\n".join(grafs) + + def _add_footnotes(self, text): + if self.footnotes: + footer = [ + '<div class="footnotes">', + '<hr' + self.empty_element_suffix, + '<ol>', + ] + for i, id in enumerate(self.footnote_ids): + if i != 0: + footer.append('') + footer.append('<li id="fn-%s">' % id) + footer.append(self._run_block_gamut(self.footnotes[id])) + backlink = ('<a href="#fnref-%s" ' + 'class="footnoteBackLink" ' + 'title="Jump back to footnote %d in the text.">' + '↩</a>' % (id, i+1)) + if footer[-1].endswith("</p>"): + footer[-1] = footer[-1][:-len("</p>")] \ + + ' ' + backlink + "</p>" + else: + footer.append("\n<p>%s</p>" % backlink) + footer.append('</li>') + footer.append('</ol>') + footer.append('</div>') + return text + '\n\n' + '\n'.join(footer) + else: + return text + + # Ampersand-encoding based entirely on Nat Irons's Amputator MT plugin: + # http://bumppo.net/projects/amputator/ + _ampersand_re = re.compile(r'&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)') + _naked_lt_re = re.compile(r'<(?![a-z/?\$!])', re.I) + _naked_gt_re = re.compile(r'''(?<![a-z?!/'"-])>''', re.I) + + def _encode_amps_and_angles(self, text): + # Smart processing for ampersands and angle brackets that need + # to be encoded. + text = self._ampersand_re.sub('&', text) + + # Encode naked <'s + text = self._naked_lt_re.sub('<', text) + + # Encode naked >'s + # Note: Other markdown implementations (e.g. Markdown.pl, PHP + # Markdown) don't do this. + text = self._naked_gt_re.sub('>', text) + return text + + def _encode_backslash_escapes(self, text): + for ch, escape in g_escape_table.items(): + text = text.replace("\\"+ch, escape) + return text + + _auto_link_re = re.compile(r'<((https?|ftp):[^\'">\s]+)>', re.I) + def _auto_link_sub(self, match): + g1 = match.group(1) + return '<a href="%s">%s</a>' % (g1, g1) + + _auto_email_link_re = re.compile(r""" + < + (?:mailto:)? 
+ ( + [-.\w]+ + \@ + [-\w]+(\.[-\w]+)*\.[a-z]+ + ) + > + """, re.I | re.X | re.U) + def _auto_email_link_sub(self, match): + return self._encode_email_address( + self._unescape_special_chars(match.group(1))) + + def _do_auto_links(self, text): + text = self._auto_link_re.sub(self._auto_link_sub, text) + text = self._auto_email_link_re.sub(self._auto_email_link_sub, text) + return text + + def _encode_email_address(self, addr): + # Input: an email address, e.g. "foo@example.com" + # + # Output: the email address as a mailto link, with each character + # of the address encoded as either a decimal or hex entity, in + # the hopes of foiling most address harvesting spam bots. E.g.: + # + # <a href="mailto:foo@e + # xample.com">foo + # @example.com</a> + # + # Based on a filter by Matthew Wickline, posted to the BBEdit-Talk + # mailing list: <http://tinyurl.com/yu7ue> + chars = [_xml_encode_email_char_at_random(ch) + for ch in "mailto:" + addr] + # Strip the mailto: from the visible part. + addr = '<a href="%s">%s</a>' \ + % (''.join(chars), ''.join(chars[7:])) + return addr + + def _do_link_patterns(self, text): + """Caveat emptor: there isn't much guarding against link + patterns being formed inside other standard Markdown links, e.g. + inside a [link def][like this]. + + Dev Notes: *Could* consider prefixing regexes with a negative + lookbehind assertion to attempt to guard against this. 
+ """ + link_from_hash = {} + for regex, repl in self.link_patterns: + replacements = [] + for match in regex.finditer(text): + if hasattr(repl, "__call__"): + href = repl(match) + else: + href = match.expand(repl) + replacements.append((match.span(), href)) + for (start, end), href in reversed(replacements): + escaped_href = ( + href.replace('"', '"') # b/c of attr quote + # To avoid markdown <em> and <strong>: + .replace('*', g_escape_table['*']) + .replace('_', g_escape_table['_'])) + link = '<a href="%s">%s</a>' % (escaped_href, text[start:end]) + hash = md5(link).hexdigest() + link_from_hash[hash] = link + text = text[:start] + hash + text[end:] + for hash, link in link_from_hash.items(): + text = text.replace(hash, link) + return text + + def _unescape_special_chars(self, text): + # Swap back in all the special characters we've hidden. + for ch, hash in g_escape_table.items(): + text = text.replace(hash, ch) + return text + + def _outdent(self, text): + # Remove one level of line-leading tabs or spaces + return self._outdent_re.sub('', text) + + +class MarkdownWithExtras(Markdown): + """A markdowner class that enables most extras: + + - footnotes + - code-color (only has effect if 'pygments' Python module on path) + + These are not included: + - pyshell (specific to Python-related documenting) + - code-friendly (because it *disables* part of the syntax) + - link-patterns (because you need to specify some actual + link-patterns anyway) + """ + extras = ["footnotes", "code-color"] + + +#---- internal support functions + +# From http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/52549 +def _curry(*args, **kwargs): + function, args = args[0], args[1:] + def result(*rest, **kwrest): + combined = kwargs.copy() + combined.update(kwrest) + return function(*args + rest, **combined) + return result + +# Recipe: regex_from_encoded_pattern (1.0) +def _regex_from_encoded_pattern(s): + """'foo' -> re.compile(re.escape('foo')) + '/foo/' -> re.compile('foo') + '/foo/i' 
-> re.compile('foo', re.I) + """ + if s.startswith('/') and s.rfind('/') != 0: + # Parse it: /PATTERN/FLAGS + idx = s.rfind('/') + pattern, flags_str = s[1:idx], s[idx+1:] + flag_from_char = { + "i": re.IGNORECASE, + "l": re.LOCALE, + "s": re.DOTALL, + "m": re.MULTILINE, + "u": re.UNICODE, + } + flags = 0 + for char in flags_str: + try: + flags |= flag_from_char[char] + except KeyError: + raise ValueError("unsupported regex flag: '%s' in '%s' " + "(must be one of '%s')" + % (char, s, ''.join(flag_from_char.keys()))) + return re.compile(s[1:idx], flags) + else: # not an encoded regex + return re.compile(re.escape(s)) + +# Recipe: dedent (0.1.2) +def _dedentlines(lines, tabsize=8, skip_first_line=False): + """_dedentlines(lines, tabsize=8, skip_first_line=False) -> dedented lines + + "lines" is a list of lines to dedent. + "tabsize" is the tab width to use for indent width calculations. + "skip_first_line" is a boolean indicating if the first line should + be skipped for calculating the indent width and for dedenting. + This is sometimes useful for docstrings and similar. + + Same as dedent() except operates on a sequence of lines. Note: the + lines list is modified **in-place**. 
+ """ + DEBUG = False + if DEBUG: + print "dedent: dedent(..., tabsize=%d, skip_first_line=%r)"\ + % (tabsize, skip_first_line) + indents = [] + margin = None + for i, line in enumerate(lines): + if i == 0 and skip_first_line: continue + indent = 0 + for ch in line: + if ch == ' ': + indent += 1 + elif ch == '\t': + indent += tabsize - (indent % tabsize) + elif ch in '\r\n': + continue # skip all-whitespace lines + else: + break + else: + continue # skip all-whitespace lines + if DEBUG: print "dedent: indent=%d: %r" % (indent, line) + if margin is None: + margin = indent + else: + margin = min(margin, indent) + if DEBUG: print "dedent: margin=%r" % margin + + if margin is not None and margin > 0: + for i, line in enumerate(lines): + if i == 0 and skip_first_line: continue + removed = 0 + for j, ch in enumerate(line): + if ch == ' ': + removed += 1 + elif ch == '\t': + removed += tabsize - (removed % tabsize) + elif ch in '\r\n': + if DEBUG: print "dedent: %r: EOL -> strip up to EOL" % line + lines[i] = lines[i][j:] + break + else: + raise ValueError("unexpected non-whitespace char %r in " + "line %r while removing %d-space margin" + % (ch, line, margin)) + if DEBUG: + print "dedent: %r: %r -> removed %d/%d"\ + % (line, ch, removed, margin) + if removed == margin: + lines[i] = lines[i][j+1:] + break + elif removed > margin: + lines[i] = ' '*(removed-margin) + lines[i][j+1:] + break + else: + if removed: + lines[i] = lines[i][removed:] + return lines + +def _dedent(text, tabsize=8, skip_first_line=False): + """_dedent(text, tabsize=8, skip_first_line=False) -> dedented text + + "text" is the text to dedent. + "tabsize" is the tab width to use for indent width calculations. + "skip_first_line" is a boolean indicating if the first line should + be skipped for calculating the indent width and for dedenting. + This is sometimes useful for docstrings and similar. 
+ + textwrap.dedent(s), but don't expand tabs to spaces + """ + lines = text.splitlines(1) + _dedentlines(lines, tabsize=tabsize, skip_first_line=skip_first_line) + return ''.join(lines) + + +class _memoized(object): + """Decorator that caches a function's return value each time it is called. + If called later with the same arguments, the cached value is returned, and + not re-evaluated. + + http://wiki.python.org/moin/PythonDecoratorLibrary + """ + def __init__(self, func): + self.func = func + self.cache = {} + def __call__(self, *args): + try: + return self.cache[args] + except KeyError: + self.cache[args] = value = self.func(*args) + return value + except TypeError: + # uncachable -- for instance, passing a list as an argument. + # Better to not cache than to blow up entirely. + return self.func(*args) + def __repr__(self): + """Return the function's docstring.""" + return self.func.__doc__ + + +def _xml_oneliner_re_from_tab_width(tab_width): + """Standalone XML processing instruction regex.""" + return re.compile(r""" + (?: + (?<=\n\n) # Starting after a blank line + | # or + \A\n? # the beginning of the doc + ) + ( # save in $1 + [ ]{0,%d} + (?: + <\?\w+\b\s+.*?\?> # XML processing instruction + | + <\w+:\w+\b\s+.*?/> # namespaced single tag + ) + [ \t]* + (?=\n{2,}|\Z) # followed by a blank line or end of document + ) + """ % (tab_width - 1), re.X) +_xml_oneliner_re_from_tab_width = _memoized(_xml_oneliner_re_from_tab_width) + +def _hr_tag_re_from_tab_width(tab_width): + return re.compile(r""" + (?: + (?<=\n\n) # Starting after a blank line + | # or + \A\n? # the beginning of the doc + ) + ( # save in \1 + [ ]{0,%d} + <(hr) # start tag = \2 + \b # word break + ([^<>])*? 
# + /?> # the matching end tag + [ \t]* + (?=\n{2,}|\Z) # followed by a blank line or end of document + ) + """ % (tab_width - 1), re.X) +_hr_tag_re_from_tab_width = _memoized(_hr_tag_re_from_tab_width) + + +def _xml_encode_email_char_at_random(ch): + r = random() + # Roughly 10% raw, 45% hex, 45% dec. + # '@' *must* be encoded. I [John Gruber] insist. + if r > 0.9 and ch != "@": + return ch + elif r < 0.45: + # The [1:] is to drop leading '0': 0x63 -> x63 + return '&#%s;' % hex(ord(ch))[1:] + else: + return '&#%s;' % ord(ch) + +def _hash_text(text): + return 'md5:'+md5(text.encode("utf-8")).hexdigest() + + + +text = """\ +Dies ist ein Text. + +--- + +* Test +* Mu +* Blah +""" + +#markdown(text, html4tags=False, tab_width=DEFAULT_TAB_WIDTH, +# safe_mode=None, extras=None, link_patterns=None): +#html = markdown(text, html4tags=False) +# +#print html + + + +@set_hook("read") +def read(params): + file = params.file + if file.rel_path.endswith(".md"): + file.render = "html" + f = file.read_keywords() + return f.read() + + +_markdown = None + +@set_hook("htmlize") +def htmlize(params): + """Parse Markdown and convert it to HTML :-)""" + + file = params.file + if not file.rel_path.endswith(".md"): + return + + global _markdown + if not _markdown: + _markdown = Markdown(extras={ + "code-friendly":True, + "xml":True, + "demote-headers":1, + "code-color":{}}) + html = _markdown.convert(params.file.contents, params.file.input_encoding) + #print type(html) + #print html + return html diff --git a/plugins/read_rst.py b/plugins/read_rst.py new file mode 100644 index 0000000..e8e3a36 --- /dev/null +++ b/plugins/read_rst.py @@ -0,0 +1,76 @@ +# -*- coding: iso-8859-1 -*- +from webber import * +from docutils.writers import html4css1 +from docutils import core + + +@set_hook("read") +def read(params): + file = params.file + if file.rel_path.endswith(".rst"): + file.render = "html" + f = file.read_keywords() + return f.read() + + + +class WebHTMLTranslator(html4css1.HTMLTranslator): 
+ doctype = "" + content_type = "<!--%s-->" + generator = "<!--%s-->" + + def __init__(self, document): + html4css1.HTMLTranslator.__init__(self, document) + self.head_prefix = [] + self.body_prefix = [] + self.stylesheet = [] + self.body_suffix = [] + self.section_level = 1 + + def visit_system_message(self, node): + pass + + def visit_document (self, node): + pass + + def depart_document (self, node): + pass + +class WebWriter(html4css1.Writer): + def __init__ (self): + html4css1.Writer.__init__(self) + self.translator_class = WebHTMLTranslator + + +@set_hook("htmlize") +def htmlize(params): + "Parse text as RST and convert it to HTML" + + file = params.file + if not file.rel_path.endswith(".rst"): + return + + contents = file.contents + + settings = { + # cloak email addresses to reduce spam + 'cloak_email_addresses': 1, + # Emit headers as H2, because H1 is already used + 'doctitle_xform': False, + 'strip_comments': 'true', + #'dump_pseudo_xml': 'true', + #'dump_settings': 'true', + #'dump_transforms': 'true', + # TODO: language_code? + } + # http://docutils.sourceforge.net/docs/dev/hacking.html + # /usr/share/doc/python-docutils/ + document = core.publish_doctree( + source_path=params.file.rel_path, + source=contents, + settings_overrides=settings) + return core.publish_from_doctree(document, + writer=WebWriter(), + writer_name='html', + destination_path=params.file.rel_path, + settings_overrides=settings) diff --git a/plugins/skeleton.py b/plugins/skeleton.py new file mode 100644 index 0000000..8d4305f --- /dev/null +++ b/plugins/skeleton.py @@ -0,0 +1,159 @@ +# -*- coding: iso-8859-1 -*- +from webber import * + + +# +# The hook "addoptions" can be used by plugins to add their own +# command line options. 
+# +# params.parser contains the optparse based parser +# +@set_hook("addoptions") +def test_addoptions(params): + #print "in skeleton.addoptions" + params.parser.add_option("-V", "--test_verbose", action="count", + dest="test_verbose", default=0, + help="print status messages to stdout") + + +# +# After the command-line options have been processed and incorporated into +# config object, the hook "checkconfig" is called. Here each plugin can +# check if the specified configurations are sane. +# +# params is empty, use cfg instead +# +@set_hook("checkconfig") +def checkconfig(params): + if cfg.test_verbose: + print "in skeleton.checkconfig" + #cfg.blah = "muh" + + +# +# Just before walking the directory tree, the hook "start" +# get's called. +# +# param is empty +# +@set_hook("start") +def finish(params): + if cfg.test_verbose: + print "in skeleton.start" + + +# +# For each file that is not excluded (and not in an excluded directory, the +# hook "read" is called. Usually a reader-plugin (e.g. "rst" or +# "markdown") looks at the file extension of the file parameter. +# +# If the plugin declares itself responsible for this file, it should return +# the contents of the file. It also should set file.reader to some text +# string that describes itself. +# +# params.direc contains a "class Directory" object +# params.file contains a "class File" object +# +@set_hook("read") +def read(params): + if cfg.test_verbose: + print "in skeleton.read", params.file.rel_path + #return "contents of file" + + +# +# After a file has been read in, any plugin can filter it's raw +# text. +# +# params.direc contains the "class Directory" object +# params.file has the "class File" object +# params.contents contains the text +# +@set_hook("filter") +def filter(params): + if cfg.test_verbose: + print "in skeleton.filter", params.file.rel_path + if cfg.verbose > 6: + params.contents = "contents deleted by skeleton.filter" + + +# +# "scan" should scan for meta-data, mostly for links. 
+#
+# params.direc contains the "class Directory" object
+# params.file has the "class File" object
+# params.file.contents contains the text
+#
+@set_hook("scan")
+def scan(params):
+	if cfg.test_verbose:
+		print "in skeleton.scan", params.file.rel_path
+
+
+#
+# "scan_done" is called once after all files have been scanned
+#
+# params is empty
+#
+@set_hook("scan_done")
+def scan_done(params):
+	if cfg.test_verbose:
+		print "in skeleton.scan_done"
+
+
+#
+# The "htmlize" converts the contents into html. The
+# first htmlize hook that returns anything wins, no other
+# htmlize hooks will be called.
+#
+# params.direc contains the "class Directory" object
+# params.file has the "class File" object
+# params.file.contents contains the text
+#
+@set_hook("htmlize")
+def htmlize(params):
+	if cfg.test_verbose:
+		print "in skeleton.htmlize", params.file.rel_path
+
+
+#
+# The "linkify" hook converts any link to html.
+#
+# params.direc contains the "class Directory" object
+# params.file has the "class File" object
+# params.file.contents contains body text of the page
+#
+@set_hook("linkify")
+def linkify(params):
+	if cfg.test_verbose:
+		print "in skeleton.linkify", params.file.rel_path
+
+
+#
+# At the very end of the program execution, the hook "finish"
+# gets called.
+# +# params is empty +# +@set_hook("finish") +def finish(params): + if cfg.test_verbose: + print "in skeleton.finish" + + + +# TODO: Description missing +@set_macro("sample") +def sample_macro(params): + if cfg.test_verbose: + print "in macro skeleton.sample_macro, params:", params + return "{ output of sample macro }" + + + +# TODO: Description missing +@set_function("func") +def sample_func(): + if cfg.test_verbose: + print "in macro skeleton.sample_func" + return "{ output from sample function }" diff --git a/plugins/template_mako.py b/plugins/template_mako.py new file mode 100644 index 0000000..b8a4db6 --- /dev/null +++ b/plugins/template_mako.py @@ -0,0 +1,75 @@ +# -*- coding: iso-8859-1 -*- +from webber import * +from mako.lookup import TemplateLookup +import os + +""" +The make template renders a *.tmpl file which can contain things like + + ${file} the current File object + ${body} HTML for the main contents + ${rootpath} (relative!) path to the web site root directory + ${description} used for meta= + ${keywords} used for meta= + + ... and also all functions decorated with "@set_function(name)". 
+""" + + +template_cache = {} + +def get_template(file): + filename = file.template + extra_dir = os.path.split(file.path)[0] + if not filename.endswith('.tmpl'): + filename += '.tmpl' + key = "%s %s" % (filename, extra_dir) + if template_cache.has_key(key): + return template_cache[key] + else: + lookup = TemplateLookup( + directories = (extra_dir, file.style_dir), + output_encoding = file.output_encoding, + encoding_errors='replace', + filesystem_checks = False) + tmpl = lookup.get_template(filename) + + template_cache[key] = tmpl + return tmpl + + +@set_hook("pagetemplate") +def pagetemplate(params): + #print "in webber_template_mako.pagetemplate" + #print params.file + + kw = {} + kw["file"] = params.file + if isinstance(params.file.contents, unicode): + kw["body"] = params.file.contents + else: + kw["body"] = unicode(params.file.contents, 'iso-8859-1') + + #print "path:", params.file.out_path + root = [] + for i in range(params.file.out_path.count("/")): + root.append("..") + #print "root:", root + root = "/".join(root) + if root: + root = root + "/" + #print "root:", root + kw["rootpath"] = root + try: + kw["description"] = params.file.description + except: + kw["description"] = "" + try: + kw['keywords'] = params.file.keywords + except: + kw['keywords'] = [] + kw.update(functions) + tmpl = get_template(params.file) + + contents = tmpl.render(**kw) + return contents diff --git a/webber b/webber new file mode 100755 index 0000000..468b9a2 --- /dev/null +++ b/webber @@ -0,0 +1,26 @@ +#!/usr/bin/python +# -*- coding: iso-8859-1 -*- + +import sys +from webber import main + +if __name__ == "__main__": + if '--profile' in sys.argv: + + sys.argv.remove('--profile') + print 'Starting profile' + + import hotshot, hotshot.stats + prof = hotshot.Profile('newbuild.prof') + prof.runcall(main) + prof.close() + + print 'Profile completed' + + stats = hotshot.stats.load('newbuild.prof') + #stats.strip_dirs() + stats.sort_stats('time', 'calls') + stats.print_stats(50) + + 
else: + main() diff --git a/webber.py b/webber.py new file mode 100644 index 0000000..abb13d4 --- /dev/null +++ b/webber.py @@ -0,0 +1,745 @@ +# -*- coding: iso-8859-1 -*- +import sys, os, optparse, fnmatch, stat, re, time, types +from config import Holder + + + +############################################################################### +# +# Global variables +# + +__all__ = [ + # Globals + "cfg", # configuration from webber.ini + "directories", # global hash of directories, by rel_path + "files", # global hash of files, by rel_path + "functions", # all exported template functions + + # Functions + "set_hook", # decorator for hook-functions + "set_macro", # define macro + "set_function", # define functions for the template + "get_file_for", + "get_link_from", + "get_current_file", # because mako-called functions cannot access the + # current File object + "get_program_directory", + "log", # misc logging functions + "info", + "warning", + "error", + ] + + + +############################################################################### +# +# Configuration class +# + +cfg = Holder() + + + +directories = {} + +class Directory(Holder): + """This stores per-directory information. Each file has a pointer + to a directory object.""" + + def __init__(self, **kw): + Holder.__init__(self, **kw) + directories[kw["rel_path"]] = self + + +files = {} +current_file = None + +class File(Holder): + """This stores file information.""" + + def __init__(self, **kw): + Holder.__init__(self, **kw) + files[kw["rel_path"]] = self + self.render = None + mtime = os.stat(self.path)[stat.ST_MTIME] + self.mtime = mtime + self.ctime = mtime + #print self.keys() + + reKeywords = re.compile(r'(\S+)\s*:\s*(.*)') + #reIsoDate = re.compile(r'(\d\d\d\d)-(\d\d)-(\d\d)') + + def read_keywords(self, terminate_line=""): + """Opens the file and reads "key: value" pairs on the top of it. 
Returns + the open file handle for further processing by some plugins/read_*.py code.""" + f = open(self.path) + while True: + s = f.readline().strip() + if s==terminate_line: + break + m = self.reKeywords.match(s) + if not m: + warning("%s: wrong 'key: value' line '%s'" % (self.rel_path, s)) + break + key = m.group(1).lower() + val = m.group(2) + + if key == "mtime": + val = iso_to_time(val) + + if key == "ctime": + val = iso_to_time(val) + + if key == "title": + if not self.has_key("linktitle"): + self["linktitle"] = val + + #print self.rel_path, key, val + self[key] = val + return f + + +_get_file_for_cache = {} +def get_file_for(name): + """webber.files is an hash of File objects, but keyed on the real file name. + This function returns a File object for a specific linktitle.""" + + try: + return _get_file_for_cache[name] + except: + pass + + #print "get_file_for:", name + for s in files: + f = files[s] + try: + if f.linktitle == name: + #print " via linktitle:", s + _get_file_for_cache[name] = f + return f + except: + pass + # Allow exact match as well + if s == name: + #print " exact:", s + _get_file_for_cache[name] = f + return f + #print " not found" + + +def relpath(base_path, target): + """\ + Return a relative path to the target from either the current directory + or an optional base directory. + + Base can be a directory specified either as absolute or relative + to current directory.""" + # Code from http://code.activestate.com/recipes/302594/ + + def commonpath(a, b): + """Returns the longest common to 'paths' path. + + Unlike the strange commonprefix: + - this returns valid path + - accepts only two arguments + """ + if a == b: + return a + while len(a) > 0: + if a == b: + return a + if len(a) > len(b): + a = os.path.dirname(a) + else: + b = os.path.dirname(b) + return None + + base_path = os.path.normpath(os.path.normcase(base_path)) + target = os.path.normpath(os.path.normcase(target)) + + if base_path == target: + return '.' 
+ + # On the windows platform the target may be on a different drive. + if os.path.splitdrive(base_path)[0] != os.path.splitdrive(target)[0]: + return None + + common_path_len = len(commonpath(base_path, target)) + + # If there's no common prefix decrease common_path_len should be less by 1 + base_drv, base_dir = os.path.splitdrive(base_path) + if common_path_len == len(base_drv) + 1: + common_path_len -= 1 + + # if base_path is root directory - no directories up + if base_dir == os.sep: + dirs_up = 0 + else: + dirs_up = base_path[common_path_len:].count(os.sep) + + ret = os.sep.join([os.pardir] * dirs_up) + if len(target) > common_path_len: + ret = os.path.join(ret, target[common_path_len + 1:]) + + return ret + + +def get_link_from(source, dest): + #print "get_link_from", source, dest + source = get_file_for(source) + if not source: + return "." + dest = get_file_for(dest) + if not dest: + return "." + rel_path = relpath(directories[source.direc].abs_path, directories[dest.direc].abs_path) + try: + out_path = dest.out_path + except: + out_path = "" + #print dest + rel_path = os.path.join(rel_path, os.path.split(out_path)[1]) + if rel_path.startswith("./"): + rel_path = rel_path[2:] + #print " from path:", source.out_path + #print " to path: ", out_path + #print " rel path: ", rel_path + return rel_path + + + +############################################################################### +# +# Utility functions +# + + +def get_program_directory(): + """Return the path to the directory containing the build software.""" + import __main__ + path = os.path.dirname(__main__.__file__) + if path == "": + path = os.getcwd() + return path + + + +############################################################################### +# +# Logging +# +# 1 Error +# 2 Warning +# 3 Info +# 4 Log +# 5... 
Debug +# +def log(s, level=4): + if level>4: + indent = " " * (level-4) + else: + indent = "" + if level <= cfg.verbose: + print "%s%s" % (indent, s) + +def error(s): + log("error: %s" % s, 1) + +def warning(s): + log("warning: %s" % s, 2) + +def info(s): + log("info: %s" % s, 3) + + + +############################################################################### +# +# Hooks and plugins +# + + +# IkiWiki does something like this: +# At startup: +# getopt modify ARGV +# checkconfig check configuration +# refresh allow plugins to build source files +# While scanning files: +# needsbuild detect if page needs to be rebuild +# filter arbitrary changes +# scan collect metadata +# While rendering files: +# filter arbitrary changes +# preprocess execute macros +# linkify change wikilinks into links +# htmlize turns text into html +# sanitize sanitize html +# templatefile allows changing of the template on a per-file basis +# pagetemplate fill template with page +# format similar to sanitize, but act on whole page body +# At the end: +# savestate plugins can save their state +# +# +# We do something like this: +# +# At startup: +# addoptions allow plugins to add command-line options +# checkconfig check configuration +# start +# While reading files: +# read ask any reader (plugins!) to read the file +# filter ask anybody to filter the contents +# While scanning files: +# scan called per file, let plugins act on file data +# scan_done Allows post-processing of scanned data +# While rendering files: +# htmlize turns text into html-part +# linkify convert link macros to HTML +# pagetemplate ask template engine (plugin!) 
to generate HTML out +# of template and body part +# At the end: +# finish +# +# For more info, see plugins/skeleton.py +# + + +hooks = {} + +def load_plugins(): + """Loads all plugins in the plugins directory.""" + sys.path.append(os.path.join(get_program_directory(), "plugins")) + for s in cfg.plugins: + #print "import:", s + #try: + exec "import %s" % s + #except: + # print "Could not import plugin '%s'" % s + # sys.exit(1) + + +def set_hook(name, last=False): + """This is a decorator, used for mostly plugins, which can append the + attached function to some hook""" + #print "set_hook, name", name + def inside_set_hook(func): + #print "inside_set_hook, function", func.__name__, "name", name, "last", last + if not hooks.has_key(name): + hooks[name] = [] + func.last = last + hooks[name].append(func) + return func + return inside_set_hook + + +def run_hooks(name, **kw): + """This runs hooks that are marked with @set_hook("name")""" + #print "run_hooks:", name + args = Holder(**kw) + args.setDefault("stop_if_result", False) + args.setDefault("return_holder", True) + + # Need to wrap this because run_hooks() is called before + # cfg.verbose has been set + try: + log("running hook '%s'" % name, level=7) + except: + AttributeError + + if hooks.has_key(name): + delay = [] + for func in hooks[name]: + if func.last: + delay.append(func) + continue + #print "running hook:", func + res = func(args) + if args.stop_if_result and res: + return res + for func in delay: + #print "running hook (last):", func.__name__ + res = func(args) + if args.stop_if_result and res: + return res + else: + return None + if args.return_holder: + return args + else: + return res + + +macros = {} + +def set_macro(name): + """This is a decorator, used for mark executable macros""" + + #print "set_macro, name", name + def inside_set_macro(func): + #print "inside_set_macro, function", func.__name__, "name", name + if macros.has_key(name): + error("macro %s already defined" % name) + return + 
macros[name] = func + return func + return inside_set_macro + +functions = {} + +def set_function(name): + """This is a decorator, used for mark executable functions""" + + #print "set_function, name", name + def inside_set_function(func): + #print "inside_set_function, function", func.__name__, "name", name + if functions.has_key(name): + error("function %s already defined" % name) + return + functions[name] = func + return func + return inside_set_function + + +def iso_to_time(val): + try: + t = time.strptime(val, "%Y-%m-%d %H:%M") + except ValueError: + try: + t = time.strptime(val, "%Y-%m-%d") + except ValueError: + warning("%s: wrong ISO format in '%s'" % (self.rel_path, s)) + return int(time.mktime(t)) + +@set_function("format_date") +def format_date(timestamp): + return time.strftime(cfg.date_format, time.localtime(timestamp)) + +@set_function("get_current_file") +def get_current_file(): + return current_file + + + + + +############################################################################### +# +# File reading +# + +def read_file(direc, file): + """ + Ask if some reader wants to read this file. If that happens, + and the reader reads the file in, the contents is also filtered. + + The result is stored in file.contents + + @param direc: directory the file is in + @type direc: a L{Directory} object + @param file: file to process + @type file: a L{File} object + """ + + contents = run_hooks("read", + direc=direc, + file=file, + stop_if_result=True, + return_holder=False) + if not contents: + return + + log("filtering file %s" % file.rel_path, level=6) + file.contents = contents + res = run_hooks("filter", + direc=direc, + file=file) + + +def walk_tree(dirpath): + """ + Walks the directory rooted at 'path', and calls func(dirpath, filenames) + for each directory. 
+ + @param dirpath: starting directory + @type dirpath: string + @param func: function to call for found dirs/files + @type func: function(dirpath, filenames) + """ + + info("Reading files ...") + + def walk(dirpath): + #print "walk", dirpath + rel_path = dirpath[len(cfg.in_dir):] + direc = Directory(rel_path=rel_path, abs_path=dirpath) + direc.inheritFrom(cfg) + + if not rel_path: rel_path = "." + log("reading directory %s" % rel_path, level=4) + + for s in os.listdir(dirpath): + full_path = os.path.join(dirpath, s) + ok = True + if os.path.isdir(full_path): + for e in cfg.exclude_dir: + if fnmatch.fnmatchcase(s, e): + log("ignoring directory %s" % s, level=7) + ok = False + break + if ok: + #print "DIR", s + walk(full_path) + if os.path.isfile(full_path): + for e in cfg.exclude_files: + if fnmatch.fnmatchcase(s, e): + log("ignoring file %s" % s, level=7) + ok = False + break + if ok: + #print "FILE", s + rel_path = relpath(cfg.in_dir, full_path) + log("reading file %s" % rel_path, level=5) + file = File( + path = full_path, + rel_path = rel_path, + direc = direc.rel_path + ) + file.inheritFrom(direc) + read_file(direc, file) + + walk(dirpath) + + + +############################################################################### +# +# Rendering +# + +reMacro = re.compile(r''' + \[\[\! # Begin of macro + \s* + ([^\s\]]+) # Macro name + (?: + \s+ # optional space + ([^\]]+) # optional argumens + )? + \]\] # End of macro + ''', re.VERBOSE) +reMacroArgs = re.compile(r''' + ([-_\w]+) # parameter name + (?: + \s* + = + \s* + (?: + "([^"]*)" # single-quoted + | + (\S+) # unquoted + ) + )? 
+ ''', re.VERBOSE) + +def run_macros(file, contents): + def do_macro(m): + name = m.group(1) + #print "\nname:", name + kw = {'name':name} + if m.group(2): + #print "args:", m.group(2) + for m2 in reMacroArgs.finditer(m.group(2)): + #print " param:", m2.group(1) + #print " arg:", m2.group(3) or m2.group(2) + kw[m2.group(1)] = m2.group(3) or m2.group(2) + if macros.has_key(name): + kw["file"] = file + f = macros[name] + s = f(kw) + if type(s) == types.UnicodeType: + s = s.encode("utf-8") + return s + else: + error("macro %s not defined" % name) + s = reMacro.sub(do_macro, contents) + #print s + return s + + +def scan_files(): + info("Scanning files ...") + + for s in files: + file = files[s] + try: + # Just check if the file has contents + contents = file.contents + except: + continue + + direc = directories[file.direc] + + run_hooks("scan", + direc=direc, + file=file) + run_hooks("scan_done") + + +def render_files(): + info("Rendering files ...") + + for fname_in in files: + global current_file + file = files[fname_in] + current_file = file + + # Do we have a renderer? + if file.render is None: + log("unhandled file: %s" % file.rel_path, 7) + continue + + # Is the renderer not the default HTML renderer? 
+ if file.render != "html": + #print file.render, "on", file.rel_path + run_hooks(file.render, + file=file, + stop_if_result=True, + return_holder=False) + continue + + # Run default renderer + direc = directories[file.direc] + + contents = run_macros(file, file.contents) + #print "contents after 'macrorun':", contents + file.contents = contents + + contents = run_hooks("htmlize", + direc=direc, + file=file, + stop_if_result=True, + return_holder=False) + #print "contents after 'htmlize':", contents + if not contents: + continue + file.contents = contents + + # Output-Filename "berechnen" + file.out_path = os.path.splitext(fname_in)[0] + ".html" + + for fname_in in files: + file = files[fname_in] + current_file = file + if not file.has_key("out_path"): + #print "no out_path", file.rel_path + continue + direc = directories[file.direc] + + contents = run_hooks("linkify", + direc=direc, + file=file, + return_holder=False) + #print "contents after 'linkify':", contents + if not contents: + continue + file.contents = contents + + # TODO: einige Fragmente sollen u.U. in eine andere + # Webseite eingebaut werden und sollten daher nicht in + # ein HTML-File landen + contents = run_hooks("pagetemplate", + direc=direc, + file=file, + stop_if_result=True, + return_holder=False) + #print "contents after 'pagetemplate':", contents + + + # Output-Directory erzeugen + fname_out = os.path.join(cfg.out_dir, file.out_path) + dir_out = os.path.split(fname_out)[0] + #print "dir_out:", dir_out + try: + os.makedirs(dir_out) + except OSError: + pass + + # TODO: evtl. überprüfen, ob contents == f.read(), dann nicht schreiben + log("writing file %s" % fname_out, level=6) + f = open(fname_out, "w") + f.write(contents) + f.close() + # TODO: Time-Stamps setzen? 
+ + #print file.mtime, file.get("ctime","?") + #print direc.keys() + + + +############################################################################### +# +# Main program +# + +@set_hook("addoptions") +def addoptions(params): + parser = params["parser"] + parser.add_option("-i", "--in", dest="in_dir", default="in", + help="input directory", + metavar="DIR") + parser.add_option("-o", "--out", dest="out_dir", default="out", + help="output directory", + metavar="DIR") + parser.add_option("--style-dir", dest="style_dir", default="in/style", + help="directory with style sheets", + metavar="STYLE") + parser.add_option("-v", "--verbose", action="count", + dest="verbose", default=3, + help="print status messages to stdout") + parser.add_option("-k", "--keepgoing", dest="keepgoing", + action="store_true", default=False, + help="keep going past errors if possible") + + return parser + + +@set_hook("checkconfig", last=True) +def checkconfig(params): + # Ensure absolute paths that end in '/'. + cfg.in_dir = os.path.join(os.getcwd(), cfg.in_dir).rstrip('/') + '/' + assert cfg.in_dir.endswith('/') + + +def main(): + global cfg + + # Get configuration from webber.ini + cfg.load('webber.conf') + + # Now load all plugins + load_plugins() + + # Create parser and allow plugins to add their own command line stuff + parser = optparse.OptionParser() + args = run_hooks("addoptions", parser=parser) + (options, args) = parser.parse_args() + + # Recast options into a Holder object, this allows + # us to use it for Mapping.inheritFrom() + options = Holder(**parser.values.__dict__) + + # link contents of webber.ini into cfg and set some defaults, + # then let plugins fixup things in cfg.* + cfg.inheritFrom(options) + cfg.setDefault("exclude_dir", ["plugins"]) + run_hooks("checkconfig") + + run_hooks("start") + + walk_tree(cfg.in_dir) + scan_files() + render_files() + + run_hooks("finish")