--- /dev/null
+/webber.conf
+?
+*.pyc
+*.pyo
+/out
+/cache.tmp
+/sitemap.xml
+/old.*
+/newbuild.prof
+/in/files
--- /dev/null
+
+all: webber.conf
+ ./webber
+
+profile:
+ ./webber --profile
+
+clean:
+ rm -f *.pyc plugins/*.pyc
+
+realclean: clean
+ rm -rf out
+
+# Automatically create webber.conf:
+ifeq ($(wildcard webber.conf),)
+webber.conf: in/webber.conf
+ ln -s in/webber.conf
+endif
--- /dev/null
+import sys, os, codecs, types
+
+
+try:
+ import encodings.utf_32
+ has_utf32 = True
+except:
+ has_utf32 = False
+
+class ConfigInputStream(object):
+ """
+ An input stream which can read either ANSI files with default encoding
+ or Unicode files with BOMs.
+
+ Handles UTF-8, UTF-16LE, UTF-16BE. Could handle UTF-32 if Python had
+ built-in support.
+ """
+ def __init__(self, stream):
+ """
+ Initialize an instance.
+
+ @param stream: The underlying stream to be read. Should be seekable.
+ @type stream: A stream (file-like object).
+ """
+ encoding = None
+ signature = stream.read(4)
+ used = -1
+ if has_utf32:
+ if signature == codecs.BOM_UTF32_LE:
+ encoding = 'utf-32le'
+ elif signature == codecs.BOM_UTF32_BE:
+ encoding = 'utf-32be'
+ if encoding is None:
+ if signature[:3] == codecs.BOM_UTF8:
+ used = 3
+ encoding = 'utf-8'
+ elif signature[:2] == codecs.BOM_UTF16_LE:
+ used = 2
+ encoding = 'utf-16le'
+ elif signature[:2] == codecs.BOM_UTF16_BE:
+ used = 2
+ encoding = 'utf-16be'
+ else:
+ used = 0
+ if used >= 0:
+ stream.seek(used)
+ if encoding:
+ reader = codecs.getreader(encoding)
+ stream = reader(stream)
+ self.stream = stream
+ self.encoding = encoding
+
+ def read(self, size):
+ if (size == 0) or (self.encoding is None):
+ rv = self.stream.read(size)
+ else:
+ rv = u''
+ while size > 0:
+ rv += self.stream.read(1)
+ size -= 1
+ return rv
+
+ def close(self):
+ self.stream.close()
+
+ def readline(self):
+ if self.encoding is None:
+ line = ''
+ else:
+ line = u''
+ while True:
+ c = self.stream.read(1)
+ if c:
+ line += c
+ if c == '\n':
+ break
+ return line
+
+
+WORD = 'a'
+NUMBER = '9'
+STRING = '"'
+EOF = ''
+LCURLY = '{'
+RCURLY = '}'
+LBRACK = '['
+LBRACK2 = 'a['
+RBRACK = ']'
+COMMA = ','
+COLON = ':'
+MINUS = '-'
+TRUE = 'True'
+FALSE = 'False'
+NONE = 'None'
+
+WORDCHARS = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_"
+
+if sys.platform == 'win32':
+ NEWLINE = '\r\n'
+elif os.name == 'mac':
+ NEWLINE = '\r'
+else:
+ NEWLINE = '\n'
+
+class ConfigError(Exception):
+ """
+ This is the base class of exceptions raised by this module.
+ """
+ pass
+
+class ConfigFormatError(ConfigError):
+ """
+ This is the base class of exceptions raised due to syntax errors in
+ configurations.
+ """
+ pass
+
+class ConfigReader(object):
+ """
+ This internal class implements a parser for configurations.
+
+ >>> conf = Holder()
+ >>> cr = ConfigReader(conf)
+ >>> cr.load("webber.conf")
+ >>> print conf.lang
+ de
+ >>> import cStringIO
+ >>> s = cStringIO.StringIO("num: 1\\nlang: 'us'")
+ >>> cr.load(s)
+ >>> print conf.lang
+ us
+ >>> print conf.num
+ 1
+ >>> s = cStringIO.StringIO("arr: [1,2]")
+ >>> cr.load(s)
+ >>> print conf.arr
+ [1, 2]
+ >>> s = cStringIO.StringIO("assoc: {a:11, b:22}")
+ >>> cr.load(s)
+ >>> print conf.assoc["a"], conf.assoc["b"]
+ 11 22
+ """
+
+ def __init__(self, config):
+ self.filename = None
+ self.config = config
+ self.lineno = 0
+ self.colno = 0
+ self.lastc = None
+ self.last_token = None
+ self.whitespace = ' \t\r\n'
+ self.quotes = '\'"'
+ self.punct = ':-+*/%,.{}[]()@`$'
+ self.digits = '0123456789'
+ self.wordchars = '%s' % WORDCHARS # make a copy
+ self.identchars = self.wordchars + self.digits
+ self.pbchars = []
+ self.pbtokens = []
+
+ def location(self):
+ """
+ Return the current location (filename, line, column) in the stream
+ as a string.
+
+ Used when printing error messages,
+
+ @return: A string representing a location in the stream being read.
+ @rtype: str
+ """
+ return "%s(%d,%d)" % (self.filename, self.lineno, self.colno)
+
+ def getChar(self):
+ """
+ Get the next char from the stream. Update line and column numbers
+ appropriately.
+
+ @return: The next character from the stream.
+ @rtype: str
+ """
+ if self.pbchars:
+ c = self.pbchars.pop()
+ else:
+ c = self.stream.read(1)
+ self.colno += 1
+ if c == '\n':
+ self.lineno += 1
+ self.colno = 1
+ return c
+
+ def __repr__(self):
+ return "<ConfigReader at 0x%08x>" % id(self)
+
+ __str__ = __repr__
+
+ def getToken(self):
+ """
+ Get a token from the stream. String values are returned in a form
+ where you need to eval() the returned value to get the actual
+ string. The return value is (token_type, token_value).
+
+ Multiline string tokenizing is thanks to David Janes (BlogMatrix)
+
+ @return: The next token.
+ @rtype: A token tuple.
+ """
+ if self.pbtokens:
+ return self.pbtokens.pop()
+ stream = self.stream
+ token = ''
+ tt = EOF
+ while True:
+ c = self.getChar()
+ if not c:
+ break
+ if c in self.quotes:
+ token = c
+ quote = c
+ tt = STRING
+ escaped = False
+ multiline = False
+ c1 = self.getChar()
+ if c1 == quote:
+ c2 = self.getChar()
+ if c2 == quote:
+ multiline = True
+ token += quote
+ token += quote
+ else:
+ self.pbchars.append(c2)
+ self.pbchars.append(c1)
+ else:
+ self.pbchars.append(c1)
+ while True:
+ c = self.getChar()
+ if not c:
+ break
+ token += c
+ if (c == quote) and not escaped:
+ if not multiline or (len(token) >= 6 and token.endswith(token[:3]) and token[-4] != '\\'):
+ break
+ if c == '\\':
+ escaped = not escaped
+ else:
+ escaped = False
+ if not c:
+ raise ConfigFormatError('%s: Unterminated quoted string: %r, %r' % (self.location(), token, c))
+ break
+ if c in self.whitespace:
+ self.lastc = c
+ continue
+ elif c in self.punct:
+ token = c
+ tt = c
+ if (self.lastc == ']') or (self.lastc in self.identchars):
+ if c == '[':
+ tt = LBRACK2
+ break
+ elif c in self.digits:
+ token = c
+ tt = NUMBER
+ while True:
+ c = self.getChar()
+ if not c:
+ break
+ if c in self.digits:
+ token += c
+ elif (c == '.') and token.find('.') < 0:
+ token += c
+ else:
+ if c and (c not in self.whitespace):
+ self.pbchars.append(c)
+ break
+ break
+ elif c in self.wordchars:
+ token = c
+ tt = WORD
+ c = self.getChar()
+ while c and (c in self.identchars):
+ token += c
+ c = self.getChar()
+ if c: # and c not in self.whitespace:
+ self.pbchars.append(c)
+ if token == "True":
+ tt = TRUE
+ elif token == "False":
+ tt = FALSE
+ elif token == "None":
+ tt = NONE
+ break
+ else:
+ raise ConfigFormatError('%s: Unexpected character: %r' % (self.location(), c))
+ if token:
+ self.lastc = token[-1]
+ else:
+ self.lastc = None
+ self.last_token = tt
+ return (tt, token)
+
+ def load(self, stream):
+ """
+ Load the configuration from the specified stream.
+
+ @param stream: A stream from which to load the configuration.
+ @type stream: A stream (file-like object).
+ @param suffix: The suffix of this configuration in the parent
+ configuration. Should be specified whenever the parent is not None.
+ @raise ConfigError: If parent is specified but suffix is not.
+ @raise ConfigFormatError: If there are syntax errors in the stream.
+ """
+
+ if type(stream) == types.StringType:
+ stream = ConfigInputStream(file(stream, 'rb'))
+
+ self.setStream(stream)
+ self.token = self.getToken()
+ self.parseMappingBody(self.config)
+ if self.token[0] != EOF:
+ raise ConfigFormatError('%s: expecting EOF, found %r' % (self.location(), self.token[1]))
+
+ def setStream(self, stream):
+ """
+ Set the stream to the specified value, and prepare to read from it.
+
+ @param stream: A stream from which to load the configuration.
+ @type stream: A stream (file-like object).
+ """
+ self.stream = stream
+ if hasattr(stream, 'name'):
+ filename = stream.name
+ else:
+ filename = '?'
+ self.filename = filename
+ self.lineno = 1
+ self.colno = 1
+
+ def match(self, t):
+ """
+ Ensure that the current token type matches the specified value, and
+ advance to the next token.
+
+ @param t: The token type to match.
+ @type t: A valid token type.
+ @return: The token which was last read from the stream before this
+ function is called.
+ @rtype: a token tuple - see L{getToken}.
+ @raise ConfigFormatError: If the token does not match what's expected.
+ """
+ if self.token[0] != t:
+ raise ConfigFormatError("%s: expecting %s, found %r" % (self.location(), t, self.token[1]))
+ rv = self.token
+ self.token = self.getToken()
+ return rv
+
+ def parseMappingBody(self, parent):
+ """
+ Parse the internals of a mapping, and add entries to the provided
+ L{Mapping}.
+
+ @param parent: The mapping to add entries to.
+ @type parent: A L{Mapping} instance.
+ """
+ while self.token[0] in [WORD, STRING]:
+ self.parseKeyValuePair(parent)
+
+ def parseKeyValuePair(self, parent):
+ """
+ Parse a key-value pair, and add it to the provided L{Mapping}.
+
+ @param parent: The mapping to add entries to.
+ @type parent: A L{Mapping} instance.
+ @raise ConfigFormatError: if a syntax error is found.
+ """
+ tt, tv = self.token
+ if tt == WORD:
+ key = tv
+ suffix = tv
+ elif tt == STRING:
+ key = eval(tv)
+ suffix = '[%s]' % tv
+ else:
+ msg = "%s: expecting word or string, found %r"
+ raise ConfigFormatError(msg % (self.location(), tv))
+ self.token = self.getToken()
+ # for now, we allow key on its own as a short form of key : True
+ if self.token[0] == COLON:
+ self.token = self.getToken()
+ value = self.parseValue(parent, suffix)
+ else:
+ value = True
+ try:
+ parent[key] = value
+ except Exception, e:
+ raise ConfigFormatError("%s: %s, %r" % (self.location(), e,
+ self.token[1]))
+ tt = self.token[0]
+ if tt not in [EOF, WORD, STRING, RCURLY, COMMA]:
+ msg = "%s: expecting one of EOF, WORD, STRING, RCURLY, COMMA, found %r"
+ raise ConfigFormatError(msg % (self.location(), self.token[1]))
+ if tt == COMMA:
+ self.token = self.getToken()
+
+ def parseValue(self, parent, suffix):
+ """
+ Parse a value.
+
+ @param parent: The container to which the value will be added.
+ @type parent: A L{Container} instance.
+ @param suffix: The suffix for the value.
+ @type suffix: str
+ @return: The value
+ @rtype: any
+ @raise ConfigFormatError: if a syntax error is found.
+ """
+ tt = self.token[0]
+ if tt in [STRING, WORD, NUMBER, TRUE, FALSE, NONE, MINUS]:
+ rv = self.parseScalar()
+ elif tt == LBRACK:
+ rv = self.parseSequence(parent, suffix)
+ elif tt in [LCURLY]:
+ rv = self.parseMapping(parent, suffix)
+ else:
+ raise ConfigFormatError("%s: unexpected input: %r" % (self.location(), self.token[1]))
+ return rv
+
+ def parseSequence(self, parent, suffix):
+ """
+ Parse a sequence.
+
+ @param parent: The container to which the sequence will be added.
+ @type parent: A L{Container} instance.
+ @param suffix: The suffix for the value.
+ @type suffix: str
+ @return: a L{Sequence} instance representing the sequence.
+ @rtype: L{Sequence}
+ @raise ConfigFormatError: if a syntax error is found.
+ """
+ rv = []
+ self.match(LBRACK)
+ tt = self.token[0]
+ while tt in [STRING, WORD, NUMBER, LCURLY, LBRACK, TRUE, FALSE, NONE]:
+ suffix = '[%d]' % len(rv)
+ value = self.parseValue(parent, suffix)
+ rv.append(value)
+ tt = self.token[0]
+ if tt == COMMA:
+ self.match(COMMA)
+ tt = self.token[0]
+ continue
+ self.match(RBRACK)
+ return rv
+
+ def parseMapping(self, parent, suffix):
+ """
+ Parse a mapping.
+
+ @param parent: The container to which the mapping will be added.
+ @type parent: A L{Container} instance.
+ @param suffix: The suffix for the value.
+ @type suffix: str
+ @return: a L{Mapping} instance representing the mapping.
+ @rtype: L{Mapping}
+ @raise ConfigFormatError: if a syntax error is found.
+ """
+ if self.token[0] == LCURLY:
+ self.match(LCURLY)
+ rv = {}
+ self.parseMappingBody(rv)
+ self.match(RCURLY)
+ return rv
+
+ def parseScalar(self):
+ """
+ Parse a scalar - a terminal value such as a string or number, or
+ an L{Expression} or L{Reference}.
+
+ @return: the parsed scalar
+ @rtype: any scalar
+ @raise ConfigFormatError: if a syntax error is found.
+ """
+ tt = self.token[0]
+ if tt in [NUMBER, WORD, STRING, TRUE, FALSE, NONE]:
+ rv = self.token[1]
+ if tt != WORD:
+ rv = eval(rv)
+ self.match(tt)
+ elif tt == MINUS:
+ self.match(MINUS)
+ rv = -self.parseScalar()
+ else:
+ raise ConfigFormatError("%s: unexpected input: %r" %
+ (self.location(), self.token[1]))
+ #print "parseScalar: '%s'" % rv
+ return rv
+
+
+
+class Holder(object):
+ """This is a simple wrapper class so that you can write
+
+ h = Holder(bar=1, baz="test")
+
+ instead of writing
+
+ foo["bar"] = 1
+ baz["bar"] = "test"
+
+ Holder will be the base class for all configurations and objects.
+ """
+
+ def __init__(self, **kw):
+ """Creates a new folder object:
+
+ >>> h = Holder(bar=1, baz="test")
+ >>> print h.bar
+ 1
+ >>> print h.baz
+ test
+ """
+ self.__dict__.update(kw)
+ self._inherit_from = []
+
+ def keys(self):
+ """Return list of stored variables.
+
+ >>> h = Holder(bar=1, baz="test")
+ >>> print sorted(h.keys())
+ ['bar', 'baz']
+ """
+ return filter(lambda x: x[0] != '_', self.__dict__.keys())
+
+ def has_key(self, key):
+ return self.__dict__.has_key(key)
+
+ def setDefault(self, key, value):
+ if not self.__dict__.has_key(key):
+ self.__dict__[key] = value
+
+ def __getitem__(self, key):
+ """Allows access to the variables via obj[name] syntax.
+
+ >>> h = Holder()
+ >>> h.foo = "Hello"
+ >>> print h["foo"]
+ Hello
+ """
+ try:
+ return self.__dict__[key]
+ except:
+ pass
+ for inh in self._inherit_from:
+ try:
+ return inh[key]
+ except:
+ pass
+ raise KeyError(key)
+
+ __getattr__ = __getitem__
+
+ def __setitem__(self,key,val):
+ """Allows access to the variables via obj[name] syntax.
+
+ >>> h = Holder()
+ >>> h["foo"] = "Hello"
+ >>> print h.foo
+ Hello
+ """
+ self.__dict__[key] = val
+
+ def inheritFrom(self, holder):
+ """
+ This allows on Holder to inherit settings from another holder.
+
+ >>> h1 = Holder(a=1, b=2)
+ >>> h2 = Holder(c=3)
+ >>> h2.inheritFrom(h1)
+ >>> print h2.c
+ 3
+ >>> print h2["b"]
+ 2
+ >>> print h2.a
+ 1
+ """
+ self._inherit_from.append(holder)
+
+ def load(self, stream):
+ """
+ >>> conf = Holder()
+ >>> cr = ConfigReader(conf)
+ >>> cr.load("webber.conf")
+ >>> print conf.lang
+ de
+ """
+ cr = ConfigReader(self)
+ cr.load(stream)
+
+ def __repr__(self):
+ return "<%s object: " % self.__class__.__name__ + ",".join(self.keys()) + ">"
+
+
+
+
+
+def _test():
+ import doctest
+ doctest.testmod()
+
+if __name__ == "__main__":
+ _test()
--- /dev/null
+title: Command line options
+linktitle: Cmdline
+parent: Configuration
+ctime: 2009-06-26
+
+= Help =
+
As usual, you can get command line help with "`-h`" or "`--help`":
+
+ usage: webber [options]
+
+ options:
+ -h, --help show this help message and exit
+ -i DIR, --in=DIR input directory
+ -o DIR, --out=DIR output directory
+ --style-dir=STYLE directory with style sheets
+ -v, --verbose print status messages to stdout
+ -k, --keepgoing keep going past errors if possible
+ -V, --test_verbose print status messages to stdout
+
+= Input directory =
+
"`-i`" or "`--in`" defaults to "`in`" and specifies where webber
searches for source files.
+
+You can access this via "`cfg.in_dir`" (or "`file.in_dir`", see
+[[inheritance]]).
+
+= Output directory =
+
+"`-o`" or "`--out`" defaults to "`out`" and specifies where webber
+writes the output files.
+
+
+= Template (Style) =
+
+You can define the style of the generated website via HTML templates. If
+you have more of them, you switch between different ones via "`--style-dir`".
+The default is "`in/style`".
+
+
+= Verbosity =
+
+A common option is "`-v`" (or "`--verbose`") to increase the verbosity. Repeat
+to increase even more.
+
+
+= Continue on errors =
+
+With "`-k`" or "`--keepgoing`" you can tell webber to ignore errors in one
+page and continue with the next page.
+
+
--- /dev/null
+title: Configuration
+parent: Webber
+ctime: 2009-06-26
+
+Configuration happens either the [[commandline]] or
+via the configuration file (described below). All Configurations are
+[[inherited|inheritance]] and or overwritable on a per-directory and
+per-file basis.
+
+The main means for configuration is the config file:
+
+= Format =
+
Webber expects a `webber.conf` file in its root directory. It should look like this:
+
+ template: "default"
+ date_format: "%d.%m.%Y"
+ input_encoding: "iso-8859-1"
+ output_encoding: "iso-8859-1"
+ plugins: [
+ "skeleton",
+ "hierarchy",
+ "link",
+ "read_rst",
+ "read_html",
+ "read_copyonly",
+ "read_markdown",
+ "template_mako",
+ ]
+ exclude_dir: [
+ "webber.conf",
+ "*.tmpl",
+ ]
+ exclude_files: [
+ ]
+
You can also set some options which are normally defined by [[commandline]].
This saves you from specifying them on every run of webber:
+
+ in_dir: "in"
+ out_dir: "out"
+ style_dir: "in/style"
+ verbose: 5
+
+Beside those entries, you can specify any additional entries that will then
+be available in your plugins or templates.
+
+= Meaning =
+
+== template ==
+
+Used by the [[template_mako]] to select the template.
+
+== date_format ==
+
+Used in `format_date()`.
+
+== input_encoding ==
+
+Encoding ('utf-8', 'iso-8859-1' etc) used for reading files.
+
+== output_encoding ==
+
+Encoding ('utf-8', 'iso-8859-1' etc) used when writing the final HTML pages.
+
+== plugins ==
+
+List of [[plugins]] to load.
+
+== exclude_dirs ==
+
+List of directories below `cfg.in_dir` to skip.
+
+== exclude_files ==
+
+List of files below `cfg.in_dir` to skip.
+
+== in_dir, out_dir, style_dir ==
+
+See [[commandline]].
--- /dev/null
+title: Functions
+parent: Home
+ctime: 2009-06-26
+
+= Calling functions =
+
+You can call functions only from [[template_mako]]. An example:
+
+ Modified ${format_date(mtime)}
+
Here's a list of functions defined by webber and its default plugins:
+
+== format_date ==
+
Takes a timestamp (seconds since 1st January 1970) and converts it into
a string, according to `cfg.date_format`.
+
+Defined in `webber.py`.
+
+== get_breadcrumbs ==
+
+Returns the breadcrumbs as "`(linktitle, link)`" tuples.
+
+Defined in [[hierarchy.py|hierarchy]], where you find an example.
+
+== get_current_file ==
+
+Returns the current `class File` object.
+
+Defined in `webber.py`.
+
+== get_recently ==
+
+Returns a list of up to 10 pages below the current page. For each
+page, you'll get a "`(mtime, ctime, title, link)`" tuple back.
+
+Defined in [[hierarchy.py|hierarchy]], where you find an example.
+
+
+== get_sidemenu ==
+
+Returns a menu for the current page. For each page in this menu you'll
+get back a "`(level, part_of_path, is_current, title, link)`" tuple.
+
+Defined in [[hierarchy.py|hierarchy]], where you find an example.
+
+
+== func ==
+
+A sample function in the [[skeleton.py|skeleton]]. See below.
+
+
+= Writing functions =
+
+A function is a simply python function which returns HTML. The function needs
+to be decorated with "`@set_function(name)`". There's an example in
+[[skeleton.py|skeleton]], which looks like:
+
+ :::python
+ @set_function("func")
+ def sample_func():
+ if cfg.test_verbose:
+ print "in macro skeleton.sample_func"
+ return "{ output from sample function }"
+
+Inside your template, you can call the function without parameters or
+with arbitrary parameters, like this:
+
+ ${func(a=1, b="test")}
+
+Inside your function you can access this as ...
+
+* "`params.a`" which contains the integer "`1`"
+* "`params.b`" which contains the string "`test`"
--- /dev/null
+title: Generate hierarchy
linktitle: Hierarchy
+parent: Plugins
+ctime: 2009-06-26
+
This is one of the more complex plugins, used to generate menus and
breadcrumbs. For this, it reads certain keywords from the
[[pageformat]] and builds an internal parent-child representation.
+
This is later used by the functions "`get_breadcrumbs()`" and
+"`get_sidemenu()`", which you call from the [[template_mako]].
+
+= Page attributes =
+
+At the "`scan`" [[hook|hooks]], the plugin looks for entries like:
+
+ parent: Home
+
+or
+
+ childs: Cmdline, Inheritance
+
+Here's an example of five pages with different attributes:
+
+---
+
+ title: Homepage
+ linktitle: Home
+
+---
+
+ title: Impressum
+ parent: Home
+
+---
+
+ title: Job
+ parent: Home
+
+---
+
+ title: CV
+ parent: Job
+
+---
+
+ title: Knowledge
+ parent: Job
+
+---
+
+= Internal representation =
+
The plugin would populate the variables "`_childs`" and "`_parent`" like this:
+
+ _parent = {
+ 'Impressum': 'Home',
+ 'CV': 'Job',
+ 'Knowledge': 'Job',
+ 'Job': 'Home'
+ }
+
+ _childs = {
+ 'Home': [(100, 'Job'),
+ (100, 'Impressum')],
+ 'Job': [(100, 'CV'),
+ (100, 'Knowledge')]}
+
+That's all you need to generate a sidemap, breadcrumbs or a side-menu.
+
+The pages are first ordered by some number, then by the "`linktitle`". If
+a page has no "`linktitle:`" attribute, then the normal title will be used
+instead.
+
+If you want to modify the sort-order, simply specify a "`order: 200`" in the
+page itself.
+
+= Generation of breadcrumbs =
+
+This is done via a suitable [[template_mako]]. The
+template uses the function "`get_breadcrumbs(linktitle)`" and returns
+(linktitle, link) tuples. As a bonus: all the links are always relative to
+the calling page.
+
+Here's a sample Mako template excerpt:
+
+ <ul>\
+ % for linktitle, link in get_breadcrumbs(file.linktitle):
+ <li><a href="${link}">${linktitle}</a></li>\
+ % endfor
+ </ul>\
+
+= Generation of a side-menu =
+
+This again is done via a suitable [[template_mako]]. The
+template uses the function "`get_sidemenu(linktitle)`" and returns (level,
+part_of_path, is_current, title, link) tuples. Again all links are relative
+to the calling page.
+
* "`level`" is the indentation level, starting with 0. You can use this for
+ CSS "`id=`" or "`class`" attributes
+* "`part_of_path`" is a flag telling you if the mentioned page is part
+ of your path, i.e. if the specified page is in the breadcrumbs.
+* "`is_current`" is a flag marking the current page.
+* "`title`" is the full title for the page
+* "`link`" is the relative URL to the page
+
+Here's a sample Mako template excerpt that converts this into a HTML menu:
+
+ <ul id="sidebar">
+ % for level, part_of_path, current, title, link in get_sidemenu(file.linktitle):
+ <li class="sidebar${level}"\
+ % if current:
+ id="sidebar_current">${title | entity}</li>
+ % else:
+ ><a href="${link}">${title | entity}</a></li>
+ % endif
+ % endfor
+ </ul>
+
+= Generate a list of recently changed pages =
+
+To get a list of recently changed pages, do this:
+
+ <%
+ history = get_recently(get_current_file())
+ %>
+ % if len(history)>1:
+ <h2>Recent changed</h2>
+ % for mtime,ctime,title,link in history:
+ % if mtime > ctime:
+ Modified ${format_date(mtime)}\
+ % else:
+ Created ${format_date(ctime)}\
+ % endif
+ : <a href="${link}">${title | entity}</a><br />
+ % endfor
+ % endif
--- /dev/null
+title: Hooks
+parent: Webber
+ctime: 2009-06-26
+
+= At Startup =
+
+== addoptions ==
+
+Can be used by plugins to add their own command line options.
+
+"`params.parser`" contains the "`optparse`" based parser.
+
+Example:
+
+ :::python
+ @set_hook("addoptions")
+ def test_addoptions(params):
+ params.parser.add_option("-V", "--test_verbose", action="count",
+ dest="test_verbose", default=0,
+ help="print status messages to stdout")
+
+== checkconfig ==
+
+After the command-line options have been processed and incorporated into
+config object, this hook is called. Here each plugin can check if the
+specified configurations are sane.
+
+* "`params`" is empty, you should use "`cfg`" directly:
+
+Example:
+
+ :::python
+ @set_hook("checkconfig")
+ def test_checkconfig(params):
+ if cfg.test_verbose:
+ print "WARNING: I'll be now much more noisy"
+ # I could also directly modify the configuration:
+ cfg.foo = "mooh"
+
+== start ==
+
+This hook is called just before walking the directory tree.
+
+* "`params`" is empty:
+
+Example:
+
+ :::python
+ @set_hook("start")
+ def test_start(params):
+ print "in start hook"
+
+
+= While reading source files =
+
+== read ==
+
+Now webber walks the directory tree specified in "`cfg.in_dir"`, excluding
+anything from "`cfg.exclude_dir`" and "`cfg.exclude_file"`. For each of the
+remaining files this hook is called.
+
Usually the "`read_*`" plugins implement this hook. And usually they look
at the file-extension and decide if they can process this file or not.
If they do, the plugin should also set "`file.render`", which is normally
"`html`". However, it can be something else. In this case "`file.render`"
specifies a hook that gets called for this file.
+
+The first hook that returns contents wins, no other hooks will be called.
+
+* "`params.direc`" contains a "`class Directory`" object
+* "`params.file`" contains a "`class File`" object
+
+Example:
+
+ :::python
+ @set_hook("read")
+ def read(params):
+ file = params.file
+ if file.rel_path.endswith(".html"):
+ file.render = "html"
+ f = file.read_keywords()
+ return f.read()
+
+== filter ==
+
+Any file that got read will be filtered. At this stage the text is still in the
+original format.
+
+Currently no webber-supplied plugin implements this.
+
+* "`params.direc`" contains a "`class Directory`" object
+* "`params.file`" contains a "`class File`" object
+* "`params.contents`" contains the text
+
+Example:
+
+ :::python
+ @set_hook("filter")
+ def filter(params):
+ params.content = params.content.replace("e", "EEEEE")
+
+
+= After reading files =
+
+At this stage all pages and their meta-information has been read. Now we can
+generate additional data, e.g. page hierarchy, tag-clouds, lists of recently
+changed files, etc. This is done via the following two hooks.
+
+The webber-supplied plugin [[hierarchy]] uses this
+mechanism.
+
+== scan ==
+
+This hook is called once per file with contents:
+
+* "`params.direc`" contains a "`class Directory`" object
+* "`params.file`" has a "`class File`" object
+* "`params.file.contents`" contains the text
+
+== scan_done ==
+
+Finally one "`scan_done`" hook is called. The plugin [[hierarchy]]
+uses this to sort links.
+
+* "`params`" is empty.
+
+= While rendering files =
+
+The following hooks are called for each file that has a rendered in
+"`file.render`" set. See the "`read"`-hook in how to set it.
+
+If "`file.render`" is "`html"`, then the hooks "`htmlize"`, "`linkify`" and
+"`pagetemplate`" are run in this order. Otherwise the hook specified
+in "`file.render`" is called.
+
+== htmlize ==
+
+This hook converts contents into HTML.
+
The first hook that returns HTML wins, no other hooks will be called.
+
+* "`params.direc`" contains a "`class Directory`" object
+* "`params.file`" has a "`class File`" object
+* "`params.file.contents`" contains the text
+
+== linkify ==
+
This hook should convert any links into HTML. Implemented by the plugin
[[link]].
+
+* "`params.direc`" contains a "`class Directory`" object
+* "`params.file`" has a "`class File`" object
+* "`params.file.contents`" contains the HTML for the body text of the page
+
+== pagetemplate ==
+
+The implementation for this is responsible to generate the final html page,
+ready to be written. Implemented by [[template_mako]] plugin.
+
The first hook that returns a finished HTML page wins, no other hooks
will be called.
+
+* "`params.direc`" contains a "`class Directory`" object
+* "`params.file`" has a "`class File`" object
+* "`params.file.contents`" contains the HTML for the body text of the page
+
+== copyfile ==
+
+This is one local hook, run instead of the "`htmlize"`, "`linkify`" and
+"`pagetemplate`" hooks. It's defined and implemented by the plugin
+[[read_copyonly]].
+
The first hook that returns anything wins, no other hooks will be called.
+
+* "`params.direc`" contains a "`class Directory`" object
+* "`params.file`" has a "`class File`" object
+
+= At the end =
+
Now everything has been converted to HTML and written out. And we're just
+one hook away from finishing webber:
+
+== finish ==
+
+This hook is called at the end of webber's execution. No webber-supplied
+plugin uses it currently, but you could use this to save local state into some
+file.
+
+* "`params`" is empty
--- /dev/null
+title: Webber
+ctime: 2009-06-26
+
+* [[overview]]
+* [[pageformat]]
+* [[configuration]]
+ * [[commandline]]
+ * [[inheritance]]
+* [[plugins]]
+ * [[read_rst]]
+ * [[read_markdown]]
+ * [[read_html]]
+ * [[read_copyonly]] (e.g. images)
+ * [[link]]
+ * [[hierarchy]]
+ * [[template_mako]]
+ * [[skeleton]]
+* [[hooks]]
+* [[functions]]
+* [[macros]]
+* [[templates]]
--- /dev/null
+title: Configuration inheritance
+linktitle: Inheritance
+parent: Webber
+ctime: 2009-06-26
+
+= Overview =
+
+Internally, webber uses a bunch of `Holder` classes to store information
+(command-line options, config file options, parameters for a directory,
+parameters for a file).
+
+Each `Holder` "inherits" configuration entries from the layer above:
+
+* `options` for command-line options
+* `cfg` for entries from the command line
+* `direc` for information about a directory
* `file` (either directly or via `get_current_file()`) for data about the
  currently rendered file
+
+= Example =
+
+Due to parsing the [[command line|commandline]] there will exist an entry
+`options.style_dir`.
+
+However, you can also access this same value via `cfg.style_dir`,
+`direc.style_dir` and `file.style_dir`. Any one of them however could
+over-write the settings that originally was in `options`.
+
+Quite often you'll use this for the page template. In `webber.conf`, you
+specify `template: "default"`, which will be used for most pages. Any
+page that needs a different template will get `template: history` entry
+in it's header.
--- /dev/null
+title: Create HTML links
+linktitle: Linkify
+parent: Plugins
+ctime: 2009-06-26
+
+This plugin converts strings in the form
+
+<pre><code>[</code><code>[url]]
+[</code><code>[text|url]]
+[</code><code>[#anchor]]
+[</code><code>[text|url#anchor]]
+</code></pre>
+
+into HTML `<a href=...>` tags.
+
+= Automatic linkification =
+
+Instead of an URL you can also specify the following things:
+
+* the page title
+* the short link title
+* the basename of the file (filename without extension and directory name)
+
In this case the link plugin will search through all pages and take the
+first match.
+
+Example:
+
Suppose you've two files "`testfile.md`" and "`testfile2.md`" which look like this:
+
+ title: Foo
+ linktitle: bar
+
+---
+
+ title: Test2
+
+then the following two links
+
+<pre><code>[</code><code>[Foo]]
+[</code><code>[bar]]
+[</code><code>[testfile2]]
+</code></pre>
+
+will produce two links to the first file and one link to the second file.
+All text part of the HTML link will be substituted with the title of the
+referred pages, except you specify a text by yourself.
--- /dev/null
+title: Macros
+parent: Home
+ctime: 2009-06-26
+
+= Calling macros =
+
+Macros are executed whenever the sequence
+"<code>[</code><code>[!name]]</code>" or
+"<code>[</code><code>[!name args]]</code>" is in the source-file.
+
+Webber itself doesn't define any macros.
+
+
+= Defining macros =
+
+A macro is a simply python function which returns HTML. The function needs
+to be decorated with "`@set_macro(name)`". There's an example in
+[[skeleton.py|skeleton]], which looks like:
+
+ :::python
+ @set_macro("sample")
+ def sample_macro(params):
+ if cfg.test_verbose:
+ print "in macro skeleton.sample_macro, params:", params
+ return "{ output of sample macro }"
+
+If you call this macro, you'll see the output "[[!sample]]".
+
+* "`params.name`" contains the name of the macro
+* "`params.file`" contains the current "`class File`" object
+
+You can submit additional string arguments, e.g. "<code>[</code><code>[!sample
+arg1="string"]]</code>". This will yield
+
+* "`params.arg1`" contains "`string`"
+
--- /dev/null
+title: Overview
+parent: Webber
+ctime: 2009-06-26
+
+*Webber* is a static web-site generation tool, loosely based on ideas
+from IkiWiki and my own likings.
+
+TODO
--- /dev/null
+title: Page format
+parent: Home
+ctime: 2009-06-26
+
+Every page contains a header, then a blank line, and then the text that
+should show up in the web page.
+
The header consists of several keywords, each followed by a colon and a space,
and then the value.
+
+Here's an example:
+
+ title: Impressum
+
+ Hi, I'm Mario and I won't tell you more about me :-)
+
+
+= Your own keywords =
+
+Inside the template, functions and macros you can access all entries
+by "`file.XXXX`" and you're free to invent your own keywords:
+
+ title: Impressum
+ subtitle: What you should know about this web-site
+
+ Hi, I'm Mario and I won't tell you more about me :-)
+
+Now you can access "`${file.subtitle}`" in your template and as
+"`params.file.subtitle`" in your own [[macros|macros]] or
+[[functions|functions]].
+
+
+= Overriding configuration =
+
+As "`file`" inherits all configuration from "`cfg`" (see [[inheritance]]),
+you can also specify a different template on a per-file basis:
+
+ title: Impressum
+ template: boring_bg
+
+ Hi, I'm Mario and I won't tell you more about me :-)
+
+
+= Webber's keywords =
+
+== title ==
+
Full (long) title for the page. Ends up in
+"`<head><title>...</title></head>`".
+
+Very mandatory. Extremely important. You cannot have a page without a title.
+Never. Forget. The. Title.
+
+Depending on your template it will also be set inside "`<h1>...</h1>`" at the
+start of your web page.
+
+
+== linktitle ==
+
+Sometimes the title is simply too long, e.g. for breadcrumbs. Therefore you
+can specify a link-title, which will be used by [[hierarchy]] when generating
+breadcrumbs and a side-menu.
+
+
+== parent ==
+
+Used by [[hierarchy]] to indicate relationship.
+
+
+== order ==
+
+All pages with the same "`parent`" will end up below the parent on the
+side-menu (see [[hierarchy]] for details). They will be alphabetically sorted.
+
+If you don't want this default sort order, you can specify your own ordering.
+
+The default order is 100, so anything with a lower order will show up on the
+top, anything higher at the bottom of the side-menu.
+
+== links ==
+
+Used by [[hierarchy]] to indicate relationship. Usually it's better to use
+"`parent`" instead.
+
+
+== ctime ==
+
Here you can specify an ISO formatted date and/or time specifier, which contains
+the document creation date/time. Examples:
+
+ ctime: 2009-06-29
+ ctime: 2009-06-29 14:33
+
If you don't specify this, then the document's "`mtime`" will be used instead.
+
+
== mtime ==
+
Here you can specify an ISO formatted date and/or time specifier, which contains
+the document modification date/time. Examples:
+
+ mtime: 2009-06-29
+ mtime: 2009-06-29 14:33
+
+If you don't specify this, then the "last-modified"-time from the file-system
+will be used instead.
+
+
--- /dev/null
+title: Plugins
+parent: Webber
+ctime: 2009-06-26
+
Webber doesn't do much on its own. Almost all the real work is delegated
+to plugins. Those plugins do:
+
+* Read files and generate HTML snippets ([[read_rst.py|read_rst]],
+ [[read_markdown.py|read_markdown]], [[read_html.py|read_html]])
+ or copy files verbatim, e.g. for graphics
+ ([[read_copyonly.py|read_copyonly]])
+* Update internal state or modify HTML snippets
+ ([[hierarchy.py|hierarchy]], [[link.py|link]])
+* Create HTML pages ([[template_mako.py|template_mako]])
+
There's another plugin there ([[skeleton.py|skeleton]]), which is
just a demo for plugin-programmers.
+
+Plugins simply reside in the "`plugins/`" directory. However, webber
+doesn't load all of them automatically. Instead you specify in the
+configuration file [[webber.conf|configuration]] which
+plugins you want.
+
+Once plugins are loaded, webber orchestrates the work of itself and
+all plugins via [[hooks]].
--- /dev/null
+title: Read and copy binary files
+linktitle: Read binaries
+parent: Plugins
+ctime: 2009-06-26
+
+This plugin copies files (e.g. graphics files) into the destination
+folder.
+
+To configure which files should be copied you modify
[[webber.conf|configuration.html]]. An example snippet might be:
+
+ copy_files: [
+ "*.png",
+ "*.jpg",
+ "*.gif",
+ "*.css",
+ "robots.txt",
+ ]
--- /dev/null
+title: Read HTML
+parent: Plugins
+ctime: 2009-06-26
+
+This plugin reads HTML snippets from "`*.html`" files.
+
Please note that currently the plugin assumes that this is an HTML snippet.
That means: the snippets should only contain what is inside "`<body>`" and
"`</body>`", but without those tags themselves.
+
+A sample "`test.html`" document looks like this:
+
+ title: Job
+ parent: Home
+ ctime: 2008-10-01
+
+ <p>What I did in the past:<P>
+ <!-- to be continued -->
+
+You'll find more about "`title:`", "`parent:`" and "`ctime:`" in the
+[[page format|pageformat.html]] description.
--- /dev/null
+title: Read Markdown
+parent: Plugins
+ctime: 2009-06-26
+
+This plugin reads "`*.md`" files and converts them to HTML.
+
+"[[Markdown|http://daringfireball.net/projects/markdown/]]" is a wiki-like
+text format. The plugin "`read_markdown.py`" doesn't use the
+standard Python module "`markdown`", but instead the faster and simpler
[[markdown2|http://code.google.com/p/python-markdown2/]] module.
+
+A sample "`test.md`" document looks like this:
+
+ title: Impressum
+ parent: Home
+ ctime: 2008-10-01
+
+ # Address
+
+ Mario Marionetti
+ 10, Mariott St
+ Marioland 1007
+
+ Don't send me spam, *ever*!
+
+You'll find more about "`title:`", "`parent:`" and "`ctime:`" in the
+[[page format|pageformat.html]] description.
+
+= Modifications =
+
+This implementation is based on python-markdown2 version 1.0.1.12, but has been
+changed this way:
+
+* file-vars (emacs-style settings inside the file) have been disabled
+* "Standardize line endings" removed
+* call to _do_links() removed (we have the [[linkify|link.html]] pass for
+ this)
+* logging removed
+* allow "= Header =" in addition to "# Header #"
--- /dev/null
+title: Read RST
+parent: Plugins
+ctime: 2009-06-26
+
+This plugin reads "`*.rst`" files and converts them to HTML.
+
+"RST" is the abbreviation for
+[[reStructuredText|http://docutils.sourceforge.net/rst.html]], a format
+common for many python programmers. The plugin "`read_rst.py`" uses the
+standard Python module "`docutils`" to convert RST into HTML. A sample
+"`test.rst`" document looks like this:
+
+ title: Impressum
+ parent: Home
+ ctime: 2008-10-01
+
+ Address
+ =======
+
+ |Mario Marionetti
+ |10, Mariott St
+ |Marioland 1007
+
+ Don't send me spam, *ever*!
+
+You'll find more about "`title:`", "`parent:`" and "`ctime:`" in the
+[[page format|pageformat.html]] description.
\ No newline at end of file
--- /dev/null
+title: Sample plugin skeleton
linktitle: Skeleton
+parent: Plugins
+ctime: 2009-06-26
+
+TODO
--- /dev/null
+<%def name="contents()">\
+${body}
+</%def>\
+#######################################################################
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html>
+<head>
+<title>${file.title | entity}</title>
+<meta http-equiv="Content-Type" content="text/html; charset=${file.output_encoding}"/>
+% if len(keywords):
+ <meta name="keywords" content="${keywords | entity}" />
+% endif
+% if len(description):
+ <meta name="description" content="${description | entity}" />
+% endif
+</head>
+<body>
+<h1>${file.title | entity}</h1>
+${self.contents()}
+</body>
+</html>
--- /dev/null
+<%inherit file="default.tmpl"/>
+#######################################################################
+<%def name="contents()">\
+${body}
+<%
+ history = get_recently(get_current_file())
+%>
+% if len(history)>1:
+<h2>What's new?</h2>
+% for mtime,ctime,title,link in history:
+% if mtime > ctime:
+ Modified ${format_date(mtime)}\
+% else:
+ Created ${format_date(ctime)}\
+% endif
+: <a href="${link}">${title | entity}</a><br />
+% endfor
+% endif
+</%def>\
--- /dev/null
+title: Web page template
linktitle: HTML Template
+parent: Plugins
+ctime: 2009-06-26
+
+TODO
--- /dev/null
+title: Templates
+parent: Home
+ctime: 2009-06-26
+
+TODO
--- /dev/null
+template: "history"
+subtitle: "Webber"
+date_format: "%Y-%m.%d"
+input_encoding: "iso-8859-1"
+output_encoding: "iso-8859-1"
+plugins: [
+ "skeleton",
+ "hierarchy",
+ "link",
+ "read_rst",
+ "read_html",
+ "read_copyonly",
+ "read_markdown",
+ "template_mako",
+ ]
+exclude_dir: [
+ ]
+exclude_files: [
+ "webber.conf",
+ "*.tmpl",
+ ]
+copy_files: [
+ "*.png",
+ "*.jpg",
+ "*.gif",
+ "*.mpg",
+ "*.css",
+ "robots.txt",
+]
--- /dev/null
+# -*- coding: iso-8859-1 -*-
+from webber import *
+import re
+
+reSPLIT = re.compile(r',\s*')
+
+_childs = {}
+_parent = {}
+
+def memorize_links(title, links):
+ global _childs
+ if not links:
+ return
+ order = 100
+ for link in reSPLIT.split(links):
+ #print title, link
+ if not _childs.has_key(title):
+ _childs[title] = []
+ _childs[title].append( (order,link))
+ order += 100
+ _parent[link] = title
+
+
def memorize_parent(title, parent, order):
	"""Record that page `title` is a child of page `parent`.

	@param title:  linktitle of the child page.
	@param parent: linktitle of the parent page.
	@param order:  explicit sort position, or -1 to auto-assign one
	    step (100) past the currently highest known order.
	"""
	if order == -1:
		order = 0
		if parent in _childs:
			# NOTE(review): this takes the maximum over the child lists
			# of *all* pages, not only over _childs[parent] -- possibly
			# a bug, but kept as-is to preserve the existing ordering.
			for c in _childs:
				for o, n in _childs[c]:
					if o > order:
						order = o
		order += 100
	_childs.setdefault(parent, []).append((order, title))
	_parent[title] = parent
+
+
#
# The "scan" plugins should scan for meta-data, mostly for links.
#
#   params.direc contains the "class Directory" object
#   params.file has the "class File" object
#   params.file.contents contains the text
#
@set_hook("scan")
def scan(params):
	"""Scan hook: record this page's hierarchy headers.

	Feeds memorize_links()/memorize_parent() so that breadcrumbs and
	side-menus can be built once every file has been scanned.
	"""
	file = params["file"]
	if file.has_key("links"):
		memorize_links(file.linktitle, file.links)
	if file.has_key("parent"):
		if file.has_key("order"):
			order = int(file.order)
		else:
			# -1 lets memorize_parent() auto-assign a position
			order = -1
		memorize_parent(file.linktitle, file.parent, order)
+
+
@set_hook("scan_done")
def scan_done(params):
	"""After every file has been scanned, sort each page's list of
	children in ascending (order, title) order.

	@param params: hook parameters (unused).
	"""
	for c in _childs:
		_childs[c].sort()
	# (An unreachable debug dump of _parent/_childs that followed the
	# return statement has been removed.)
+
+
@set_function("get_breadcrumbs")
def get_breadcrumbs(orig_page):
	"""Return the path from the root page down to `orig_page` as a list
	of (page, link) tuples, e.g.
	[('Home', ...), ('Beruf', ...), ('Werdegang', ...)].
	This can be easily used to generate breadcrumbs HTML code."""
	res = [(orig_page, get_link_from(orig_page, orig_page))]
	page = orig_page
	# Walk upwards until a page without a recorded parent is reached.
	while page in _parent:
		page = _parent[page]
		link = get_link_from(orig_page, page)
		res.insert(0, (page, link))
	return res
+
+
+
@set_function("get_sidemenu")
def get_sidemenu(page):
	"""Returns an array with a side-menu. Everything from the current
	page upwards is shown, as well as one level below the current
	position. Each array item is a tuple:

	    (level, part-of-path, current-page, title, link)

	Example:
	    0 1 0 Home
	    0 1 0 Beruf
	    1 0 0 Kenntnisse
	    1 1 0 Werdegang
	    2 0 1 Alte
	    0 0 0 Haus
	"""
	# Pages on the path to the root are always expanded:
	bread = get_breadcrumbs(page)

	root = "Home" #TODO
	res = [(0, 1, int(root==page), root, get_link_from(page, root))]

	def do_menu(pg, level):
		# Append all children of `pg`, recursing into those that lie
		# on the breadcrumb path or are the current page.
		if pg in _childs:
			for p in _childs[pg]:
				subpage = p[1]
				in_bread = False
				for b in bread:
					if b[0] == subpage:
						in_bread = True
						break

				go_deeper = in_bread or (subpage==page)
				link = get_link_from(page, subpage)
				# BUGFIX: was int(subpage in bread), which compared a
				# page name against (page, link) tuples and thus was
				# always 0; the "part-of-path" column now works.
				res.append((level, int(in_bread), int(subpage==page), subpage, link))
				if go_deeper:
					do_menu(subpage, level+1)

	# TODO: make this configurable, e.g. cfg.rootpage, otherwise a page
	# that is outside of the menu won't show a menu
	do_menu("Home", 0)
	return res
+
+
+
+
@set_function("get_recently")
def get_recently(file):
	"""Return up to 10 recently changed pages at or below `file`'s page,
	as (mtime, ctime, title, link) tuples sorted newest-first.

	Used e.g. by the "history" template for a "What's new?" box.
	"""
	pg = []

	max_n = 10	# TODO: configurable?
	orig_page = file.linktitle

	def addPage(pg, title):
		# Find the page named `title`, append its data, then recurse
		# into its children as recorded during the scan pass.
		for f in files:
			file = files[f]
			if file.has_key("linktitle") and file.linktitle == title:
				pg.append( (file.mtime, file.ctime, file.title, get_link_from(orig_page, file.linktitle)) )
				if _childs.has_key(file.linktitle):
					for c in _childs[file.linktitle]:
						addPage(pg, c[1])
				# NOTE(review): "==" only stops when the limit is hit
				# exactly; the recursion above can overshoot max_n, so
				# ">=" was presumably meant -- confirm before relying
				# on the limit.
				if len(pg) == max_n:
					return
	addPage(pg, orig_page)
	pg.sort(reverse=True)
	return pg
+
+
+
+
+
if __name__ == "__main__":
	# Self-test: build a small page hierarchy by hand and print the
	# side-menu for one page.  Run from the project root via:
	#
	#    PYTHONPATH=`pwd` python plugins/hierarchy.py
	#
	memorize_parent("Impressum", "Home", 99999)
	memorize_parent("Beruf", "Home", 100)
	memorize_parent("Werdegang", "Beruf", 100)
	memorize_parent("Kenntnisse", "Beruf", 200)
	scan_done(None)

	# Further things to try:
	#print get_breadcrumbs("Home")
	#print get_breadcrumbs("Beruf")
	#print get_breadcrumbs("Werdegang")
	#print get_breadcrumbs("Kenntnisse")
	#for t in get_sidemenu("Home"): print t
	#for t in get_sidemenu("Beruf"): print t
	for t in get_sidemenu("Kenntnisse"): print t
--- /dev/null
+# -*- coding: iso-8859-1 -*-
+from webber import *
+import os, re
+
+# To understand this beast, read /usr/share/doc/python2.5-doc/html/lib/module-re.html :-)
+
# Matches [[page]], [[text|page]], [[text|page#anchor]] -- but not
# macro calls like [[!name]].
reLink = re.compile(r'''
	\[\[            # Begin of link
	(?=[^!])        # Don't fire for macros
	(?:
		([^\]\|]+)  # 1: link text
		\|          # followed by '|'
	)?              # optional
	([^\n\r\]#]+)   # 2: page to link to
	(
		\#          # '#', beginning of anchor
		[^\s\]]+    # 3: anchor text, doesn't contain spaces or ']'
	)?              # optional
	\]\]            # end of link
	''', re.VERBOSE)

def do_link(m):
	"""Used in re.sub() to substitute a [[...]] wiki-link with HTML.

	m.group(1) is the optional link text, group(2) the link target,
	group(3) the optional '#anchor' part.
	"""
	text = (m.group(1) or "").replace("\n", " ")
	link = m.group(2).replace("\n", " ")
	anchor = m.group(3) or ""
	if "." not in link:
		# No dot, so this is not a filename or URL: resolve it against
		# the known pages (by title, linktitle, or path basename).
		for f in files:
			file = files[f]
			if file.title == link or \
			   file.linktitle == link or \
			   os.path.splitext(os.path.basename(file.path))[0] == link:
				if not text:
					text = file.title
				link = get_link_from(get_current_file().linktitle, file.linktitle)
				break
	# TODO: validate link
	return '<a href="%s%s">%s</a>' % (link, anchor, text)
+
+
def test_link():
	"""Ad-hoc test: show which of the sample strings reLink matches and
	which groups (text, page, anchor) it captures.  Not run
	automatically; enable via the commented-out call below."""
	for s in (
		'Before [[!macro]] after',
		'Before [[link]] after',
		'Before [[Text|link]] after',
		'Before [[Text|link#anchor]] after'
		):
		m = reLink.search(s)
		if m:
			print "link:", s
			print " name:", m.group(1)
			print " link:", m.group(2)
			print " anchor:", m.group(3)
		else:
			print "No link:", s
+
def test_sub():
	"""Ad-hoc test: run do_link() over the sample strings and print the
	resulting HTML.  Not run automatically; enable via the
	commented-out call below."""
	for s in (
		'Before [[!macro]] after',
		'Before [[link]] after',
		'Before [[Text|link]] after',
		'Before [[Text|link#anchor]] after'
		):
		print s
		res = reLink.sub(do_link, s)
		print "", res
+
+#test_link()
+#test_sub()
+
+
+
+
@set_hook("linkify")
def linkify(params):
	"""Linkify hook: replace every [[...]] wiki-link in the page's
	contents with an HTML anchor (see do_link)."""
	contents = params.file.contents
	return reLink.sub(do_link, contents)
--- /dev/null
# -*- coding: iso-8859-1 -*-
from webber import *
import fnmatch
import os
import shutil
+
+
@set_hook("read")
def read(params):
	"""Read hook: mark files matching one of the cfg.copy_files
	patterns to be copied verbatim by the "copyfile" renderer."""
	f = params.file
	if any(fnmatch.fnmatchcase(f.rel_path, pattern) for pattern in cfg.copy_files):
		f.render = "copyfile"
		f.contents = ""
+
@set_hook("copyfile")
def copyfile(params):
	"""Copy the source file verbatim into the output tree.

	Creates the destination directory if needed.  A hard link is tried
	first (cheap, equivalent to the old "cp -l"); if linking fails
	(cross-device link, unsupported filesystem, ...) a normal copy is
	made.  Unlike the previous os.system("cp -l %s %s") this handles
	paths containing spaces or shell metacharacters and is portable.
	"""
	file = params.file
	log("copying file %s" % file.rel_path, level=7)
	src_path = os.path.join(cfg.in_dir, file.rel_path)
	out_path = os.path.join(cfg.out_dir, file.rel_path)
	out_dir = os.path.dirname(out_path)
	try:
		os.makedirs(out_dir)
	except OSError:
		# directory already exists
		pass
	try:
		if os.path.exists(out_path):
			# a stale hard link would prevent re-linking
			os.remove(out_path)
		os.link(src_path, out_path)
	except OSError:
		shutil.copy(src_path, out_path)
--- /dev/null
+# -*- coding: iso-8859-1 -*-
+from webber import *
+
+
@set_hook("read")
def read(params):
	"""Read hook: handle "*.html" files.

	Marks the file to be rendered by the "html" pass and returns the
	remaining snippet text.
	"""
	file = params.file
	if file.rel_path.endswith(".html"):
		file.render = "html"
		# read_keywords() presumably parses the "key: value" header
		# lines and returns a stream positioned at the body -- confirm
		# against class File in webber.py.
		f = file.read_keywords()
		return f.read()
+
+
@set_hook("htmlize")
def htmlize(params):
	""""Convert" an HTML snippet to HTML, i.e. hand the contents
	through unchanged.

	Returns None for files this plugin is not responsible for, so that
	other htmlize hooks get their chance.
	"""
	f = params.file
	if f.rel_path.endswith(".html"):
		return f.contents
	return None
--- /dev/null
+# -*- coding: iso-8859-1 -*-
+from webber import *
+
+# based on code from http://code.google.com/p/python-markdown2/
+# Copyright (c) 2007-2008 ActiveState Corp.
+# License: MIT (http://www.opensource.org/licenses/mit-license.php)
+#
+# I used version 1.0.1.12, but deleted:
+# * file-vars (emacs-style settings inside the file)
+# * Standardize line endings
+# * call to _do_links()
+# * logging
+# * allow "= Header =" in addition to "# Header #"
+#
+
+import os, sys, re, codecs
+try:
+ from hashlib import md5
+except ImportError:
+ from md5 import md5
+from random import random
+
+
+
#---- Python version compat

# Pre-2.4 interpreters lack the ``set`` builtin and ``reversed``;
# provide equivalents so the rest of the module can use them freely.
if sys.version_info[:2] < (2,4):
    from sets import Set as set
    def reversed(sequence):
        for i in sequence[::-1]:
            yield i
    def _unicode_decode(s, encoding, errors='xmlcharrefreplace'):
        # Python 2.3: decode via the unicode() constructor.
        return unicode(s, encoding, errors)
else:
    def _unicode_decode(s, encoding, errors='strict'):
        # 2.4+: str.decode handles this directly.
        return s.decode(encoding, errors)
+
+
#---- globals

DEBUG = False

# Number of spaces a literal tab expands to unless overridden.
DEFAULT_TAB_WIDTH = 4

# Table of hash values for escaped characters:
def _escape_hash(s):
    # Lame attempt to avoid possible collision with someone actually
    # using the MD5 hexdigest of one of these chars in there text.
    # Other ideas: random.random(), uuid.uuid()
    #return md5(s).hexdigest()   # Markdown.pl effectively does this.
    return 'md5-'+md5(s).hexdigest()
# Maps each Markdown special character to a unique placeholder token
# that cannot occur in normal text; see _escape_special_chars() and
# _unescape_special_chars().
g_escape_table = dict([(ch, _escape_hash(ch)) for ch in '\\`*_{}[]()>#+-.!'])
+
+
+
+#---- exceptions
+
class MarkdownError(Exception):
    """Raised for errors during Markdown-to-HTML conversion."""
    pass
+
+
+
+#---- public api
+
def markdown(text, html4tags=False, tab_width=DEFAULT_TAB_WIDTH,
             safe_mode=None, extras=None, link_patterns=None):
    """Convenience wrapper: convert `text` with a one-shot Markdown
    instance configured from the given options."""
    converter = Markdown(html4tags=html4tags, tab_width=tab_width,
                         safe_mode=safe_mode, extras=extras,
                         link_patterns=link_patterns)
    return converter.convert(text)
+
+class Markdown(object):
    # The dict of "extras" to enable in processing -- a mapping of
    # extra name to argument for the extra. Most extras do not have an
    # argument, in which case the value is None.
    #
    # This can be set via (a) subclassing and (b) the constructor
    # "extras" argument.
    extras = None

    urls = None          # link-id -> URL, filled by _strip_link_definitions()
    titles = None        # link-id -> title, filled by _strip_link_definitions()
    html_blocks = None   # hash key -> raw block-level HTML, see _hash_html_blocks()
    html_spans = None    # hash key -> sanitized span HTML (safe_mode only)
    html_removed_text = "[HTML_REMOVED]" # for compat with markdown.py

    # Used to track when we're inside an ordered or unordered list
    # (see _ProcessListItems() for details):
    list_level = 0

    # Matches lines containing only spaces/tabs; such lines are blanked
    # early so later regexen can match blank runs with /\n+/.
    _ws_only_line_re = re.compile(r"^[ \t]+$", re.M)
+
    def __init__(self, html4tags=False, tab_width=4, safe_mode=None,
                 extras=None, link_patterns=None):
        """Create a converter.

        @param html4tags: if True emit "<br>"-style empty elements
            instead of XHTML's "<br />".
        @param tab_width: number of spaces a tab expands to.
        @param safe_mode: None, "replace" or "escape" -- how raw HTML
            in the input is treated (True means "replace").
        @param extras: dict (or iterable of names) of extra features.
        @param link_patterns: patterns for the "link-patterns" extra.
        """
        if html4tags:
            self.empty_element_suffix = ">"
        else:
            self.empty_element_suffix = " />"
        self.tab_width = tab_width

        # For compatibility with earlier markdown2.py and with
        # markdown.py's safe_mode being a boolean,
        #   safe_mode == True -> "replace"
        if safe_mode is True:
            self.safe_mode = "replace"
        else:
            self.safe_mode = safe_mode

        if self.extras is None:
            self.extras = {}
        elif not isinstance(self.extras, dict):
            # A subclass may have declared a plain list of extra names.
            self.extras = dict([(e, None) for e in self.extras])
        if extras:
            if not isinstance(extras, dict):
                extras = dict([(e, None) for e in extras])
            self.extras.update(extras)
        assert isinstance(self.extras, dict)
        self._instance_extras = self.extras.copy()
        self.link_patterns = link_patterns
        # Matches the leading indent of one "tab stop" (for outdenting).
        self._outdent_re = re.compile(r'^(\t|[ ]{1,%d})' % tab_width, re.M)
+
+ def reset(self):
+ self.urls = {}
+ self.titles = {}
+ self.html_blocks = {}
+ self.html_spans = {}
+ self.list_level = 0
+ self.extras = self._instance_extras.copy()
+ self.encoding = 'utf-8'
+ if "footnotes" in self.extras:
+ self.footnotes = {}
+ self.footnote_ids = []
+
    def convert(self, text, encoding=None):
        """Convert the given Markdown text, return the resulting HTML.

        @param text: the Markdown source (str or unicode).
        @param encoding: encoding of `text` if it is a byte string;
            defaults to the instance encoding (utf-8 after reset()).
        """
        # Main function. The order in which other subs are called here is
        # essential. Link and image substitutions need to happen before
        # _EscapeSpecialChars(), so that any *'s or _'s in the <a>
        # and <img> tags get encoded.

        # Clear the global hashes. If we don't clear these, you get conflicts
        # from other articles when generating a page which contains more than
        # one article (e.g. an index page that shows the N most recent
        # articles):
        self.reset()
        if encoding:
            self.encoding = encoding

        if not isinstance(text, unicode):
            text = unicode(text, self.encoding)

        # Standardize line endings: (disabled for webber, see module header)
        #text = re.sub("\r\n|\r", "\n", text)

        # Make sure $text ends with a couple of newlines:
        text += "\n\n"

        # Convert all tabs to spaces.
        text = self._detab(text)

        # Strip any lines consisting only of spaces and tabs.
        # This makes subsequent regexen easier to write, because we can
        # match consecutive blank lines with /\n+/ instead of something
        # contorted like /[ \t]*\n+/ .
        text = self._ws_only_line_re.sub("", text)

        if self.safe_mode:
            text = self._hash_html_spans(text)

        # Turn block-level HTML blocks into hash entries
        text = self._hash_html_blocks(text, raw=True)

        # Strip link definitions, store in hashes.
        if "footnotes" in self.extras:
            # Must do footnotes first because an unlucky footnote defn
            # looks like a link defn:
            #   [^4]: this "looks like a link defn"
            text = self._strip_footnote_definitions(text)
        text = self._strip_link_definitions(text)

        text = self._run_block_gamut(text)

        text = self._unescape_special_chars(text)

        if "footnotes" in self.extras:
            text = self._add_footnotes(text)

        if self.safe_mode:
            text = self._unhash_html_spans(text)

        text += "\n"
        return text
+
    # Cribbed from a post by Bart Lateur:
    # <http://www.nntp.perl.org/group/perl.macperl.anyperl/154>
    _detab_re = re.compile(r'(.*?)\t', re.M)
    def _detab_sub(self, match):
        # Replace one tab with enough spaces to reach the next tab stop.
        g1 = match.group(1)
        return g1 + (' ' * (self.tab_width - len(g1) % self.tab_width))
    def _detab(self, text):
        r"""Remove (leading?) tabs from a file.

        >>> m = Markdown()
        >>> m._detab("\tfoo")
        '    foo'
        >>> m._detab("  \tfoo")
        '    foo'
        >>> m._detab("\t  foo")
        '      foo'
        >>> m._detab("  foo")
        '  foo'
        >>> m._detab("  foo\n\tbar\tblam")
        '  foo\n    bar blam'
        """
        if '\t' not in text:
            # fast path: nothing to expand
            return text
        return self._detab_re.subn(self._detab_sub, text)[0]
+
    # Tags whose blocks are hashed out of the way before the Markdown
    # passes run (so no <p> gets wrapped around them).
    _block_tags_a = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math|ins|del'
    # Strict variant: the closing tag must stand on its own line.
    _strict_tag_block_re = re.compile(r"""
        ( # save in \1
            ^ # start of line (with re.M)
            <(%s) # start tag = \2
            \b # word break
            (.*\n)*? # any number of lines, minimally matching
            </\2> # the matching end tag
            [ \t]* # trailing spaces/tabs
            (?=\n+|\Z) # followed by a newline or end of document
        )
        """ % _block_tags_a,
        re.X | re.M)

    _block_tags_b = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math'
    # Liberal variant: allows text before the closing tag on its line
    # (note the extra ".*" before "</\2>").
    _liberal_tag_block_re = re.compile(r"""
        ( # save in \1
            ^ # start of line (with re.M)
            <(%s) # start tag = \2
            \b # word break
            (.*\n)*? # any number of lines, minimally matching
            .*</\2> # the matching end tag
            [ \t]* # trailing spaces/tabs
            (?=\n+|\Z) # followed by a newline or end of document
        )
        """ % _block_tags_b,
        re.X | re.M)
+
+ def _hash_html_block_sub(self, match, raw=False):
+ html = match.group(1)
+ if raw and self.safe_mode:
+ html = self._sanitize_html(html)
+ key = _hash_text(html)
+ self.html_blocks[key] = html
+ return "\n\n" + key + "\n\n"
+
    def _hash_html_blocks(self, text, raw=False):
        """Hashify HTML blocks

        We only want to do this for block-level HTML tags, such as headers,
        lists, and tables. That's because we still want to wrap <p>s around
        "paragraphs" that are wrapped in non-block-level tags, such as anchors,
        phrase emphasis, and spans. The list of tags we're looking for is
        hard-coded.

        @param raw {boolean} indicates if these are raw HTML blocks in
            the original source. It makes a difference in "safe" mode.
        """
        if '<' not in text:
            # fast path: no HTML at all
            return text

        # Pass `raw` value into our calls to self._hash_html_block_sub.
        hash_html_block_sub = _curry(self._hash_html_block_sub, raw=raw)

        # First, look for nested blocks, e.g.:
        #   <div>
        #       <div>
        #       tags for inner block must be indented.
        #       </div>
        #   </div>
        #
        # The outermost tags must start at the left margin for this to match, and
        # the inner nested divs must be indented.
        # We need to do this before the next, more liberal match, because the next
        # match will start at the first `<div>` and stop at the first `</div>`.
        text = self._strict_tag_block_re.sub(hash_html_block_sub, text)

        # Now match more liberally, simply from `\n<tag>` to `</tag>\n`
        text = self._liberal_tag_block_re.sub(hash_html_block_sub, text)

        # Special case just for <hr />. It was easier to make a special
        # case than to make the other regex more complicated.
        if "<hr" in text:
            _hr_tag_re = _hr_tag_re_from_tab_width(self.tab_width)
            text = _hr_tag_re.sub(hash_html_block_sub, text)

        # Special case for standalone HTML comments:
        if "<!--" in text:
            start = 0
            while True:
                # Delimiters for next comment block.
                try:
                    start_idx = text.index("<!--", start)
                except ValueError, ex:
                    break
                try:
                    end_idx = text.index("-->", start_idx) + 3
                except ValueError, ex:
                    break

                # Start position for next comment block search.
                start = end_idx

                # Validate whitespace before comment.
                if start_idx:
                    # - Up to `tab_width - 1` spaces before start_idx.
                    for i in range(self.tab_width - 1):
                        if text[start_idx - 1] != ' ':
                            break
                        start_idx -= 1
                        if start_idx == 0:
                            break
                    # - Must be preceded by 2 newlines or hit the start of
                    #   the document.
                    if start_idx == 0:
                        pass
                    elif start_idx == 1 and text[0] == '\n':
                        start_idx = 0 # to match minute detail of Markdown.pl regex
                    elif text[start_idx-2:start_idx] == '\n\n':
                        pass
                    else:
                        break

                # Validate whitespace after comment.
                # - Any number of spaces and tabs.
                while end_idx < len(text):
                    if text[end_idx] not in ' \t':
                        break
                    end_idx += 1
                # - Must be following by 2 newlines or hit end of text.
                if text[end_idx:end_idx+2] not in ('', '\n', '\n\n'):
                    continue

                # Escape and hash (must match `_hash_html_block_sub`).
                html = text[start_idx:end_idx]
                if raw and self.safe_mode:
                    html = self._sanitize_html(html)
                key = _hash_text(html)
                self.html_blocks[key] = html
                text = text[:start_idx] + "\n\n" + key + "\n\n" + text[end_idx:]

        if "xml" in self.extras:
            # Treat XML processing instructions and namespaced one-liner
            # tags as if they were block HTML tags. E.g., if standalone
            # (i.e. are their own paragraph), the following do not get
            # wrapped in a <p> tag:
            #    <?foo bar?>
            #
            #    <xi:include xmlns:xi="http://www.w3.org/2001/XInclude" href="chapter_1.md"/>
            _xml_oneliner_re = _xml_oneliner_re_from_tab_width(self.tab_width)
            text = _xml_oneliner_re.sub(hash_html_block_sub, text)

        return text
+
    def _strip_link_definitions(self, text):
        """Strip link definitions from `text` and return the remainder.

        The URLs and optional titles are stored in self.urls and
        self.titles via _extract_link_def_sub().
        """
        less_than_tab = self.tab_width - 1

        # Link defs are in the form:
        #   [id]: url "optional title"
        _link_def_re = re.compile(r"""
            ^[ ]{0,%d}\[(.+)\]: # id = \1
              [ \t]*
              \n?               # maybe *one* newline
              [ \t]*
            <?(.+?)>?           # url = \2
              [ \t]*
            (?:
                \n?             # maybe one newline
                [ \t]*
                (?<=\s)         # lookbehind for whitespace
                ['"(]
                ([^\n]*)        # title = \3
                ['")]
                [ \t]*
            )?  # title is optional
            (?:\n+|\Z)
            """ % less_than_tab, re.X | re.M | re.U)
        return _link_def_re.sub(self._extract_link_def_sub, text)
+
+ def _extract_link_def_sub(self, match):
+ id, url, title = match.groups()
+ key = id.lower() # Link IDs are case-insensitive
+ self.urls[key] = self._encode_amps_and_angles(url)
+ if title:
+ self.titles[key] = title.replace('"', '"')
+ return ""
+
+ def _extract_footnote_def_sub(self, match):
+ id, text = match.groups()
+ text = _dedent(text, skip_first_line=not text.startswith('\n')).strip()
+ normed_id = re.sub(r'\W', '-', id)
+ # Ensure footnote text ends with a couple newlines (for some
+ # block gamut matches).
+ self.footnotes[normed_id] = text + "\n\n"
+ return ""
+
    def _strip_footnote_definitions(self, text):
        """A footnote definition looks like this:

            [^note-id]: Text of the note.

                May include one or more indented paragraphs.

        Where,
        - The 'note-id' can be pretty much anything, though typically it
          is the number of the footnote.
        - The first paragraph may start on the next line, like so:

            [^note-id]:
                Text of the note.

        Definitions are removed from `text` and collected in
        self.footnotes by _extract_footnote_def_sub().
        """
        less_than_tab = self.tab_width - 1
        footnote_def_re = re.compile(r'''
            ^[ ]{0,%d}\[\^(.+)\]:   # id = \1
            [ \t]*
            (                       # footnote text = \2
              # First line need not start with the spaces.
              (?:\s*.*\n+)
              (?:
                (?:[ ]{%d} | \t)  # Subsequent lines must be indented.
                .*\n+
              )*
            )
            # Lookahead for non-space at line-start, or end of doc.
            (?:(?=^[ ]{0,%d}\S)|\Z)
            ''' % (less_than_tab, self.tab_width, self.tab_width),
            re.X | re.M)
        return footnote_def_re.sub(self._extract_footnote_def_sub, text)
+
+
    # Horizontal rules: three or more '*', '-' or '_' (optionally
    # space-separated) on a line of their own, indented at most two
    # spaces; see _run_block_gamut().
    _hr_res = [
        re.compile(r"^[ ]{0,2}([ ]?\*[ ]?){3,}[ \t]*$", re.M),
        re.compile(r"^[ ]{0,2}([ ]?\-[ ]?){3,}[ \t]*$", re.M),
        re.compile(r"^[ ]{0,2}([ ]?\_[ ]?){3,}[ \t]*$", re.M),
    ]
+
    def _run_block_gamut(self, text):
        """Apply all block-level transformations to `text`.

        These are all the transformations that form block-level tags
        like paragraphs, headers, and list items; the call order is
        significant.
        """
        text = self._do_headers(text)

        # Do Horizontal Rules:
        hr = "\n<hr"+self.empty_element_suffix+"\n"
        for hr_re in self._hr_res:
            text = hr_re.sub(hr, text)

        text = self._do_lists(text)

        if "pyshell" in self.extras:
            text = self._prepare_pyshell_blocks(text)

        text = self._do_code_blocks(text)

        text = self._do_block_quotes(text)

        # We already ran _HashHTMLBlocks() before, in Markdown(), but that
        # was to escape raw HTML in the original Markdown source. This time,
        # we're escaping the markup we've just created, so that we don't wrap
        # <p> tags around block-level tags.
        text = self._hash_html_blocks(text)

        text = self._form_paragraphs(text)

        return text
+
+ def _pyshell_block_sub(self, match):
+ lines = match.group(0).splitlines(0)
+ _dedentlines(lines)
+ indent = ' ' * self.tab_width
+ s = ('\n' # separate from possible cuddled paragraph
+ + indent + ('\n'+indent).join(lines)
+ + '\n\n')
+ return s
+
    def _prepare_pyshell_blocks(self, text):
        """Ensure that Python interactive shell sessions are put in
        code blocks -- even if not properly indented.
        """
        if ">>>" not in text:
            # fast path: no shell session anywhere
            return text

        less_than_tab = self.tab_width - 1
        _pyshell_block_re = re.compile(r"""
            ^([ ]{0,%d})>>>[ ].*\n   # first line
            ^(\1.*\S+.*\n)*         # any number of subsequent lines
            ^\n                     # ends with a blank line
            """ % less_than_tab, re.M | re.X)

        return _pyshell_block_re.sub(self._pyshell_block_sub, text)
+
    def _run_span_gamut(self, text):
        """Apply all span-level transformations to `text`.

        These are all the transformations that occur *within* block-level
        tags like paragraphs, headers, and list items; the call order is
        significant.
        """
        text = self._do_code_spans(text)

        text = self._escape_special_chars(text)

        # Process anchor and image tags.
        # (Disabled for webber: the "linkify" pass handles links.)
        #text = self._do_links(text)

        # Make links out of things like `<http://example.com/>`
        # Must come after _do_links(), because you can use < and >
        # delimiters in inline links like [this](<url>).
        text = self._do_auto_links(text)

        if "link-patterns" in self.extras:
            text = self._do_link_patterns(text)

        text = self._encode_amps_and_angles(text)

        text = self._do_italics_and_bold(text)

        # Do hard breaks:
        text = re.sub(r" {2,}\n", " <br%s\n" % self.empty_element_suffix, text)

        return text
+
    # Splits text into alternating non-HTML / HTML tokens (used by
    # _escape_special_chars() and _hash_html_spans()).
    # "Sorta" because auto-links are identified as "tag" tokens.
    _sorta_html_tokenize_re = re.compile(r"""
        (
            # tag
            </?
            (?:\w+)                                     # tag name
            (?:\s+(?:[\w-]+:)?[\w-]+=(?:".*?"|'.*?'))*  # attributes
            \s*/?>
            |
            # auto-link (e.g., <http://www.activestate.com/>)
            <\w+[^>]*>
            |
            <!--.*?-->      # comment
            |
            <\?.*?\?>       # processing instruction
        )
        """, re.X)
+
    def _escape_special_chars(self, text):
        """Replace '*' and '_' inside HTML markup and backslash-escaped
        characters in plain text with placeholder hashes."""
        # Python markdown note: the HTML tokenization here differs from
        # that in Markdown.pl, hence the behaviour for subtle cases can
        # differ (I believe the tokenizer here does a better job because
        # it isn't susceptible to unmatched '<' and '>' in HTML tags).
        # Note, however, that '>' is not allowed in an auto-link URL
        # here.
        escaped = []
        is_html_markup = False
        # The split alternates: text, markup, text, markup, ...
        for token in self._sorta_html_tokenize_re.split(text):
            if is_html_markup:
                # Within tags/HTML-comments/auto-links, encode * and _
                # so they don't conflict with their use in Markdown for
                # italics and strong.  We're replacing each such
                # character with its corresponding MD5 checksum value;
                # this is likely overkill, but it should prevent us from
                # colliding with the escape values by accident.
                escaped.append(token.replace('*', g_escape_table['*'])
                                    .replace('_', g_escape_table['_']))
            else:
                escaped.append(self._encode_backslash_escapes(token))
            is_html_markup = not is_html_markup
        return ''.join(escaped)
+
+ def _hash_html_spans(self, text):
+ # Used for safe_mode.
+
+ def _is_auto_link(s):
+ if ':' in s and self._auto_link_re.match(s):
+ return True
+ elif '@' in s and self._auto_email_link_re.match(s):
+ return True
+ return False
+
+ tokens = []
+ is_html_markup = False
+ for token in self._sorta_html_tokenize_re.split(text):
+ if is_html_markup and not _is_auto_link(token):
+ sanitized = self._sanitize_html(token)
+ key = _hash_text(sanitized)
+ self.html_spans[key] = sanitized
+ tokens.append(key)
+ else:
+ tokens.append(token)
+ is_html_markup = not is_html_markup
+ return ''.join(tokens)
+
+ def _unhash_html_spans(self, text):
+ for key, sanitized in self.html_spans.items():
+ text = text.replace(key, sanitized)
+ return text
+
+ def _sanitize_html(self, s):
+ if self.safe_mode == "replace":
+ return self.html_removed_text
+ elif self.safe_mode == "escape":
+ replacements = [
+ ('&', '&'),
+ ('<', '<'),
+ ('>', '>'),
+ ]
+ for before, after in replacements:
+ s = s.replace(before, after)
+ return s
+ else:
+ raise MarkdownError("invalid value for 'safe_mode': %r (must be "
+ "'escape' or 'replace')" % self.safe_mode)
+
    # Tail of an inline link: the '(/url/ "title")' part of
    # [text](/url/ "title").
    _tail_of_inline_link_re = re.compile(r'''
        # Match tail of: [text](/url/) or [text](/url/ "title")
        \(            # literal paren
        [ \t]*
        (?P<url>      # \1
            <.*?>
            |
            .*?
        )
        [ \t]*
        (             # \2
            (['"])    # quote char = \3
            (?P<title>.*?)
            \3        # matching quote
        )?            # title is optional
        \)
        ''', re.X | re.S)
    # Tail of a reference link: the '[id]' part of [text][id].
    _tail_of_reference_link_re = re.compile(r'''
        # Match tail of: [text][id]
        [ ]?          # one optional space
        (?:\n[ ]*)?   # one optional newline followed by spaces
        \[
        (?P<id>.*?)
        \]
        ''', re.X | re.S)
+
+ def _do_links(self, text):
+ """Turn Markdown link shortcuts into XHTML <a> and <img> tags.
+
+ This is a combination of Markdown.pl's _DoAnchors() and
+ _DoImages(). They are done together because that simplified the
+ approach. It was necessary to use a different approach than
+ Markdown.pl because of the lack of atomic matching support in
+ Python's regex engine used in $g_nested_brackets.
+ """
+ MAX_LINK_TEXT_SENTINEL = 300
+
+ # `anchor_allowed_pos` is used to support img links inside
+ # anchors, but not anchors inside anchors. An anchor's start
+ # pos must be `>= anchor_allowed_pos`.
+ anchor_allowed_pos = 0
+
+ curr_pos = 0
+ while True: # Handle the next link.
+ # The next '[' is the start of:
+ # - an inline anchor: [text](url "title")
+ # - a reference anchor: [text][id]
+ # - an inline img: ![text](url "title")
+ # - a reference img: ![text][id]
+ # - a footnote ref: [^id]
+ # (Only if 'footnotes' extra enabled)
+ # - a footnote defn: [^id]: ...
+ # (Only if 'footnotes' extra enabled) These have already
+ # been stripped in _strip_footnote_definitions() so no
+ # need to watch for them.
+ # - a link definition: [id]: url "title"
+ # These have already been stripped in
+ # _strip_link_definitions() so no need to watch for them.
+ # - not markup: [...anything else...
+ try:
+ start_idx = text.index('[', curr_pos)
+ except ValueError:
+ break
+ text_length = len(text)
+
+ # Find the matching closing ']'.
+ # Markdown.pl allows *matching* brackets in link text so we
+ # will here too. Markdown.pl *doesn't* currently allow
+ # matching brackets in img alt text -- we'll differ in that
+ # regard.
+ bracket_depth = 0
+ for p in range(start_idx+1, min(start_idx+MAX_LINK_TEXT_SENTINEL,
+ text_length)):
+ ch = text[p]
+ if ch == ']':
+ bracket_depth -= 1
+ if bracket_depth < 0:
+ break
+ elif ch == '[':
+ bracket_depth += 1
+ else:
+ # Closing bracket not found within sentinel length.
+ # This isn't markup.
+ curr_pos = start_idx + 1
+ continue
+ link_text = text[start_idx+1:p]
+
+ # Possibly a footnote ref?
+ if "footnotes" in self.extras and link_text.startswith("^"):
+ normed_id = re.sub(r'\W', '-', link_text[1:])
+ if normed_id in self.footnotes:
+ self.footnote_ids.append(normed_id)
+ result = '<sup class="footnote-ref" id="fnref-%s">' \
+ '<a href="#fn-%s">%s</a></sup>' \
+ % (normed_id, normed_id, len(self.footnote_ids))
+ text = text[:start_idx] + result + text[p+1:]
+ else:
+ # This id isn't defined, leave the markup alone.
+ curr_pos = p+1
+ continue
+
+ # Now determine what this is by the remainder.
+ p += 1
+ if p == text_length:
+ return text
+
+ # Inline anchor or img?
+ if text[p] == '(': # attempt at perf improvement
+ match = self._tail_of_inline_link_re.match(text, p)
+ if match:
+ # Handle an inline anchor or img.
+ is_img = start_idx > 0 and text[start_idx-1] == "!"
+ if is_img:
+ start_idx -= 1
+
+ url, title = match.group("url"), match.group("title")
+ if url and url[0] == '<':
+ url = url[1:-1] # '<url>' -> 'url'
+ # We've got to encode these to avoid conflicting
+ # with italics/bold.
+ url = url.replace('*', g_escape_table['*']) \
+ .replace('_', g_escape_table['_'])
+ if title:
+ title_str = ' title="%s"' \
+ % title.replace('*', g_escape_table['*']) \
+ .replace('_', g_escape_table['_']) \
+ .replace('"', '"')
+ else:
+ title_str = ''
+ if is_img:
+ result = '<img src="%s" alt="%s"%s%s' \
+ % (url, link_text.replace('"', '"'),
+ title_str, self.empty_element_suffix)
+ curr_pos = start_idx + len(result)
+ text = text[:start_idx] + result + text[match.end():]
+ elif start_idx >= anchor_allowed_pos:
+ result_head = '<a href="%s"%s>' % (url, title_str)
+ result = '%s%s</a>' % (result_head, link_text)
+ # <img> allowed from curr_pos on, <a> from
+ # anchor_allowed_pos on.
+ curr_pos = start_idx + len(result_head)
+ anchor_allowed_pos = start_idx + len(result)
+ text = text[:start_idx] + result + text[match.end():]
+ else:
+ # Anchor not allowed here.
+ curr_pos = start_idx + 1
+ continue
+
+ # Reference anchor or img?
+ else:
+ match = self._tail_of_reference_link_re.match(text, p)
+ if match:
+ # Handle a reference-style anchor or img.
+ is_img = start_idx > 0 and text[start_idx-1] == "!"
+ if is_img:
+ start_idx -= 1
+ link_id = match.group("id").lower()
+ if not link_id:
+ link_id = link_text.lower() # for links like [this][]
+ if link_id in self.urls:
+ url = self.urls[link_id]
+ # We've got to encode these to avoid conflicting
+ # with italics/bold.
+ url = url.replace('*', g_escape_table['*']) \
+ .replace('_', g_escape_table['_'])
+ title = self.titles.get(link_id)
+ if title:
+ title = title.replace('*', g_escape_table['*']) \
+ .replace('_', g_escape_table['_'])
+ title_str = ' title="%s"' % title
+ else:
+ title_str = ''
+ if is_img:
+ result = '<img src="%s" alt="%s"%s%s' \
+ % (url, link_text.replace('"', '"'),
+ title_str, self.empty_element_suffix)
+ curr_pos = start_idx + len(result)
+ text = text[:start_idx] + result + text[match.end():]
+ elif start_idx >= anchor_allowed_pos:
+ result = '<a href="%s"%s>%s</a>' \
+ % (url, title_str, link_text)
+ result_head = '<a href="%s"%s>' % (url, title_str)
+ result = '%s%s</a>' % (result_head, link_text)
+ # <img> allowed from curr_pos on, <a> from
+ # anchor_allowed_pos on.
+ curr_pos = start_idx + len(result_head)
+ anchor_allowed_pos = start_idx + len(result)
+ text = text[:start_idx] + result + text[match.end():]
+ else:
+ # Anchor not allowed here.
+ curr_pos = start_idx + 1
+ else:
+ # This id isn't defined, leave the markup alone.
+ curr_pos = match.end()
+ continue
+
+ # Otherwise, it isn't markup.
+ curr_pos = start_idx + 1
+
+ return text
+
+
+ _setext_h_re = re.compile(r'^(.+)[ \t]*\n(=+|-+)[ \t]*\n+', re.M)
+ def _setext_h_sub(self, match):
+ n = {"=": 1, "-": 2}[match.group(2)[0]]
+ demote_headers = self.extras.get("demote-headers")
+ if demote_headers:
+ n = min(n + demote_headers, 6)
+ return "<h%d>%s</h%d>\n\n" \
+ % (n, self._run_span_gamut(match.group(1)), n)
+
    # atx-style headers ("## Header").  NOTE(review): the character
    # class is [\#=], so '=' is accepted as a header marker too -- that
    # deviates from standard Markdown; presumably a local tweak, confirm
    # before "fixing".
    _atx_h_re = re.compile(r'''
        ^([\#=]{1,6})  # \1 = string of #'s
        [ \t]*
        (.+?)       # \2 = Header text
        [ \t]*
        (?<!\\)     # ensure not an escaped trailing '#'
        [\#=]*      # optional closing #'s (not counted)
        \n+
        ''', re.X | re.M)
    def _atx_h_sub(self, match):
        """Replace one atx-style header match with an <hN> element,
        where N is the number of marker characters (shifted and capped
        at 6 when the "demote-headers" extra is set)."""
        n = len(match.group(1))
        demote_headers = self.extras.get("demote-headers")
        if demote_headers:
            n = min(n + demote_headers, 6)
        return "<h%d>%s</h%d>\n\n" \
               % (n, self._run_span_gamut(match.group(2)), n)
+
+ def _do_headers(self, text):
+ # Setext-style headers:
+ # Header 1
+ # ========
+ #
+ # Header 2
+ # --------
+ text = self._setext_h_re.sub(self._setext_h_sub, text)
+
+ # atx-style headers:
+ # # Header 1
+ # ## Header 2
+ # ## Header 2 with closing hashes ##
+ # ...
+ # ###### Header 6
+ text = self._atx_h_re.sub(self._atx_h_sub, text)
+
+ return text
+
+
    # List-marker building blocks: '*', '+' or '-' starts an unordered
    # list item; '<digits>.' starts an ordered list item.
    _marker_ul_chars = '*+-'
    _marker_any = r'(?:[%s]|\d+\.)' % _marker_ul_chars
    _marker_ul = '(?:[%s])' % _marker_ul_chars
    _marker_ol = r'(?:\d+\.)'
+
+ def _list_sub(self, match):
+ lst = match.group(1)
+ lst_type = match.group(3) in self._marker_ul_chars and "ul" or "ol"
+ result = self._process_list_items(lst)
+ if self.list_level:
+ return "<%s>\n%s</%s>\n" % (lst_type, result, lst_type)
+ else:
+ return "<%s>\n%s</%s>\n\n" % (lst_type, result, lst_type)
+
    def _do_lists(self, text):
        """Form HTML ordered (numbered) and unordered (bulleted) lists."""

        for marker_pat in (self._marker_ul, self._marker_ol):
            # Re-usable pattern to match any entire ul or ol list:
            less_than_tab = self.tab_width - 1
            whole_list = r'''
                (                   # \1 = whole list
                  (                 # \2
                    [ ]{0,%d}
                    (%s)            # \3 = first list item marker
                    [ \t]+
                  )
                  (?:.+?)
                  (                 # \4
                      \Z
                    |
                      \n{2,}
                      (?=\S)
                      (?!           # Negative lookahead for another list item marker
                        [ \t]*
                        %s[ \t]+
                      )
                  )
                )
                ''' % (less_than_tab, marker_pat, marker_pat)

            # We use a different prefix before nested lists than top-level lists.
            # See extended comment in _process_list_items().
            #
            # Note: There's a bit of duplication here. My original implementation
            # created a scalar regex pattern as the conditional result of the test on
            # $g_list_level, and then only ran the $text =~ s{...}{...}egmx
            # substitution once, using the scalar as the pattern. This worked,
            # everywhere except when running under MT on my hosting account at Pair
            # Networks. There, this caused all rebuilds to be killed by the reaper (or
            # perhaps they crashed, but that seems incredibly unlikely given that the
            # same script on the same server ran fine *except* under MT. I've spent
            # more time trying to figure out why this is happening than I'd like to
            # admit. My only guess, backed up by the fact that this workaround works,
            # is that Perl optimizes the substition when it can figure out that the
            # pattern will never change, and when this optimization isn't on, we run
            # afoul of the reaper. Thus, the slightly redundant code to that uses two
            # static s/// patterns rather than one conditional pattern.

            if self.list_level:
                # Inside a list: a sub-list may start at any line start.
                sub_list_re = re.compile("^"+whole_list, re.X | re.M | re.S)
                text = sub_list_re.sub(self._list_sub, text)
            else:
                # Top level: a list must follow a blank line (or start
                # the document).
                list_re = re.compile(r"(?:(?<=\n\n)|\A\n?)"+whole_list,
                                     re.X | re.M | re.S)
                text = list_re.sub(self._list_sub, text)

        return text
+
    # One list item: optional leading blank line, leading whitespace,
    # marker, then the item text up to (but not including) the next
    # item at the same indent or the end of the list.
    _list_item_re = re.compile(r'''
        (\n)?               # leading line = \1
        (^[ \t]*)           # leading whitespace = \2
        (%s) [ \t]+         # list marker = \3
        ((?:.+?)            # list item text = \4
         (\n{1,2}))         # eols = \5
        (?= \n* (\Z | \2 (%s) [ \t]+))
        ''' % (_marker_any, _marker_any),
        re.M | re.X | re.S)
+
    # Whether the previously rendered <li> ended with a blank line; if
    # so the *next* item is also rendered "loose" (full block gamut).
    _last_li_endswith_two_eols = False
    def _list_item_sub(self, match):
        """Render one matched list item as '<li>...</li>\\n'."""
        item = match.group(4)
        leading_line = match.group(1)
        leading_space = match.group(2)
        if leading_line or "\n\n" in item or self._last_li_endswith_two_eols:
            # "Loose" item: run the full block gamut (may produce <p>).
            item = self._run_block_gamut(self._outdent(item))
        else:
            # "Tight" item: recurse for sub-lists, then span-level only.
            item = self._do_lists(self._outdent(item))
            if item.endswith('\n'):
                item = item[:-1]
            item = self._run_span_gamut(item)
        # Remember for the next item (see note above).
        self._last_li_endswith_two_eols = (len(match.group(5)) == 2)
        return "<li>%s</li>\n" % item
+
    def _process_list_items(self, list_str):
        """Process the contents of a single ordered or unordered list,
        splitting it into individual list items.

        Increments self.list_level for the duration of the call (and
        restores it on return)."""
        # The $g_list_level global keeps track of when we're inside a list.
        # Each time we enter a list, we increment it; when we leave a list,
        # we decrement. If it's zero, we're not in a list anymore.
        #
        # We do this because when we're not inside a list, we want to treat
        # something like this:
        #
        #       I recommend upgrading to version
        #       8. Oops, now this line is treated
        #       as a sub-list.
        #
        # As a single paragraph, despite the fact that the second line starts
        # with a digit-period-space sequence.
        #
        # Whereas when we're inside a list (or sub-list), that line will be
        # treated as the start of a sub-list. What a kludge, huh? This is
        # an aspect of Markdown's syntax that's hard to parse perfectly
        # without resorting to mind-reading. Perhaps the solution is to
        # change the syntax rules such that sub-lists must start with a
        # starting cardinal number; e.g. "1." or "a.".
        self.list_level += 1
        self._last_li_endswith_two_eols = False
        # Normalize to exactly one trailing newline before splitting.
        list_str = list_str.rstrip('\n') + '\n'
        list_str = self._list_item_re.sub(self._list_item_sub, list_str)
        self.list_level -= 1
        return list_str
+
+ def _get_pygments_lexer(self, lexer_name):
+ try:
+ from pygments import lexers, util
+ except ImportError:
+ return None
+ try:
+ return lexers.get_lexer_by_name(lexer_name)
+ except util.ClassNotFound:
+ return None
+
    def _color_with_pygments(self, codeblock, lexer, **formatter_opts):
        """Syntax-color `codeblock` with Pygments using `lexer`.

        The output is wrapped in <div>/<pre>/<code> and carries the CSS
        class "codehilite".  Requires Pygments to be importable; the
        caller (_code_block_sub) only calls this when a lexer was found.
        """
        import pygments
        import pygments.formatters

        class HtmlCodeFormatter(pygments.formatters.HtmlFormatter):
            def _wrap_code(self, inner):
                """A function for use in a Pygments Formatter which
                wraps in <code> tags.
                """
                yield 0, "<code>"
                for tup in inner:
                    yield tup
                yield 0, "</code>"

            def wrap(self, source, outfile):
                """Return the source with a code, pre, and div."""
                return self._wrap_div(self._wrap_pre(self._wrap_code(source)))

        formatter = HtmlCodeFormatter(cssclass="codehilite", **formatter_opts)
        return pygments.highlight(codeblock, lexer, formatter)
+
+ def _code_block_sub(self, match):
+ codeblock = match.group(1)
+ codeblock = self._outdent(codeblock)
+ codeblock = self._detab(codeblock)
+ codeblock = codeblock.lstrip('\n') # trim leading newlines
+ codeblock = codeblock.rstrip() # trim trailing whitespace
+
+ if "code-color" in self.extras and codeblock.startswith(":::"):
+ lexer_name, rest = codeblock.split('\n', 1)
+ lexer_name = lexer_name[3:].strip()
+ lexer = self._get_pygments_lexer(lexer_name)
+ codeblock = rest.lstrip("\n") # Remove lexer declaration line.
+ if lexer:
+ formatter_opts = self.extras['code-color'] or {}
+ colored = self._color_with_pygments(codeblock, lexer,
+ **formatter_opts)
+ return "\n\n%s\n\n" % colored
+
+ codeblock = self._encode_code(codeblock)
+ return "\n\n<pre><code>%s\n</code></pre>\n\n" % codeblock
+
+ def _do_code_blocks(self, text):
+ """Process Markdown `<pre><code>` blocks."""
+ code_block_re = re.compile(r'''
+ (?:\n\n|\A)
+ ( # $1 = the code block -- one or more lines, starting with a space/tab
+ (?:
+ (?:[ ]{%d} | \t) # Lines must start with a tab or a tab-width of spaces
+ .*\n+
+ )+
+ )
+ ((?=^[ ]{0,%d}\S)|\Z) # Lookahead for non-space at line-start, or end of doc
+ ''' % (self.tab_width, self.tab_width),
+ re.M | re.X)
+
+ return code_block_re.sub(self._code_block_sub, text)
+
+
    # Rules for a code span:
    # - backslash escapes are not interpreted in a code span
    # - to include one or a run of more backticks the delimiters must
    #   be a longer run of backticks
    # - cannot start or end a code span with a backtick; pad with a
    #   space and that space will be removed in the emitted HTML
    # See `test/tm-cases/escapes.text` for a number of edge-case
    # examples.
    _code_span_re = re.compile(r'''
            (?<!\\)
            (`+)        # \1 = Opening run of `
            (?!`)       # See Note A test/tm-cases/escapes.text
            (.+?)       # \2 = The code block
            (?<!`)
            \1          # Matching closer
            (?!`)
        ''', re.X | re.S)
+
+ def _code_span_sub(self, match):
+ c = match.group(2).strip(" \t")
+ c = self._encode_code(c)
+ return "<code>%s</code>" % c
+
    def _do_code_spans(self, text):
        """Convert backtick-delimited code spans to <code> elements."""
        # * Backtick quotes are used for <code></code> spans.
        #
        # * You can use multiple backticks as the delimiters if you want to
        #   include literal backticks in the code span. So, this input:
        #
        #     Just type ``foo `bar` baz`` at the prompt.
        #
        #   Will translate to:
        #
        #     <p>Just type <code>foo `bar` baz</code> at the prompt.</p>
        #
        #   There's no arbitrary limit to the number of backticks you
        #   can use as delimiters. If you need three consecutive backticks
        #   in your code, use four for delimiters, etc.
        #
        # * You can use spaces to get literal backticks at the edges:
        #
        #     ... type `` `bar` `` ...
        #
        #   Turns to:
        #
        #     ... type <code>`bar`</code> ...
        return self._code_span_re.sub(self._code_span_sub, text)
+
+ def _encode_code(self, text):
+ """Encode/escape certain characters inside Markdown code runs.
+ The point is that in code, these characters are literals,
+ and lose their special Markdown meanings.
+ """
+ replacements = [
+ # Encode all ampersands; HTML entities are not
+ # entities within a Markdown code span.
+ ('&', '&'),
+ # Do the angle bracket song and dance:
+ ('<', '<'),
+ ('>', '>'),
+ # Now, escape characters that are magic in Markdown:
+ ('*', g_escape_table['*']),
+ ('_', g_escape_table['_']),
+ ('{', g_escape_table['{']),
+ ('}', g_escape_table['}']),
+ ('[', g_escape_table['[']),
+ (']', g_escape_table[']']),
+ ('\\', g_escape_table['\\']),
+ ]
+ for before, after in replacements:
+ text = text.replace(before, after)
+ return text
+
+ _strong_re = re.compile(r"(\*\*|__)(?=\S)(.+?[*_]*)(?<=\S)\1", re.S)
+ _em_re = re.compile(r"(\*|_)(?=\S)(.+?)(?<=\S)\1", re.S)
+ _code_friendly_strong_re = re.compile(r"\*\*(?=\S)(.+?[*_]*)(?<=\S)\*\*", re.S)
+ _code_friendly_em_re = re.compile(r"\*(?=\S)(.+?)(?<=\S)\*", re.S)
+ def _do_italics_and_bold(self, text):
+ # <strong> must go first:
+ if "code-friendly" in self.extras:
+ text = self._code_friendly_strong_re.sub(r"<strong>\1</strong>", text)
+ text = self._code_friendly_em_re.sub(r"<em>\1</em>", text)
+ else:
+ text = self._strong_re.sub(r"<strong>\2</strong>", text)
+ text = self._em_re.sub(r"<em>\2</em>", text)
+ return text
+
+
    # A blockquote: one or more consecutive groups of '>'-prefixed
    # lines (plus their continuation lines) and trailing blanks.
    _block_quote_re = re.compile(r'''
        (                           # Wrap whole match in \1
          (
            ^[ \t]*>[ \t]?          # '>' at the start of a line
              .+\n                  # rest of the first line
            (.+\n)*                 # subsequent consecutive lines
            \n*                     # blanks
          )+
        )
        ''', re.M | re.X)
    # Strips one level of '>' quoting from the start of each line.
    _bq_one_level_re = re.compile('^[ \t]*>[ \t]?', re.M);
+
+ _html_pre_block_re = re.compile(r'(\s*<pre>.+?</pre>)', re.S)
+ def _dedent_two_spaces_sub(self, match):
+ return re.sub(r'(?m)^ ', '', match.group(1))
+
+ def _block_quote_sub(self, match):
+ bq = match.group(1)
+ bq = self._bq_one_level_re.sub('', bq) # trim one level of quoting
+ bq = self._ws_only_line_re.sub('', bq) # trim whitespace-only lines
+ bq = self._run_block_gamut(bq) # recurse
+
+ bq = re.sub('(?m)^', ' ', bq)
+ # These leading spaces screw with <pre> content, so we need to fix that:
+ bq = self._html_pre_block_re.sub(self._dedent_two_spaces_sub, bq)
+
+ return "<blockquote>\n%s\n</blockquote>\n\n" % bq
+
+ def _do_block_quotes(self, text):
+ if '>' not in text:
+ return text
+ return self._block_quote_re.sub(self._block_quote_sub, text)
+
+ def _form_paragraphs(self, text):
+ # Strip leading and trailing lines:
+ text = text.strip('\n')
+
+ # Wrap <p> tags.
+ grafs = re.split(r"\n{2,}", text)
+ for i, graf in enumerate(grafs):
+ if graf in self.html_blocks:
+ # Unhashify HTML blocks
+ grafs[i] = self.html_blocks[graf]
+ else:
+ # Wrap <p> tags.
+ graf = self._run_span_gamut(graf)
+ grafs[i] = "<p>" + graf.lstrip(" \t") + "</p>"
+
+ return "\n\n".join(grafs)
+
+ def _add_footnotes(self, text):
+ if self.footnotes:
+ footer = [
+ '<div class="footnotes">',
+ '<hr' + self.empty_element_suffix,
+ '<ol>',
+ ]
+ for i, id in enumerate(self.footnote_ids):
+ if i != 0:
+ footer.append('')
+ footer.append('<li id="fn-%s">' % id)
+ footer.append(self._run_block_gamut(self.footnotes[id]))
+ backlink = ('<a href="#fnref-%s" '
+ 'class="footnoteBackLink" '
+ 'title="Jump back to footnote %d in the text.">'
+ '↩</a>' % (id, i+1))
+ if footer[-1].endswith("</p>"):
+ footer[-1] = footer[-1][:-len("</p>")] \
+ + ' ' + backlink + "</p>"
+ else:
+ footer.append("\n<p>%s</p>" % backlink)
+ footer.append('</li>')
+ footer.append('</ol>')
+ footer.append('</div>')
+ return text + '\n\n' + '\n'.join(footer)
+ else:
+ return text
+
+ # Ampersand-encoding based entirely on Nat Irons's Amputator MT plugin:
+ # http://bumppo.net/projects/amputator/
+ _ampersand_re = re.compile(r'&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)')
+ _naked_lt_re = re.compile(r'<(?![a-z/?\$!])', re.I)
+ _naked_gt_re = re.compile(r'''(?<![a-z?!/'"-])>''', re.I)
+
+ def _encode_amps_and_angles(self, text):
+ # Smart processing for ampersands and angle brackets that need
+ # to be encoded.
+ text = self._ampersand_re.sub('&', text)
+
+ # Encode naked <'s
+ text = self._naked_lt_re.sub('<', text)
+
+ # Encode naked >'s
+ # Note: Other markdown implementations (e.g. Markdown.pl, PHP
+ # Markdown) don't do this.
+ text = self._naked_gt_re.sub('>', text)
+ return text
+
+ def _encode_backslash_escapes(self, text):
+ for ch, escape in g_escape_table.items():
+ text = text.replace("\\"+ch, escape)
+ return text
+
+ _auto_link_re = re.compile(r'<((https?|ftp):[^\'">\s]+)>', re.I)
+ def _auto_link_sub(self, match):
+ g1 = match.group(1)
+ return '<a href="%s">%s</a>' % (g1, g1)
+
+ _auto_email_link_re = re.compile(r"""
+ <
+ (?:mailto:)?
+ (
+ [-.\w]+
+ \@
+ [-\w]+(\.[-\w]+)*\.[a-z]+
+ )
+ >
+ """, re.I | re.X | re.U)
+ def _auto_email_link_sub(self, match):
+ return self._encode_email_address(
+ self._unescape_special_chars(match.group(1)))
+
+ def _do_auto_links(self, text):
+ text = self._auto_link_re.sub(self._auto_link_sub, text)
+ text = self._auto_email_link_re.sub(self._auto_email_link_sub, text)
+ return text
+
+ def _encode_email_address(self, addr):
+ # Input: an email address, e.g. "foo@example.com"
+ #
+ # Output: the email address as a mailto link, with each character
+ # of the address encoded as either a decimal or hex entity, in
+ # the hopes of foiling most address harvesting spam bots. E.g.:
+ #
+ # <a href="mailto:foo@e
+ # xample.com">foo
+ # @example.com</a>
+ #
+ # Based on a filter by Matthew Wickline, posted to the BBEdit-Talk
+ # mailing list: <http://tinyurl.com/yu7ue>
+ chars = [_xml_encode_email_char_at_random(ch)
+ for ch in "mailto:" + addr]
+ # Strip the mailto: from the visible part.
+ addr = '<a href="%s">%s</a>' \
+ % (''.join(chars), ''.join(chars[7:]))
+ return addr
+
+ def _do_link_patterns(self, text):
+ """Caveat emptor: there isn't much guarding against link
+ patterns being formed inside other standard Markdown links, e.g.
+ inside a [link def][like this].
+
+ Dev Notes: *Could* consider prefixing regexes with a negative
+ lookbehind assertion to attempt to guard against this.
+ """
+ link_from_hash = {}
+ for regex, repl in self.link_patterns:
+ replacements = []
+ for match in regex.finditer(text):
+ if hasattr(repl, "__call__"):
+ href = repl(match)
+ else:
+ href = match.expand(repl)
+ replacements.append((match.span(), href))
+ for (start, end), href in reversed(replacements):
+ escaped_href = (
+ href.replace('"', '"') # b/c of attr quote
+ # To avoid markdown <em> and <strong>:
+ .replace('*', g_escape_table['*'])
+ .replace('_', g_escape_table['_']))
+ link = '<a href="%s">%s</a>' % (escaped_href, text[start:end])
+ hash = md5(link).hexdigest()
+ link_from_hash[hash] = link
+ text = text[:start] + hash + text[end:]
+ for hash, link in link_from_hash.items():
+ text = text.replace(hash, link)
+ return text
+
+ def _unescape_special_chars(self, text):
+ # Swap back in all the special characters we've hidden.
+ for ch, hash in g_escape_table.items():
+ text = text.replace(hash, ch)
+ return text
+
+ def _outdent(self, text):
+ # Remove one level of line-leading tabs or spaces
+ return self._outdent_re.sub('', text)
+
+
class MarkdownWithExtras(Markdown):
    """A markdowner class that enables most extras:

    - footnotes
    - code-color (only has effect if 'pygments' Python module on path)

    These are not included:
    - pyshell (specific to Python-related documenting)
    - code-friendly (because it *disables* part of the syntax)
    - link-patterns (because you need to specify some actual
      link-patterns anyway)
    """
    # Same interface as Markdown; only the default `extras` differ.
    extras = ["footnotes", "code-color"]
+
+
+#---- internal support functions
+
+# From http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/52549
+def _curry(*args, **kwargs):
+ function, args = args[0], args[1:]
+ def result(*rest, **kwrest):
+ combined = kwargs.copy()
+ combined.update(kwrest)
+ return function(*args + rest, **combined)
+ return result
+
+# Recipe: regex_from_encoded_pattern (1.0)
+def _regex_from_encoded_pattern(s):
+ """'foo' -> re.compile(re.escape('foo'))
+ '/foo/' -> re.compile('foo')
+ '/foo/i' -> re.compile('foo', re.I)
+ """
+ if s.startswith('/') and s.rfind('/') != 0:
+ # Parse it: /PATTERN/FLAGS
+ idx = s.rfind('/')
+ pattern, flags_str = s[1:idx], s[idx+1:]
+ flag_from_char = {
+ "i": re.IGNORECASE,
+ "l": re.LOCALE,
+ "s": re.DOTALL,
+ "m": re.MULTILINE,
+ "u": re.UNICODE,
+ }
+ flags = 0
+ for char in flags_str:
+ try:
+ flags |= flag_from_char[char]
+ except KeyError:
+ raise ValueError("unsupported regex flag: '%s' in '%s' "
+ "(must be one of '%s')"
+ % (char, s, ''.join(flag_from_char.keys())))
+ return re.compile(s[1:idx], flags)
+ else: # not an encoded regex
+ return re.compile(re.escape(s))
+
+# Recipe: dedent (0.1.2)
+def _dedentlines(lines, tabsize=8, skip_first_line=False):
+ """_dedentlines(lines, tabsize=8, skip_first_line=False) -> dedented lines
+
+ "lines" is a list of lines to dedent.
+ "tabsize" is the tab width to use for indent width calculations.
+ "skip_first_line" is a boolean indicating if the first line should
+ be skipped for calculating the indent width and for dedenting.
+ This is sometimes useful for docstrings and similar.
+
+ Same as dedent() except operates on a sequence of lines. Note: the
+ lines list is modified **in-place**.
+ """
+ DEBUG = False
+ if DEBUG:
+ print "dedent: dedent(..., tabsize=%d, skip_first_line=%r)"\
+ % (tabsize, skip_first_line)
+ indents = []
+ margin = None
+ for i, line in enumerate(lines):
+ if i == 0 and skip_first_line: continue
+ indent = 0
+ for ch in line:
+ if ch == ' ':
+ indent += 1
+ elif ch == '\t':
+ indent += tabsize - (indent % tabsize)
+ elif ch in '\r\n':
+ continue # skip all-whitespace lines
+ else:
+ break
+ else:
+ continue # skip all-whitespace lines
+ if DEBUG: print "dedent: indent=%d: %r" % (indent, line)
+ if margin is None:
+ margin = indent
+ else:
+ margin = min(margin, indent)
+ if DEBUG: print "dedent: margin=%r" % margin
+
+ if margin is not None and margin > 0:
+ for i, line in enumerate(lines):
+ if i == 0 and skip_first_line: continue
+ removed = 0
+ for j, ch in enumerate(line):
+ if ch == ' ':
+ removed += 1
+ elif ch == '\t':
+ removed += tabsize - (removed % tabsize)
+ elif ch in '\r\n':
+ if DEBUG: print "dedent: %r: EOL -> strip up to EOL" % line
+ lines[i] = lines[i][j:]
+ break
+ else:
+ raise ValueError("unexpected non-whitespace char %r in "
+ "line %r while removing %d-space margin"
+ % (ch, line, margin))
+ if DEBUG:
+ print "dedent: %r: %r -> removed %d/%d"\
+ % (line, ch, removed, margin)
+ if removed == margin:
+ lines[i] = lines[i][j+1:]
+ break
+ elif removed > margin:
+ lines[i] = ' '*(removed-margin) + lines[i][j+1:]
+ break
+ else:
+ if removed:
+ lines[i] = lines[i][removed:]
+ return lines
+
def _dedent(text, tabsize=8, skip_first_line=False):
    """_dedent(text, tabsize=8, skip_first_line=False) -> dedented text

    "text" is the text to dedent.
    "tabsize" is the tab width to use for indent width calculations.
    "skip_first_line" is a boolean indicating if the first line should
        be skipped for calculating the indent width and for dedenting.
        This is sometimes useful for docstrings and similar.

    Like textwrap.dedent(s), but does not expand tabs to spaces.
    """
    dedented = text.splitlines(1)
    _dedentlines(dedented, tabsize=tabsize, skip_first_line=skip_first_line)
    return ''.join(dedented)
+
+
+class _memoized(object):
+ """Decorator that caches a function's return value each time it is called.
+ If called later with the same arguments, the cached value is returned, and
+ not re-evaluated.
+
+ http://wiki.python.org/moin/PythonDecoratorLibrary
+ """
+ def __init__(self, func):
+ self.func = func
+ self.cache = {}
+ def __call__(self, *args):
+ try:
+ return self.cache[args]
+ except KeyError:
+ self.cache[args] = value = self.func(*args)
+ return value
+ except TypeError:
+ # uncachable -- for instance, passing a list as an argument.
+ # Better to not cache than to blow up entirely.
+ return self.func(*args)
+ def __repr__(self):
+ """Return the function's docstring."""
+ return self.func.__doc__
+
+
def _xml_oneliner_re_from_tab_width(tab_width):
    """Standalone XML processing instruction regex for the given tab
    width (memoized below: one compiled regex per tab width)."""
    return re.compile(r"""
        (?:
            (?<=\n\n)       # Starting after a blank line
            |               # or
            \A\n?           # the beginning of the doc
        )
        (                   # save in $1
            [ ]{0,%d}
            (?:
                <\?\w+\b\s+.*?\?>   # XML processing instruction
                |
                <\w+:\w+\b\s+.*?/>  # namespaced single tag
            )
            [ \t]*
            (?=\n{2,}|\Z)   # followed by a blank line or end of document
        )
        """ % (tab_width - 1), re.X)
_xml_oneliner_re_from_tab_width = _memoized(_xml_oneliner_re_from_tab_width)
+
def _hr_tag_re_from_tab_width(tab_width):
    # Standalone <hr> tag on a line of its own (memoized below: one
    # compiled regex per tab width).
    return re.compile(r"""
        (?:
            (?<=\n\n)       # Starting after a blank line
            |               # or
            \A\n?           # the beginning of the doc
        )
        (                   # save in \1
            [ ]{0,%d}
            <(hr)           # start tag = \2
            \b              # word break
            ([^<>])*?       # attributes, no nested angle brackets
            /?>             # the matching end tag
            [ \t]*
            (?=\n{2,}|\Z)   # followed by a blank line or end of document
        )
        """ % (tab_width - 1), re.X)
_hr_tag_re_from_tab_width = _memoized(_hr_tag_re_from_tab_width)
+
+
+def _xml_encode_email_char_at_random(ch):
+ r = random()
+ # Roughly 10% raw, 45% hex, 45% dec.
+ # '@' *must* be encoded. I [John Gruber] insist.
+ if r > 0.9 and ch != "@":
+ return ch
+ elif r < 0.45:
+ # The [1:] is to drop leading '0': 0x63 -> x63
+ return '&#%s;' % hex(ord(ch))[1:]
+ else:
+ return '&#%s;' % ord(ch)
+
+def _hash_text(text):
+ return 'md5:'+md5(text.encode("utf-8")).hexdigest()
+
+
+
# Leftover manual-test scaffolding (a sample document plus disabled
# conversion calls); not used by the webber hooks below.
text = """\
Dies ist ein Text.

---

* Test
* Mu
* Blah
"""

#markdown(text, html4tags=False, tab_width=DEFAULT_TAB_WIDTH,
#    safe_mode=None, extras=None, link_patterns=None):
#html = markdown(text, html4tags=False)
#
#print html
+
+
+
@set_hook("read")
def read(params):
    """webber "read" hook for Markdown sources.

    For files ending in ".md": mark them to be rendered as HTML and
    return the remaining content from read_keywords() (NOTE(review):
    presumably this consumes a leading keyword/metadata header --
    confirm against webber's File API).  Other files are left for the
    next plugin.
    """
    src = params.file
    if not src.rel_path.endswith(".md"):
        return
    src.render = "html"
    return src.read_keywords().read()
+
+
# Lazily-created, shared Markdown converter instance (see htmlize()).
_markdown = None

@set_hook("htmlize")
def htmlize(params):
    """Parse Markdown and convert it to HTML :-)"""

    file = params.file
    if not file.rel_path.endswith(".md"):
        return

    # Build the converter once and reuse it for every page.
    global _markdown
    if not _markdown:
        _markdown = Markdown(extras={
            "code-friendly":True,
            "xml":True,
            "demote-headers":1,
            "code-color":{}})
    # NOTE(review): convert() is called with the file's input encoding
    # as a second argument -- presumably a webber-local extension of
    # the markdown2 API; confirm against the Markdown class above.
    html = _markdown.convert(params.file.contents, params.file.input_encoding)
    #print type(html)
    #print html
    return html
--- /dev/null
+# -*- coding: iso-8859-1 -*-
+from webber import *
+from docutils.writers import html4css1
+from docutils import core
+
+
@set_hook("read")
def read(params):
    """Claim *.rst files: mark them for HTML rendering and return their text.

    read_keywords() consumes the 'key: value' header lines first, so the
    returned text is only the page body."""
    file = params.file
    if not file.rel_path.endswith(".rst"):
        return
    file.render = "html"
    handle = file.read_keywords()
    return handle.read()
+
+
+
class WebHTMLTranslator(html4css1.HTMLTranslator):
    """HTML translator that emits a bare page fragment: no doctype, no
    <head>/<body> scaffolding and no stylesheet — the surrounding page
    template supplies all of that."""

    doctype = ""
    # hide the content-type / generator metadata inside HTML comments
    content_type = "<!--%s-->"
    generator = "<!--%s-->"

    def __init__(self, document):
        html4css1.HTMLTranslator.__init__(self, document)
        # drop all of docutils' document framing
        self.head_prefix = []
        self.body_prefix = []
        self.stylesheet = []
        self.body_suffix = []
        # start sections at H2; H1 is produced by the page template
        self.section_level = 1

    def visit_system_message(self, node):
        # suppress docutils warnings in the rendered output
        pass

    def visit_document (self, node):
        pass

    def depart_document (self, node):
        pass
+
class WebWriter(html4css1.Writer):
    """html4css1 writer wired to WebHTMLTranslator (fragment output)."""
    def __init__ (self):
        html4css1.Writer.__init__(self)
        self.translator_class = WebHTMLTranslator
+
+
@set_hook("htmlize")
def htmlize(params):
    "Parse text as RST and convert it to HTML"

    file = params.file
    if not file.rel_path.endswith(".rst"):
        return

    settings = {
        # cloak email addresses to reduce spam
        'cloak_email_addresses': 1,
        # Emit headers as H2, because H1 is already used
        'doctitle_xform': False,
        'strip_comments': 'true',
        # TODO: language_code?
    }
    # http://docutils.sourceforge.net/docs/dev/hacking.html
    # /usr/share/doc/python-docutils/
    doctree = core.publish_doctree(
        source_path=file.rel_path,
        source=file.contents,
        settings_overrides=settings)
    return core.publish_from_doctree(
        doctree,
        writer=WebWriter(),
        writer_name='html',
        destination_path=file.rel_path,
        settings_overrides=settings)
--- /dev/null
+# -*- coding: iso-8859-1 -*-
+from webber import *
+
+
+#
+# The hook "addoptions" can be used by plugins to add their own
+# command line options.
+#
+# params.parser contains the optparse based parser
+#
@set_hook("addoptions")
def test_addoptions(params):
    """Demo: add this plugin's --test_verbose option to the shared parser."""
    parser = params.parser
    parser.add_option(
        "-V", "--test_verbose", action="count",
        dest="test_verbose", default=0,
        help="print status messages to stdout")
+
+
+#
+# After the command-line options have been processed and incorporated into
+# config object, the hook "checkconfig" is called. Here each plugin can
+# check if the specified configurations are sane.
+#
+# params is empty, use cfg instead
+#
+@set_hook("checkconfig")
+def checkconfig(params):
+ if cfg.test_verbose:
+ print "in skeleton.checkconfig"
+ #cfg.blah = "muh"
+
+
+#
+# Just before walking the directory tree, the hook "start"
+# get's called.
+#
+# param is empty
+#
+@set_hook("start")
+def finish(params):
+ if cfg.test_verbose:
+ print "in skeleton.start"
+
+
+#
+# For each file that is not excluded (and not in an excluded directory, the
+# hook "read" is called. Usually a reader-plugin (e.g. "rst" or
+# "markdown") looks at the file extension of the file parameter.
+#
+# If the plugin declares itself responsible for this file, it should return
+# the contents of the file. It also should set file.reader to some text
+# string that describes itself.
+#
+# params.direc contains a "class Directory" object
+# params.file contains a "class File" object
+#
+@set_hook("read")
+def read(params):
+ if cfg.test_verbose:
+ print "in skeleton.read", params.file.rel_path
+ #return "contents of file"
+
+
+#
+# After a file has been read in, any plugin can filter it's raw
+# text.
+#
+# params.direc contains the "class Directory" object
+# params.file has the "class File" object
+# params.contents contains the text
+#
# NOTE: the function name shadows the 'filter' builtin inside this module;
# kept because the convention here is hook name == function name.
@set_hook("filter")
def filter(params):
    """Demo filter: may rewrite the raw text before scanning/rendering."""
    if cfg.test_verbose:
        print "in skeleton.filter", params.file.rel_path
    # at very high verbosity, demonstrate replacing the contents
    if cfg.verbose > 6:
        params.contents = "contents deleted by skeleton.filter"
+
+
+#
+# "scan" should scan for meta-data, mostly for links.
+#
+# params.direc contains the "class Directory" object
+# params.file has the "class File" object
+# params.file.contents contains the text
+#
+@set_hook("scan")
+def scan(params):
+ if cfg.test_verbose:
+ print "in skeleton.scan", params.file.rel_path
+
+
+#
+# "scan_done" is called once after all files have been scanned
+#
+# params is empty
+#
+@set_hook("scan_done")
+def scan_done(params):
+ if cfg.test_verbose:
+ print "in skeleton.scan_done"
+
+
+#
+# The "htmlize" converts the contents into html. The
+# first htmlize hook that returs anything wins, no other
+# htmlize hooks will be called.
+#
+# params.direc contains the "class Directory" object
+# params.file has the "class File" object
+# params.file.contents contains the text
+#
+@set_hook("htmlize")
+def htmlize(params):
+ if cfg.test_verbose:
+ print "in skeleton.htmlize", params.file.rel_path
+
+
+#
+# The "linkify" hook converts any link to html.
+#
+# params.direc contains the "class Directory" object
+# params.file has the "class File" object
+# params.file.contents contains body text of the page
+#
+@set_hook("linkify")
+def linkify(params):
+ if cfg.test_verbose:
+ print "in skeleton.linkify", params.file.rel_path
+
+
+#
+# At the very end of the program execution, the hook "finish"
+# get's called.
+#
+# params is empty
+#
+@set_hook("finish")
+def finish(params):
+ if cfg.test_verbose:
+ print "in skeleton.finish"
+
+
+
+# TODO: Description missing
+@set_macro("sample")
+def sample_macro(params):
+ if cfg.test_verbose:
+ print "in macro skeleton.sample_macro, params:", params
+ return "{ output of sample macro }"
+
+
+
+# TODO: Description missing
+@set_function("func")
+def sample_func():
+ if cfg.test_verbose:
+ print "in macro skeleton.sample_func"
+ return "{ output from sample function }"
--- /dev/null
+# -*- coding: iso-8859-1 -*-
+from webber import *
+from mako.lookup import TemplateLookup
+import os
+
+"""
+The make template renders a *.tmpl file which can contain things like
+
+ ${file} the current File object
+ ${body} HTML for the main contents
+ ${rootpath} (relative!) path to the web site root directory
+ ${description} used for meta=
+ ${keywords} used for meta=
+
+ ... and also all functions decorated with "@set_function(name)".
+"""
+
+
# (template filename, extra lookup dir) -> compiled mako template
template_cache = {}

def get_template(file):
    """Return the compiled mako template for file.template, caching it.

    Templates are looked up first next to the source file, then in the
    site-wide style directory. A missing '.tmpl' suffix is added.
    """
    filename = file.template
    extra_dir = os.path.split(file.path)[0]
    if not filename.endswith('.tmpl'):
        filename += '.tmpl'
    key = "%s %s" % (filename, extra_dir)
    # 'key in dict' instead of the deprecated dict.has_key()
    if key not in template_cache:
        lookup = TemplateLookup(
            directories=(extra_dir, file.style_dir),
            output_encoding=file.output_encoding,
            encoding_errors='replace',
            filesystem_checks=False)
        template_cache[key] = lookup.get_template(filename)
    return template_cache[key]
+
+
@set_hook("pagetemplate")
def pagetemplate(params):
    """Render the page body through the file's mako template.

    Exposes ${file}, ${body}, ${rootpath}, ${description}, ${keywords}
    and all @set_function() functions to the template; returns the full
    page text.
    """
    file = params.file

    kw = {}
    kw["file"] = file
    # templates always receive unicode; non-unicode contents is assumed
    # to be iso-8859-1 (the site's legacy encoding)
    if isinstance(file.contents, unicode):
        kw["body"] = file.contents
    else:
        kw["body"] = unicode(file.contents, 'iso-8859-1')

    # Relative path back to the site root: one "../" per directory level
    # of the output path ("" for files at the root).
    kw["rootpath"] = "../" * file.out_path.count("/")

    # description/keywords are optional page headers; default to empty.
    # NOTE(review): narrowed from bare 'except:'; Holder attribute access
    # is expected to raise AttributeError or KeyError — confirm.
    try:
        kw["description"] = file.description
    except (AttributeError, KeyError):
        kw["description"] = ""
    try:
        kw['keywords'] = file.keywords
    except (AttributeError, KeyError):
        kw['keywords'] = []

    kw.update(functions)
    tmpl = get_template(file)
    return tmpl.render(**kw)
--- /dev/null
+#!/usr/bin/python
+# -*- coding: iso-8859-1 -*-
+
+import sys
+from webber import main
+
+if __name__ == "__main__":
+ if '--profile' in sys.argv:
+
+ sys.argv.remove('--profile')
+ print 'Starting profile'
+
+ import hotshot, hotshot.stats
+ prof = hotshot.Profile('newbuild.prof')
+ prof.runcall(main)
+ prof.close()
+
+ print 'Profile completed'
+
+ stats = hotshot.stats.load('newbuild.prof')
+ #stats.strip_dirs()
+ stats.sort_stats('time', 'calls')
+ stats.print_stats(50)
+
+ else:
+ main()
--- /dev/null
+# -*- coding: iso-8859-1 -*-
+import sys, os, optparse, fnmatch, stat, re, time, types
+from config import Holder
+
+
+
+###############################################################################
+#
+# Global variables
+#
+
# Public API of this module: everything a plugin receives via
# "from webber import *".
__all__ = [
    # Globals
    "cfg",          # configuration from webber.ini
    "directories",  # global hash of directories, by rel_path
    "files",        # global hash of files, by rel_path
    "functions",    # all exported template functions

    # Functions
    "set_hook",     # decorator for hook-functions
    "set_macro",    # define macro
    "set_function", # define functions for the template
    "get_file_for",
    "get_link_from",
    "get_current_file", # because mako-called functions cannot access the
                        # current File object
    "get_program_directory",
    "log",          # misc logging functions
    "info",
    "warning",
    "error",
    ]
+
+
+
+###############################################################################
+#
+# Configuration class
+#
+
+cfg = Holder()
+
+
+
+directories = {}
+
class Directory(Holder):
    """This stores per-directory information. Each file has a pointer
    to a directory object.

    Instantiating a Directory also registers it in the global
    'directories' hash under its rel_path."""

    def __init__(self, **kw):
        Holder.__init__(self, **kw)
        directories[kw["rel_path"]] = self
+
+
# Global hash of File objects, keyed on rel_path.
files = {}
# File currently being rendered; exposed through get_current_file() for
# template functions that have no other way to reach it.
current_file = None
+
class File(Holder):
    """Per-file metadata and contents.

    Instantiating a File registers it in the global 'files' hash under
    its rel_path and initializes mtime/ctime from the file system.
    """

    def __init__(self, **kw):
        Holder.__init__(self, **kw)
        files[kw["rel_path"]] = self
        self.render = None      # set by read hooks, e.g. to "html"
        mtime = os.stat(self.path)[stat.ST_MTIME]
        self.mtime = mtime
        # until a 'ctime:' header overrides it, creation == modification
        self.ctime = mtime

    # one "key: value" header line at the top of a source file
    reKeywords = re.compile(r'(\S+)\s*:\s*(.*)')

    def read_keywords(self, terminate_line=""):
        """Opens the file and reads "key: value" pairs on the top of it. Returns
        the open file handle for further processing by some plugins/read_*.py code.

        'mtime'/'ctime' values are parsed as ISO timestamps; the first
        'title' also becomes the default 'linktitle'. The caller owns the
        returned file handle and is responsible for closing it."""
        f = open(self.path)
        while True:
            s = f.readline().strip()
            if s == terminate_line:
                break
            m = self.reKeywords.match(s)
            if not m:
                warning("%s: wrong 'key: value' line '%s'" % (self.rel_path, s))
                break
            key = m.group(1).lower()
            val = m.group(2)

            if key in ("mtime", "ctime"):
                val = iso_to_time(val)

            # 'key in self' instead of the deprecated has_key()
            if key == "title" and "linktitle" not in self:
                self["linktitle"] = val

            self[key] = val
        return f
+
+
# memoizes get_file_for() lookups: name -> File
_get_file_for_cache = {}
def get_file_for(name):
    """webber.files is an hash of File objects, but keyed on the real file name.
    This function returns a File object for a specific linktitle.

    Falls back to an exact rel_path match; returns None when nothing
    matches."""

    try:
        return _get_file_for_cache[name]
    except KeyError:
        pass

    for rel_path in files:
        f = files[rel_path]
        try:
            if f.linktitle == name:
                _get_file_for_cache[name] = f
                return f
        except (AttributeError, KeyError):
            # not every File has a linktitle
            # NOTE(review): narrowed from bare 'except:'; confirm Holder
            # raises one of these for missing attributes
            pass
        # Allow exact match as well
        if rel_path == name:
            _get_file_for_cache[name] = f
            return f
+
+
def relpath(base_path, target):
    """\
    Return a relative path to the target from either the current directory
    or an optional base directory.

    Base can be a directory specified either as absolute or relative
    to current directory.

    Returns None when no relative path exists (different drives, or two
    unrelated relative paths)."""
    # Code from http://code.activestate.com/recipes/302594/

    def commonpath(a, b):
        """Return the longest common ancestor path of a and b, or None
        if they share none.

        Unlike the strange commonprefix:
        - this returns valid path
        - accepts only two arguments
        """
        if a == b:
            return a
        while len(a) > 0:
            if a == b:
                return a
            if len(a) > len(b):
                a = os.path.dirname(a)
            else:
                b = os.path.dirname(b)
        return None

    base_path = os.path.normpath(os.path.normcase(base_path))
    target = os.path.normpath(os.path.normcase(target))

    if base_path == target:
        return '.'

    # On the windows platform the target may be on a different drive.
    if os.path.splitdrive(base_path)[0] != os.path.splitdrive(target)[0]:
        return None

    common = commonpath(base_path, target)
    if common is None:
        # Two unrelated relative paths (e.g. "foo" vs "bar"): the original
        # code crashed here with len(None); report "no relative path"
        # just like the different-drive case above.
        return None
    common_path_len = len(common)

    # If the common path is just the drive root, it still carries the
    # separator; step back one so the slicing below stays correct.
    base_drv, base_dir = os.path.splitdrive(base_path)
    if common_path_len == len(base_drv) + 1:
        common_path_len -= 1

    # if base_path is root directory - no directories up
    if base_dir == os.sep:
        dirs_up = 0
    else:
        dirs_up = base_path[common_path_len:].count(os.sep)

    ret = os.sep.join([os.pardir] * dirs_up)
    if len(target) > common_path_len:
        ret = os.path.join(ret, target[common_path_len + 1:])

    return ret
+
+
def get_link_from(source, dest):
    """Return a relative link from page 'source' to page 'dest'.

    Both arguments are resolved via get_file_for(); "." is returned when
    either page is unknown. The link points at dest's rendered out_path
    (or just its directory when dest was never rendered).
    """
    source = get_file_for(source)
    if not source:
        return "."
    dest = get_file_for(dest)
    if not dest:
        return "."
    rel_path = relpath(directories[source.direc].abs_path,
                       directories[dest.direc].abs_path)
    try:
        out_path = dest.out_path
    except (AttributeError, KeyError):
        # dest has no rendered output; link to its directory instead.
        # NOTE(review): narrowed from bare 'except:'; confirm Holder
        # raises one of these for missing attributes.
        out_path = ""
    rel_path = os.path.join(rel_path, os.path.split(out_path)[1])
    if rel_path.startswith("./"):
        rel_path = rel_path[2:]
    return rel_path
+
+
+
+###############################################################################
+#
+# Utility functions
+#
+
+
def get_program_directory():
    """Return the path to the directory containing the build software."""
    import __main__
    directory = os.path.dirname(__main__.__file__)
    # dirname() is "" when the program was started via a bare filename
    return directory or os.getcwd()
+
+
+
+###############################################################################
+#
+# Logging
+#
+# 1 Error
+# 2 Warning
+# 3 Info
+# 4 Log
+# 5... Debug
+#
+def log(s, level=4):
+ if level>4:
+ indent = " " * (level-4)
+ else:
+ indent = ""
+ if level <= cfg.verbose:
+ print "%s%s" % (indent, s)
+
def error(s):
    """Log s at level 1 (error)."""
    log("error: %s" % s, 1)

def warning(s):
    """Log s at level 2 (warning)."""
    log("warning: %s" % s, 2)

def info(s):
    """Log s at level 3 (info)."""
    log("info: %s" % s, 3)
+
+
+
+###############################################################################
+#
+# Hooks and plugins
+#
+
+
+# IkiWiki does something like this:
+# At startup:
+# getopt modify ARGV
+# checkconfig check configuration
+# refresh allow plugins to build source files
+# While scanning files:
+# needsbuild detect if page needs to be rebuild
+# filter arbitrary changes
+# scan collect metadata
+# While rendering files:
+# filter arbitrary changes
+# preprocess execute macros
+# linkify change wikilinks into links
+# htmlize turns text into html
+# sanitize sanitize html
+# templatefile allows changing of the template on a per-file basis
+# pagetemplate fill template with page
+# format similar to sanitize, but act on whole page body
+# At the end:
+# savestate plugins can save their state
+#
+#
+# We do something like this:
+#
+# At startup:
+# addoptions allow plugins to add command-line options
+# checkconfig check configuration
+# start
+# While reading files:
+# read ask any reader (plugins!) to read the file
+# filter ask anybody to filter the contents
+# While scanning files:
+# scan called per file, let plugins act on file data
+# scan_done Allows post-processing of scanned data
+# While rendering files:
+# htmlize turns text into html-part
+# linkify convert link macros to HTML
+# pagetemplate ask template engine (plugin!) to generate HTML out
+# of template and body part
+# At the end:
+# finish
+#
+# For more info, see plugins/skeleton.py
+#
+
+
# hook name -> list of registered hook functions
hooks = {}

def load_plugins():
    """Loads all plugins in the plugins directory.

    Each name in cfg.plugins is imported; the plugin's @set_hook /
    @set_macro / @set_function decorators register everything as a side
    effect of the import."""
    sys.path.append(os.path.join(get_program_directory(), "plugins"))
    for name in cfg.plugins:
        # __import__ instead of the old 'exec "import %s"': same effect
        # (module ends up in sys.modules), no string-built code.
        __import__(name)


def set_hook(name, last=False):
    """This is a decorator, used for mostly plugins, which can append the
    attached function to some hook.

    @param last: when True, run_hooks() calls this function after all
                 non-last hooks of the same name."""
    def inside_set_hook(func):
        if name not in hooks:
            hooks[name] = []
        func.last = last
        hooks[name].append(func)
        return func
    return inside_set_hook
+
+
def run_hooks(name, **kw):
    """This runs hooks that are marked with @set_hook("name").

    Keyword args are wrapped into a Holder passed to every hook. Two
    control keywords:
      stop_if_result: return the first truthy hook result immediately
      return_holder:  return the args Holder (default) instead of the
                      last hook's result
    Hooks registered with last=True run after all others. Returns None
    when no hooks are registered under 'name'.
    """
    args = Holder(**kw)
    args.setDefault("stop_if_result", False)
    args.setDefault("return_holder", True)

    # run_hooks() is called before cfg.verbose has been set; ignore the
    # resulting AttributeError. (The original code used a bare 'except:'
    # with a no-op 'AttributeError' expression — an obvious typo.)
    try:
        log("running hook '%s'" % name, level=7)
    except AttributeError:
        pass

    if name in hooks:
        delay = []
        for func in hooks[name]:
            if func.last:
                delay.append(func)
                continue
            res = func(args)
            if args.stop_if_result and res:
                return res
        for func in delay:
            res = func(args)
            if args.stop_if_result and res:
                return res
    else:
        return None
    if args.return_holder:
        return args
    else:
        # NOTE(review): 'res' is unbound when hooks[name] is empty;
        # pre-existing behavior, left unchanged.
        return res
+
+
# macro name -> handler function, see run_macros()
macros = {}

def set_macro(name):
    """This is a decorator, used for mark executable macros.

    On a duplicate name the error is logged and the decorated name is
    bound to None (unchanged from the original behavior)."""
    def inside_set_macro(func):
        if name in macros:
            error("macro %s already defined" % name)
            return
        macros[name] = func
        return func
    return inside_set_macro
+
# function name -> callable exported into page templates
functions = {}

def set_function(name):
    """This is a decorator, used for mark executable functions.

    On a duplicate name the error is logged and the decorated name is
    bound to None (unchanged from the original behavior)."""
    def inside_set_function(func):
        if name in functions:
            error("function %s already defined" % name)
            return
        functions[name] = func
        return func
    return inside_set_function
+
+
def iso_to_time(val):
    """Parse 'YYYY-MM-DD[ HH:MM]' into a Unix timestamp (local time).

    Logs a warning and returns None when val matches neither format.
    (The original error path referenced the undefined names self.rel_path
    and s, raising NameError, and then used the unbound 't'.)
    """
    for fmt in ("%Y-%m-%d %H:%M", "%Y-%m-%d"):
        try:
            return int(time.mktime(time.strptime(val, fmt)))
        except ValueError:
            pass
    warning("wrong ISO format in '%s'" % val)
    return None
+
@set_function("format_date")
def format_date(timestamp):
    """Template function: format a Unix timestamp with cfg.date_format (local time)."""
    return time.strftime(cfg.date_format, time.localtime(timestamp))
+
@set_function("get_current_file")
def get_current_file():
    """Template function: return the File currently being rendered.

    Exists because mako-called functions cannot reach the render loop's
    state; render_files() keeps the global 'current_file' up to date."""
    return current_file
+
+
+
+
+
+###############################################################################
+#
+# File reading
+#
+
def read_file(direc, file):
    """
    Ask if some reader wants to read this file. If that happens,
    and the reader reads the file in, the contents is also filtered.

    The result is stored in file.contents

    @param direc: directory the file is in
    @type direc: a L{Directory} object
    @param file: file to process
    @type file: a L{File} object
    """

    contents = run_hooks("read",
            direc=direc,
            file=file,
            stop_if_result=True,
            return_holder=False)
    if not contents:
        # no reader claimed the file; leave file.contents unset
        return

    log("filtering file %s" % file.rel_path, level=6)
    file.contents = contents
    # NOTE(review): the filter hooks' result is discarded here (the old
    # unused 'res =' assignment was dropped) — presumably filters modify
    # the file/args in place; confirm against the filter plugins.
    run_hooks("filter",
            direc=direc,
            file=file)
+
+
def walk_tree(dirpath):
    """
    Walk the input tree rooted at 'dirpath': create a Directory object
    per directory and a File object per non-excluded file, then hand each
    file to read_file().

    @param dirpath: starting directory (must lie below cfg.in_dir)
    @type dirpath: string
    """

    info("Reading files ...")

    def is_excluded(name, patterns):
        """True when name matches one of the shell-style patterns."""
        for pat in patterns:
            # fnmatchcase: case-sensitive on every platform
            if fnmatch.fnmatchcase(name, pat):
                return True
        return False

    def walk(dirpath):
        # rel_path is the path below cfg.in_dir ("" for the root itself)
        rel_path = dirpath[len(cfg.in_dir):]
        direc = Directory(rel_path=rel_path, abs_path=dirpath)
        direc.inheritFrom(cfg)

        if not rel_path: rel_path = "."
        log("reading directory %s" % rel_path, level=4)

        for s in os.listdir(dirpath):
            full_path = os.path.join(dirpath, s)
            if os.path.isdir(full_path):
                if is_excluded(s, cfg.exclude_dir):
                    log("ignoring directory %s" % s, level=7)
                else:
                    walk(full_path)
            if os.path.isfile(full_path):
                if is_excluded(s, cfg.exclude_files):
                    log("ignoring file %s" % s, level=7)
                else:
                    rel_path = relpath(cfg.in_dir, full_path)
                    log("reading file %s" % rel_path, level=5)
                    file = File(
                        path = full_path,
                        rel_path = rel_path,
                        direc = direc.rel_path
                        )
                    file.inheritFrom(direc)
                    read_file(direc, file)

    walk(dirpath)
+
+
+
+###############################################################################
+#
+# Rendering
+#
+
# Matches one [[!name key=value ...]] wiki-style macro call; group 1 is
# the macro name, group 2 the raw argument text (or None).
reMacro = re.compile(r'''
    \[\[\!          # Begin of macro
    \s*
    ([^\s\]]+)      # Macro name
    (?:
        \s+         # optional space
        ([^\]]+)    # optional argumens
    )?
    \]\]            # End of macro
    ''', re.VERBOSE)
# Matches one argument inside a macro: name, optionally followed by
# ="quoted value" (group 2) or an unquoted value (group 3).
reMacroArgs = re.compile(r'''
    ([-_\w]+)       # parameter name
    (?:
        \s*
        =
        \s*
        (?:
            "([^"]*)"   # single-quoted
            |
            (\S+)       # unquoted
        )
    )?
    ''', re.VERBOSE)
+
def run_macros(file, contents):
    """Expand every [[!name key=value ...]] macro in contents.

    The macro handler receives a dict with 'name', 'file' and all parsed
    arguments; its return value replaces the macro text. Unknown macros
    are logged and left unexpanded.
    """
    def do_macro(m):
        name = m.group(1)
        kw = {'name': name}
        if m.group(2):
            for m2 in reMacroArgs.finditer(m.group(2)):
                # quoted value (group 2) wins over unquoted (group 3)
                kw[m2.group(1)] = m2.group(3) or m2.group(2)
        if name in macros:
            kw["file"] = file
            f = macros[name]
            s = f(kw)
            if type(s) == types.UnicodeType:
                s = s.encode("utf-8")
            return s
        else:
            error("macro %s not defined" % name)
            # re.sub() raises TypeError on a None replacement (the old
            # behavior); keep the unexpanded macro text in the page.
            return m.group(0)
    return reMacro.sub(do_macro, contents)
+
+
def scan_files():
    """Run the 'scan' hook for every file that has contents, then run
    'scan_done' once afterwards (as documented in plugins/skeleton.py)."""
    info("Scanning files ...")

    for s in files:
        file = files[s]
        try:
            # Just check if the file has contents
            contents = file.contents
        except (AttributeError, KeyError):
            # no reader claimed this file — skip it.
            # NOTE(review): narrowed from bare 'except:'; confirm Holder
            # raises one of these for a missing attribute.
            continue

        direc = directories[file.direc]

        run_hooks("scan",
                direc=direc,
                file=file)
    # once, after all files have been scanned
    run_hooks("scan_done")
+
+
def render_files():
    """Render all files in two passes and write the results below
    cfg.out_dir.

    Pass 1 expands macros and converts each file's contents to an HTML
    fragment (or delegates to a non-default renderer hook). Pass 2 runs
    'linkify' and 'pagetemplate' and writes the final page. Pass 1 must
    finish for *all* files first, because resolving links needs every
    file's out_path."""
    info("Rendering files ...")

    # Pass 1: macro expansion + htmlize; computes file.out_path.
    for fname_in in files:
        global current_file
        file = files[fname_in]
        current_file = file

        # Do we have a renderer?
        if file.render is None:
            log("unhandled file: %s" % file.rel_path, 7)
            continue

        # Is the renderer not the default HTML renderer?
        if file.render != "html":
            run_hooks(file.render,
                file=file,
                stop_if_result=True,
                return_holder=False)
            continue

        # Run default renderer
        direc = directories[file.direc]

        contents = run_macros(file, file.contents)
        file.contents = contents

        contents = run_hooks("htmlize",
                direc=direc,
                file=file,
                stop_if_result=True,
                return_holder=False)
        if not contents:
            continue
        file.contents = contents

        # Compute the output filename: same rel_path, '.html' suffix.
        file.out_path = os.path.splitext(fname_in)[0] + ".html"

    # Pass 2: linkify + page template, then write the output file.
    for fname_in in files:
        file = files[fname_in]
        current_file = file
        if not file.has_key("out_path"):
            # pass 1 produced no HTML for this file
            continue
        direc = directories[file.direc]

        contents = run_hooks("linkify",
                direc=direc,
                file=file,
                return_holder=False)
        if not contents:
            continue
        file.contents = contents

        # TODO: some fragments may be meant to be embedded into another
        # web page and should then not end up in an HTML file of their own
        contents = run_hooks("pagetemplate",
                direc=direc,
                file=file,
                stop_if_result=True,
                return_holder=False)

        # Create the output directory
        fname_out = os.path.join(cfg.out_dir, file.out_path)
        dir_out = os.path.split(fname_out)[0]
        try:
            os.makedirs(dir_out)
        except OSError:
            # directory already exists; a real error resurfaces at open()
            pass

        # TODO: could compare contents == f.read() and skip the write
        log("writing file %s" % fname_out, level=6)
        f = open(fname_out, "w")
        f.write(contents)
        f.close()
        # TODO: set time stamps on the output file?
+
+
+
+###############################################################################
+#
+# Main program
+#
+
@set_hook("addoptions")
def addoptions(params):
    """Register webber's core command-line options on the shared optparse
    parser (params["parser"]); returns the parser."""
    parser = params["parser"]
    option_specs = (
        (("-i", "--in"),
         dict(dest="in_dir", default="in",
              help="input directory", metavar="DIR")),
        (("-o", "--out"),
         dict(dest="out_dir", default="out",
              help="output directory", metavar="DIR")),
        (("--style-dir",),
         dict(dest="style_dir", default="in/style",
              help="directory with style sheets", metavar="STYLE")),
        (("-v", "--verbose"),
         dict(action="count", dest="verbose", default=3,
              help="print status messages to stdout")),
        (("-k", "--keepgoing"),
         dict(dest="keepgoing", action="store_true", default=False,
              help="keep going past errors if possible")),
    )
    for flags, kwargs in option_specs:
        parser.add_option(*flags, **kwargs)

    return parser
+
+
@set_hook("checkconfig", last=True)
def checkconfig(params):
    """Runs after all plugin checkconfig hooks (last=True): normalize
    cfg.in_dir to an absolute path with exactly one trailing '/'."""
    in_dir = os.path.join(os.getcwd(), cfg.in_dir)
    cfg.in_dir = in_dir.rstrip('/') + '/'
    assert cfg.in_dir.endswith('/')
+
+
def main():
    """Program entry point: load config and plugins, parse the command
    line, then run the read/scan/render pipeline."""
    global cfg

    # Get configuration from webber.conf
    cfg.load('webber.conf')

    # Now load all plugins
    load_plugins()

    # Create parser and allow plugins to add their own command line stuff
    parser = optparse.OptionParser()
    run_hooks("addoptions", parser=parser)
    # parse_args() stores the results on parser.values; the returned
    # (options, args) pair was never used, so it is discarded here.
    parser.parse_args()

    # Recast options into a Holder object, this allows
    # us to use it for Mapping.inheritFrom()
    options = Holder(**parser.values.__dict__)

    # link contents of webber.conf into cfg and set some defaults,
    # then let plugins fixup things in cfg.*
    cfg.inheritFrom(options)
    cfg.setDefault("exclude_dir", ["plugins"])
    run_hooks("checkconfig")

    run_hooks("start")

    walk_tree(cfg.in_dir)
    scan_files()
    render_files()

    run_hooks("finish")