rss_feed.py: limit number of published items via "rss_max_items"

[webber.git] / plugins / rss_feed.py
diff --git a/plugins/rss_feed.py b/plugins/rss_feed.py

index 28b0efcc7346d79b322a90cab2e18bcca570e03f..580d3040925ec0e07af77fd8af67eab82add60b4 100644 (file)
--- a/plugins/rss_feed.py
+++ b/plugins/rss_feed.py
@@ -1,8 +1,14 @@
  # -*- coding: iso-8859-1 -*-
  from webber import *
-import os, datetime, PyRSS2Gen
+import os, datetime, re
+try:
+       import PyRSS2Gen
+except ImportError:
+       print "rss_feed needs the python module PyRSS2Gen"
+       raise
  
  items = []
+max_age = 0
  
  
  @set_hook("checkconfig")
@@ -10,10 +16,16 @@ def checkconfig(params):
         if not cfg.has_key("rss_file"):
                 log('no "rss_file:" configured, using "feed.rss":', 4)
                 cfg.rss_file = "feed.rss"
+       if not cfg.has_key("rss_max_items"):
+               cfg.rss_max_items = 0
+       if cfg.has_key("rss_max_age_days"):
+               import time
+               global max_age
+               max_age = int(time.time()) - int(cfg.rss_max_age_days)*86400
  
  
+# Helper class needed for datetime.datetime to generate GMT timestamps
  ZERO = datetime.timedelta(0)
-
  class UTC(datetime.tzinfo):
      """UTC"""
  
@@ -33,35 +45,63 @@ def sitemap_scan(params):
         global items
  
         file = params.file
-       if not file.has_key("linktitle"):
+       if max_age and file["mtime"] < max_age:
                 return
-       if not file.has_key("change"):
+       if not file.has_key("linktitle"):
                 return
+       if file.has_key("change"):
+               change = file["change"]
+       else:
+               change = ""
  
         fname_out = os.path.join(cfg.out_dir, file.out_path)
         full_url = "http://%s/%s" % (cfg.main_url, fname_out)
         item = PyRSS2Gen.RSSItem(
                 title = file["title"],
                 link = full_url,
-               guid = PyRSS2Gen.Guid("%s %s" % (full_url, file["mtime"])),
-               description = file["change"],
-               pubDate = datetime.datetime.fromtimestamp(file["mtime"], utc),
+               guid = PyRSS2Gen.Guid("%s %s" % (full_url, file["mtime"]), isPermaLink=0),
+               description = change,
+               pubDate = file["mtime"]
         )
         items.append(item)
  
  
-
  @set_hook("finish")
  def finish(params):
+       global items
+       # Sort items by pubDate, which still holds the mtime
+       items.sort(key=lambda i: i.pubDate, reverse=True)
+
+       # Limit to requested number
+       count = int(cfg.rss_max_items)
+       if count:
+               items = items[:count]
+
+       # convert mtime to real pupDate
+       for i in items:
+               i.pubDate = datetime.datetime.fromtimestamp(i.pubDate, utc)
+
         rss = PyRSS2Gen.RSS2(
                 title = cfg.subtitle,
                 link = "http://%s" % cfg.main_url,
                 description = cfg.subtitle,
-               lastBuildDate = datetime.datetime.now(),
+               lastBuildDate = datetime.datetime.now(utc),
                 items = items,
         )
+       # Step one of self-reference
+       # (see http://feedvalidator.org/docs/warning/MissingAtomSelfLink.html)
+       rss.rss_attrs["xmlns:atom"] = "http://www.w3.org/2005/Atom"
+
         try:
                 os.makedirs(cfg.out_dir)
         except:
                 pass
-       rss.write_xml( open(os.path.join(cfg.out_dir, cfg.rss_file), "w"))
+       f = open(os.path.join(cfg.out_dir, cfg.rss_file), "w")
+       # Ugly XML beautification
+       s = rss.to_xml()
+       s = re.sub("<(?!/)", "\n<", s)
+       s = s.replace("\n\n", "\n")
+       # Step two of self-reference
+       s = s.replace('<channel>', '<channel>\n<atom:link href="http://%s/%s" rel="self" type="application/rss+xml" />' % (cfg.main_url, cfg.rss_file))
+       f.write(s[1:])
+       f.write("\n")