#!/usr/bin/perl
#
# rssticker - Present a HTML formatted news listing from a set of
# cached RSS dumps. If necessary, the RSS dumps will be
# refreshed on access.
#
my $version = "0.2.32";
my $homepage = "http://bre.klaki.net/programs/rssticker/";
#
# Copyright (C) 2001, Bjarni Rúnar Einarsson, http://bre.klaki.net/.
#
# Usage: http://path/to/rssticker/path/to/cache
# ?http://path/to/css
# &option1&option2
# &=id,http://path/to/rss1.rss&=N,id,http://path/to/rss2
#
# Note that the order matters - the CSS and option arguments must come
# before the RSS arguments. Any number of RSS soures may be specified -
# when displaying more than one the script will automatically keep track
# of each entry's age and sort the output accordingling.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# TODO:
#
# - Fix everything marked "FIXME"!
#
# - 0.2.x Add a default built-in CSS style.
# - 0.2.x Allow caller to specify a remote .js URL (like remote CSS).
# - 0.3.x Look into adding discussion features.
# - 0.?.? Use something more scalable than a flatfile for URL aliases.
# - 0.?.? Add a columns selector for table mode.
# - 0.?.? Add "portal mode" to help people select news sources etc. to
# easily create a bookmarkable "my news" page.
# - 0.?.? Use something smarter/safer than LWP::Simple::get.
# - 0.?.? Follow the XML standard properly...
#
# - Improve home page.
# - Translate Icelandic documentation to English.
# - Add more translations?
# - Announce on freshmeat.
#
# History:
#
# v0.2.32 - bre
# Added the groups=BLA feature, for server-side storage of long complex
# URLs. With a proper user-interface, this could make the rss ticker much
# easier to use, URLs would be reduced to something like this:
#
# http://rss.molar.is/rss/?group=blogbuddies
#
# Instead of the long ...&=...&=...&=... stuff we've been using until now.
#
# v0.2.31 - bre
# Guaranteed proper URL encoding of href=".." attributes.
#
# v0.2.30 - bre
# Added support for configuration directories, so the main config file
# doesn't have to contain all url alias definitions.
#
# v0.2.29 - bre
# Fixed a bug in the &max= attribute and guaranteed that the advertement
# RSS would never add to the total length of the displayed list.
#
# v0.2.28 - bre
# Fixed minor problems to do with invalid RSS/strange input - newlines
# at weird places or invalid/partial HTML tags could cause problems.
#
# v0.2.27 - bre
# Fixed behavior when subjects or other fields are empty.
#
# v0.2.26 - bre
# Implemented cookie-based user counter.
#
# v0.2.25 - bre
# Expiramental upper limit on how many are read from a given
# RSS feed.
#
# v0.2.24 - bre
# Added support for ad_dir and ad_pos - &noads in URLs disables.
#
# v0.2.23 - bre
# Fixed major bug in make_safe() - it didn't work.
#
# v0.2.22 - bre
# Made more friendly to people using a form to select RSS channels.
#
# v0.2.21 - bre
# Fixed some XML compatibility issues (more work needs to be done in this
# area).
#
# v0.2.20 - bre
# Fixed a bug to do with / in base64 encoded data, and using the darn
# base64 encoded data as a file name...
#
# v0.2.19 - bre
# Made the parser more forgiving of white-space in the URLs.
#
# v0.2.18 - bre
# Tweaked code which removes duplicate entries so it will update the old
# .. description and subject with new data before killing the new entry.
# Added &target=... option.
# Fixed deadline code to use a forking model, the eval/timer/die stuff
# .. wasn't working right.
#
# v0.2.17 - bre
# Added support for description fields (UNFINISHED!)
# Added title="" attributes to rssticker "buttons".
# Added &locale= configuration for selecting language.
# Added &justlist=1 mode for use with SSI
# Cleaned up the tables/notables and button handling code.
# Changed format of cache file (is backwards compatible) to add space
# .. for descriptions and chose a better seperator string.
# Changed format of configuration file to alias descriptions.
# Added deadline for all requests.
#
# v0.2.16 - bre
# Added primitive decoder for XML-&escaped; constructs. I should be
# using modules... Fixed sort order UI a bit (thanks Már).
#
# v0.2.15 - bre
# Disabled sorting stuff by default. Fixed sorting bugs.
#
# v0.2.14 - bre
# Added sorting options and URLs for news sources, translations and
# even more customizable HTML output. The top-level ..
# entity in the RSS file is now linked to from under the ID marker.
#
# v0.2.13 - bre
# Improved sloppy RSS parser so it can handle RSS 1.0 files.
# Added equally sloppy UTF8->Latin1 thunker.
#
# v0.2.12 - bre
# Fixed a bug where the cache file was always rewritten to disk even if
# nothing had changed. This caused popular channels never to get refreshed.
#
# v0.2.11 - bre
# Improved the javascript refresh, made it the default. Append an "m" to
# the refresh parameter to use the old one.
# Randomized the cache_ttl, to spread the load between diff. people.
#
# v0.2.10 - bre
# Added the option of using a javascript-based refresher instead of
# the META-EQUIV. Activate by appending "j" to the refresh time argument.
# Added the ¬ables option, which disables HTML table output and uses
# HTML 4
blocks instead.
#
# v0.2.9 - bre
# Allowed null resources, for skipping slots in generic CSSes.
# Improved the blurb at bottom a little bit.
#
# v0.2.8 - bre
# Made RSS ID's into plain digits signifying where in the URL order
# they occured. This facilitates re-use of stylesheets for different
# RSS combinations. Added "refresh" link.
#
# v0.2.7 - bre
# Added &refresh=N option, which allows the caller to specify the
# delay (in minutes) for the meta-refresh tag. 0 disables refreshing.
#
# v0.2.6 - bre
# Added info page.
# Improved comments.
#
use strict;
use LWP::Simple;
use MIME::Base64;
use File::stat;
use IO::File;
# Declare variables
my @rsspaths = ( );
my @headlines = ( );
my $stylesheet = "";
my $cachedir;
my $selfurl = undef;
my $config = {
use_cache => 1, # Use the cache or not?
policy => "open", # Default policy is open - any RSS/CSS is allowed.
sortby => "time", # Default sort order.
refresh => 15, # Default refresh is after 15 minutes.
jsrefresh => 1, # Use Javascript reloading code (default on).
sortable => 0, # Print fancy header for sorting.
editable => 0, # Offer inline editing features...
notables => 0, # Use
etc. instead of tables.
showdesc => 0, # Display channel descriptions?
cache_ttl => 10, # Default cache TTL is 10 minutes.
lang => "en", # Default is English...
justlist => 0, # Display HTML header/footer by default.
deadline => 15, # How long is this allowed to take?
target => "_top", # Default URL target.
headlines => 500, # Maximum headlines per page.
ad_dir => 0, # Directory containing advertisements
ad_pos => 4, # How many lines down do we insert ads?
ad_rss => "", # RSS added to all URLs, for advertising.
cnt_every => 0, # Interval between updating user counter.
cnt_file => undef, # User-counter log file.
cfg_dir => undef, # Directory containing config-file snippets.
};
my $urlalias = { };
my $descalias = { };
my $sourceinfo = { };
# Translation data...
my $msg = {
is => {
btn_chooser => "V",
des_chooser => "FIXME",
btn_close => "X",
des_close => "Fela þessa valkosti.",
btn_info => "uppl.",
des_info => "Birta upplýsingar um stillingar rssticker o.fl.",
btn_refresh => "uppfæra",
des_refresh => "Uppfæra þennan glugga",
btn_sortable => "R",
des_sortable => "Birta röðunarhnappa.",
btn_editable => "B",
des_editable => "Breyta lýsingum o.fl.",
Error => "Villa",
invalid_res => "Ógild veituskilgreining: ",
invalid_root => "Ógild rót!",
invalid_url => "Ógild vefslóð: ",
no_conf => "Gat ekki lesið stillingaskrá!",
not_a_cgi => "Ekki í CGI umhverfi?",
page_title => "Fyrirsagnalisti:",
policy_closed => "Þú mátt bara nota fyrirfram skilgreindar fréttaveitur.",
powered => "framreitt af",
sep => "::",
sort_order => "Raða eftir: ",
sort_id => "Nafni+",
sort_rev_id => "Nafni-",
sort_time => "Tíma+",
sort_rev_time => "Tíma-",
},
en => {
btn_chooser => "C",
des_chooser => "FIXME",
btn_close => "X",
des_close => "Hide these options.",
btn_info => "info",
des_info => "Information about this rssticker.",
btn_refresh => "refresh list",
des_refresh => "Reload the contents of this list.",
btn_sortable => "S",
des_sortable => "Display sorting toolbar.",
btn_editable => "E",
des_editable => "Edit descriptions etc.",
Error => "Error",
invalid_res => "Invalid resource specification: ",
invalid_root => "Invalid root!",
invalid_url => "Invalid URL in: ",
no_conf => "Failed to read configuration!",
not_a_cgi => "Not running via CGI?",
page_title => "Headline listing:",
policy_closed => "Policy is closed, you must use predefined URLs.",
powered => "powered by",
sep => "::",
sort_order => "Sort by: ",
sort_id => "ID+",
sort_rev_id => "ID-",
sort_time => "Time+",
sort_rev_time => "Time-",
}
};
# Check environment...
panic(i18n("not_a_cgi")) unless ($ENV{"SERVER_NAME"});
$cachedir = $ENV{"PATH_TRANSLATED"} || ".";
$cachedir =~ s/index\.html?$//;
$selfurl = $ENV{"SCRIPT_URI"} || $ENV{"SCRIPT_URL"} || "./";
panic(i18n("invalid_root") . $cachedir) unless (-e $cachedir."/rssticker.cfg");
# Read configuration file, set "conveniance" variables.
my $cfg = new IO::File;
$cfg->open($cachedir."/rssticker.cfg") || panic(i18n("no_conf"));
read_config_file($cfg);
$cfg->close();
my $target = $config->{target};
my $refresh = $config->{refresh} * 60;
my $cache_ttl = $config->{cache_ttl} * 60;
my $runtime = int(time() / $cache_ttl) * $cache_ttl;
my $deadline = time() + $config->{deadline};
# Load variables...
if (my $qs = $ENV{"QUERY_STRING"}) {
$qs =~ s/&?submit=[^&]*//gsi;
while ($qs =~ /(^|&)group=([^\s&\/]+)/)
{
my $g = "group=$2";
if (open (URL, ");
close(URL);
$url_data =~ s/\s+//gs;
$url_data =~ s/^&+//;
$qs =~ s/\Q$g\E/$url_data/;
}
}
if ($qs =~ s/&css=([^&]+)//is)
{
$qs = $1."&".$qs;
}
while ($qs =~ s/&refresh=(\d+)(j|m)?//is)
{
$refresh = $1 * 60;
$config->{jsrefresh} = 0 if ($2 =~ /m/i);
}
if ($qs =~ s/¬ables(=[^&]+)?//gis)
{
$config->{notables} = 1;
}
if ($qs =~ s/&noads(=[^&]+)?//gis)
{
$config->{ad_dir} = 0;
}
if ($qs =~ s/&nocache(=[^&]+)?//gis)
{
$config->{use_cache} = 0;
}
while ($qs =~ s/&locale=(en|is)//gis)
{
$config->{lang} = 1;
}
while ($qs =~ s/&showdesc=(1|0)//is)
{
$config->{showdesc} = $1;
}
while ($qs =~ s/&sortable=(0|1)//is)
{
$config->{sortable} = $1;
}
while ($qs =~ s/&editable=(0|1)//is)
{
$config->{editable} = $1;
}
while ($qs =~ s/&justlist=(0|1)//is)
{
$config->{justlist} = $1;
}
while ($qs =~ s/&max=(\d+)//is)
{
$config->{headlines} = $1;
}
while ($qs =~ s/&sort=(id|time|rev-time|rev-id)//is)
{
$config->{sortby} = $1;
}
while ($qs =~ s/&target=([^&]+)//is)
{
$target = urldecode($1);
}
$qs =~ s/\s*$//s;
$qs =~ s/^\s*//s;
$qs .= $config->{ad_rss} if ($config->{ad_dir});
@rsspaths = split(/\s*&r?=/, $qs);
$stylesheet = urldecode(shift @rsspaths);
# Look up alias for stylesheet, if necessary.
if (($stylesheet) && ($stylesheet !~ /^http:\/\//i))
{
$stylesheet = $urlalias->{lc($stylesheet)};
}
}
# Display a help page and quit if no resources were specified.
show_help_page() unless (@rsspaths);
# Import RSS data into headline listing...
my $max_lines = 0;
foreach my $path (unsort(@rsspaths))
{
my ($i, $u);
my $inc_max_lines = 1;
$inc_max_lines = 0 if (('&='.$path eq $config->{ad_rss}) ||
('&r='.$path eq $config->{ad_rss}));
# print STDERR "&=$path eq $config->{ad_rss} ? $inc_max_lines $max_lines\n";
$path = urldecode($path);
if ($path =~ /^((?:\d+,)?[^,]+),(.*)$/i)
{
($i, $u) = ($1,$2);
my $fn = $config->{cfg_dir}."/".lc($u).".cfg";
$fn =~ s,-,/,;
if ((defined $config->{cfg_dir}) && (-f $fn))
{
my $fh = new IO::File $fn;
read_config_file($fh);
$fh->close();
}
if (defined $urlalias->{lc($u)})
{
$u = $urlalias->{lc($u)};
$path = "$i,$u";
}
elsif (($config->{policy} !~ /^open$/) && ($u ne ""))
{
panic(i18n("policy_closed"));
}
elsif (($u !~ /^http:\/\//) && ($u ne ""))
{
panic(i18n("invalid_url").$path);
}
}
else
{
panic(i18n("invalid_res").$path);
}
my $max = 10;
my @args = split(/,/, $path, 3);
$max = shift @args if (@args == 3);
$max_lines += $max if ($inc_max_lines);
$sourceinfo->{$args[0]} = { };
if ($u ne "")
{
my @data = get_data(@args);
push @headlines, splice(@data, 0, $max);
}
}
# This will hide the advertisements until they're updated...
$config->{headlines} = $max_lines if ($max_lines < $config->{headlines});
# Assign ID numbers sequentially.
my $ids = { };
my $idn = 1;
foreach my $path (@rsspaths)
{
$path = urldecode($path);
my @args = split(/,/, $path, 3);
my $max = shift @args if (@args == 3);
$ids->{$args[0]} = $idn++;
}
# Everything is OK, render the RSS data as HTML.
#
print "Content-Type: text/html\n", user_counter(), "\n";
if (!$config->{justlist})
{
print "\n";
if ($refresh)
{
if (time() >= $deadline)
{
# We probably haven't refreshed everything, so tell browser
# to try again a bit sooner than usual.
$refresh /= 3;
}
if ($config->{jsrefresh})
{
my $t = time().$$;
print
"\n";
}
else
{
print "\n";
}
}
if ($stylesheet ne "")
{
print "\n";
}
print "", i18n("page_title"), " ",
join(", ", map { s/^\d+,//; s/,.*//; $_ } @rsspaths),
" - rssticker $version\n",
"
\n";
if ($config->{sortable})
{
my $baseurl = $selfurl ."?". $ENV{"QUERY_STRING"};
$baseurl =~ s/&sort=(id|time|rev-time|rev-id)//i;
$baseurl =~ s/&sortable=(0|1)//i;
my ($burl1, $burl2) = split(/\s*&=/,$baseurl,2);
$burl2 = '&='.$burl2 if ($burl2);
print "