forked from HPR/hpr-tools
		
	
		
			
				
	
	
		
			1711 lines
		
	
	
		
			56 KiB
		
	
	
	
		
			Perl
		
	
	
		
			Executable File
		
	
	
	
	
			
		
		
	
	
			1711 lines
		
	
	
		
			56 KiB
		
	
	
	
		
			Perl
		
	
	
		
			Executable File
		
	
	
	
	
#!/usr/bin/env perl
 | 
						|
#===============================================================================
 | 
						|
#
 | 
						|
#         FILE: summarise_mail
 | 
						|
#
 | 
						|
#        USAGE: ./summarise_mail [-help] [-debug=N] [-from=DATE] [-to=DATE]
 | 
						|
#               [-out=FILE] [-template=FILE] [-[no]silent] [-[no]checknew]
 | 
						|
#               [-initialise=N]
 | 
						|
#
 | 
						|
#  DESCRIPTION: Generates a summary of the HPR mailing list for the Community
 | 
						|
#  		News
 | 
						|
#
 | 
						|
#      OPTIONS: ---
 | 
						|
# REQUIREMENTS: ---
 | 
						|
#         BUGS: ---
 | 
						|
#        NOTES: ---
 | 
						|
#       AUTHOR: Dave Morriss (djm), Dave.Morriss@gmail.com
 | 
						|
#      VERSION: 0.0.5
 | 
						|
#      CREATED: 2015-02-15 15:06:11
 | 
						|
#     REVISION: 2015-10-04 20:21:44
 | 
						|
#
 | 
						|
#-------------------------------------------------------------------------------
 | 
						|
# TODO
 | 
						|
#
 | 
						|
# - Add a means of reporting all of the message bodies in a thread. They
 | 
						|
#   ideally need formatting and should be printable so that a template can
 | 
						|
#   generate plain text versions for use by the Community News hosts when
 | 
						|
#   reviewing the last month's messages. This is quite a large task but maybe
 | 
						|
#   there are mail formatters that can do it.
 | 
						|
#
 | 
						|
#===============================================================================
 | 
						|
 | 
						|
use 5.010;
 | 
						|
use strict;
 | 
						|
use warnings;
 | 
						|
use utf8;
 | 
						|
 | 
						|
use Getopt::Long;
 | 
						|
use Pod::Usage;
 | 
						|
use Config::General;
 | 
						|
 | 
						|
use LWP::UserAgent;
 | 
						|
use Mail::Box::Manager;
 | 
						|
 | 
						|
use Date::Parse;
 | 
						|
use Date::Calc qw{:all};
 | 
						|
use DateTime;
 | 
						|
use DateTime::TimeZone;
 | 
						|
 | 
						|
use Template;
 | 
						|
use Template::Filters;
 | 
						|
Template::Filters->use_html_entities;    # Use HTML::Entities in the template
 | 
						|
 | 
						|
use Data::Dumper;
 | 
						|
 | 
						|
#
 | 
						|
# Version number (manually incremented)
 | 
						|
#
 | 
						|
our $VERSION = '0.0.5';
 | 
						|
 | 
						|
#
 | 
						|
# Script and directory names
 | 
						|
#
 | 
						|
( my $PROG = $0 ) =~ s|.*/||mx;
 | 
						|
( my $DIR  = $0 ) =~ s|/?[^/]*$||mx;
 | 
						|
$DIR = '.' unless $DIR;
 | 
						|
 | 
						|
#-------------------------------------------------------------------------------
 | 
						|
# Declarations
 | 
						|
#-------------------------------------------------------------------------------
 | 
						|
#
 | 
						|
# Constants and other declarations
 | 
						|
#
 | 
						|
my $basedir    = "$ENV{HOME}/HPR/Community_News";
 | 
						|
my $configfile = "$basedir/.$PROG.cfg";
 | 
						|
my $bpfile     = "$basedir/mailnote_template.tpl";
 | 
						|
 | 
						|
my (%dtargs, $dt,      $tz,    $rd,      $mid,
 | 
						|
    $irt,    $ref,     $date,  @parsed,  %msg,
 | 
						|
    %id,     $dt_from, $dt_to, %threads, $total,
 | 
						|
);
 | 
						|
 | 
						|
my %tags = (
 | 
						|
    'from'        => 'From',
 | 
						|
    'to'          => 'To',
 | 
						|
    'cc'          => 'CC',
 | 
						|
    'date'        => 'Date',
 | 
						|
    'subject'     => 'Subject',
 | 
						|
    'archived-at' => 'Link',
 | 
						|
);
 | 
						|
 | 
						|
#
 | 
						|
# Enable Unicode mode
 | 
						|
#
 | 
						|
binmode STDOUT, ":encoding(UTF-8)";
 | 
						|
binmode STDERR, ":encoding(UTF-8)";
 | 
						|
 | 
						|
#
 | 
						|
# Load configuration data
 | 
						|
#
 | 
						|
my $conf = Config::General->new(
 | 
						|
    -ConfigFile      => $configfile,
 | 
						|
    -InterPolateVars => 1,
 | 
						|
    -ExtendedAccess  => 1,
 | 
						|
) or die "Unable to open $configfile\n";
 | 
						|
 | 
						|
my %config = $conf->getall() or die "Unable to process $configfile\n";
 | 
						|
 | 
						|
#-------------------------------------------------------------------------------
 | 
						|
# Options and arguments
 | 
						|
#-------------------------------------------------------------------------------
 | 
						|
#
 | 
						|
# Process options
 | 
						|
#
 | 
						|
my %options;
 | 
						|
Options( \%options );
 | 
						|
 | 
						|
#
 | 
						|
# Default help
 | 
						|
#
 | 
						|
pod2usage( -msg => "$PROG version $VERSION\n", -exitval => 1 )
 | 
						|
    if ( $options{'help'} );
 | 
						|
 | 
						|
#
 | 
						|
# Collect options
 | 
						|
#
 | 
						|
my $debug      = ( defined( $options{debug} )      ? $options{debug}    : 0 );
 | 
						|
my $initialise = ( defined( $options{initialise} ) ? 1                  : 0 );
 | 
						|
my $start      = $options{initialise};
 | 
						|
my $silent     = ( defined( $options{silent} )     ? $options{silent}   : 0 );
 | 
						|
my $checknew   = ( defined( $options{checknew} )   ? $options{checknew} : 0 );
 | 
						|
my $outfile    = $options{out};
 | 
						|
 | 
						|
my @fromdate;
 | 
						|
if ( defined( $options{from} ) ) {
 | 
						|
    @fromdate = parseDate( $options{from} );
 | 
						|
}
 | 
						|
 | 
						|
my @todate;
 | 
						|
if ( defined( $options{to} ) ) {
 | 
						|
    @todate = parseDate( $options{to} );
 | 
						|
}
 | 
						|
 | 
						|
my $template
 | 
						|
    = ( defined( $options{template} ) ? $options{template} : $bpfile );
 | 
						|
 | 
						|
#
 | 
						|
# Check for illegal option combinations
 | 
						|
#
 | 
						|
die "Use only one of -initialise and -checknew\n"
 | 
						|
    if ( $initialise && $checknew );
 | 
						|
 | 
						|
#
 | 
						|
# Sanity checks
 | 
						|
#
 | 
						|
die "Error: Unable to find template $template\n" unless -r $template;
 | 
						|
 | 
						|
#-------------------------------------------------------------------------------
 | 
						|
# Default the from date to today and compute a to date if one wasn't given
 | 
						|
#-------------------------------------------------------------------------------
 | 
						|
if ( scalar(@fromdate) == 0 ) {
 | 
						|
    @fromdate = Today();
 | 
						|
}
 | 
						|
 | 
						|
if ( scalar(@todate) == 0 ) {
 | 
						|
    @todate = Add_Delta_Days( @fromdate[ 0, 1 ],
 | 
						|
        1, Days_in_Month( @fromdate[ 0, 1 ] ) );
 | 
						|
}
 | 
						|
 | 
						|
#
 | 
						|
# We need DateTime dates for comparison
 | 
						|
#
 | 
						|
emit( $silent, "From: ", ISO8601_Date(@fromdate), "\n" );
 | 
						|
emit( $silent, "To:   ", ISO8601_Date(@todate),   "\n" );
 | 
						|
 | 
						|
$dt_from = calcToDT(@fromdate);
 | 
						|
$dt_to   = calcToDT(@todate);
 | 
						|
 | 
						|
#-------------------------------------------------------------------------------
 | 
						|
# Open the output file (or STDOUT) - we may need the date to do it
 | 
						|
#-------------------------------------------------------------------------------
 | 
						|
my $outfh;
 | 
						|
if ($outfile) {
 | 
						|
    $outfile = sprintf( $outfile,
 | 
						|
        sprintf( "%d-%02d", $dt_from->year, $dt_from->month ) )
 | 
						|
        if ( $outfile =~ /%s/ );
 | 
						|
    emit( $silent, "Output: ", $outfile, "\n" );
 | 
						|
 | 
						|
    open( $outfh, ">:encoding(UTF-8)", $outfile )
 | 
						|
        or die "Unable to open $outfile for writing: $!";
 | 
						|
}
 | 
						|
else {
 | 
						|
    open( $outfh, ">&", \*STDOUT )
 | 
						|
        or die "Unable to initialise for writing: $!";
 | 
						|
}
 | 
						|
 | 
						|
#-------------------------------------------------------------------------------
 | 
						|
# Check the configuration file
 | 
						|
#-------------------------------------------------------------------------------
 | 
						|
checkConfig( \%config );
 | 
						|
 | 
						|
#
 | 
						|
# The cached mail file
 | 
						|
#
 | 
						|
my $mailfile = join( '/', @{ $config{cache} }{ 'directory', 'filename' } );
 | 
						|
 | 
						|
#-------------------------------------------------------------------------------
 | 
						|
# If asked to initialise the cache check it makes sense, otherwise look to see
 | 
						|
# if we're to update an existing cache
 | 
						|
#-------------------------------------------------------------------------------
 | 
						|
if ($initialise) {
 | 
						|
    if ( -e $mailfile ) {
 | 
						|
        die "The mail cache $mailfile exists.\n"
 | 
						|
            . "Delete it before initialising\n";
 | 
						|
    }
 | 
						|
    unless (initialiseCache( \%config, $start )) {
 | 
						|
        print "Failed to initialise the cache; can't continue\n";
 | 
						|
        exit;
 | 
						|
    }
 | 
						|
}
 | 
						|
elsif ($checknew) {
 | 
						|
    unless ( -e $mailfile ) {
 | 
						|
        die "The mail cache $mailfile does not exist\n"
 | 
						|
            . "Initialise it before checking for updates\n";
 | 
						|
    }
 | 
						|
    unless (updateCache( \%config )) {
 | 
						|
        print "Failed to update the cache; continuing with old mail\n";
 | 
						|
    }
 | 
						|
}
 | 
						|
 | 
						|
#
 | 
						|
# Open the mail file (as a folder in MBOX format)
 | 
						|
#
 | 
						|
my $mgr    = Mail::Box::Manager->new;
 | 
						|
my $folder = $mgr->open($mailfile);
 | 
						|
 | 
						|
#-------------------------------------------------------------------------------
 | 
						|
# Walk through the messages in the folder collecting message details into
 | 
						|
# a hash structure indexed by the UTC timestamp.
 | 
						|
#-------------------------------------------------------------------------------
 | 
						|
foreach my $message ( $folder->messages ) {
 | 
						|
    #
 | 
						|
    # TODO This doesn't work for a reason I don't understand. Needs to be
 | 
						|
    # fixed
 | 
						|
    #
 | 
						|
    #$message->unfold();
 | 
						|
 | 
						|
    #
 | 
						|
    # Parse and convert the message date. The @parsed array contains
 | 
						|
    # $ss,$mm,$hh,$day,$month,$year,$zone. The year and month need adjustment.
 | 
						|
    # The timezone value is number of seconds from UTC.
 | 
						|
    #
 | 
						|
    $date   = $message->get('Date');
 | 
						|
    @parsed = strptime($date);
 | 
						|
    $parsed[5] += 1900;
 | 
						|
    $parsed[4] += 1;
 | 
						|
 | 
						|
    #
 | 
						|
    # Convert the parsed date to a DateTime
 | 
						|
    #
 | 
						|
    @dtargs{ 'second', 'minute', 'hour', 'day', 'month', 'year' } = (@parsed);
 | 
						|
    $dt = DateTime->new(%dtargs);
 | 
						|
    $tz = DateTime::TimeZone->new(
 | 
						|
        name => DateTime::TimeZone->offset_as_string( $parsed[6] ) );
 | 
						|
    $dt->set_time_zone($tz);
 | 
						|
 | 
						|
    #
 | 
						|
    # Stash mail parameters
 | 
						|
    #
 | 
						|
    $rd  = $dt->utc_rd_as_seconds();
 | 
						|
    $mid = formatID( $message->get('Message-ID'), 0 );
 | 
						|
    $irt = formatID( $message->get('In-Reply-To'), 1 );
 | 
						|
 | 
						|
    my @from = $message->from;
 | 
						|
    my @to   = $message->to;
 | 
						|
    my @cc   = $message->cc;
 | 
						|
 | 
						|
    $msg{$rd} = {
 | 
						|
        # '_date' => $date,
 | 
						|
        # '_from' => \@ffrom,
 | 
						|
        # '_to'   => \@fto,
 | 
						|
        # '_cc'   => \@fcc,
 | 
						|
        '_mid'    => $message->get('Message-ID'),
 | 
						|
        '_irt'    => $message->get('In-Reply-To'),
 | 
						|
        '_rdate'  => $rd,
 | 
						|
        'date'    => $dt->ymd . ' ' . $dt->hms . ' ' . $dt->time_zone->name,
 | 
						|
        'from'    => formatEmail( \@from ),
 | 
						|
        'to'      => formatEmail( \@to ),
 | 
						|
        'cc'      => formatEmail( \@cc ),
 | 
						|
        'subject' => $message->get('Subject'),
 | 
						|
        'archived-at' => trimHeader( $message->get('Archived-At') ),
 | 
						|
        'message-id'  => $mid,
 | 
						|
        'in-reply-to' => $irt,
 | 
						|
        'references'  => formatReferences( $message->get('References') ),
 | 
						|
        'parent'      => undef,
 | 
						|
        'children'    => [],
 | 
						|
    };
 | 
						|
 | 
						|
    #
 | 
						|
    # Stash the message id for easier linking
 | 
						|
    #
 | 
						|
    $id{$mid} = $rd;
 | 
						|
}
 | 
						|
 | 
						|
$folder->close;
 | 
						|
 | 
						|
#-------------------------------------------------------------------------------
 | 
						|
# Link the messages according to their internal back-references. Rescue any
 | 
						|
# broken threads caused by people not using their mail clients properly.
 | 
						|
#-------------------------------------------------------------------------------
 | 
						|
linkMessages( \%msg, \%id, ( $debug == 0 ) );
 | 
						|
repairThreads( \%msg, ( $debug == 0 ) );
 | 
						|
 | 
						|
#-------------------------------------------------------------------------------
 | 
						|
# Generate the hash of message thread details that start in the selected
 | 
						|
# period. Prepare the template and generate the final document
 | 
						|
#-------------------------------------------------------------------------------
 | 
						|
$total = buildThreadSummary( $dt_from, $dt_to, \%config, \%msg, \%threads,
 | 
						|
    ( $debug == 0 ) );
 | 
						|
 | 
						|
my $tt = Template->new(
 | 
						|
    {   ABSOLUTE     => 1,
 | 
						|
        ENCODING     => 'utf8',
 | 
						|
        INCLUDE_PATH => $basedir,
 | 
						|
    }
 | 
						|
);
 | 
						|
my $vars = {
 | 
						|
    total   => $total,
 | 
						|
    threads => \%threads,
 | 
						|
};
 | 
						|
my $document;
 | 
						|
$tt->process( $template, $vars, \$document, { binmode => ':utf8' } )
 | 
						|
    || die $tt->error(), "\n";
 | 
						|
print $outfh $document;
 | 
						|
close($outfh);
 | 
						|
 | 
						|
exit;
 | 
						|
 | 
						|
#===  FUNCTION  ================================================================
 | 
						|
#         NAME: checkConfig
 | 
						|
#      PURPOSE: Check the configuration, aborting if it's messed up and
 | 
						|
#               missing necessary items
 | 
						|
#   PARAMETERS: $config         configuration hash
 | 
						|
#      RETURNS: Nothing
 | 
						|
#  DESCRIPTION: Checks that all of the required items are present in the
 | 
						|
#               $config hash. These have been parsed from the configuration
 | 
						|
#               file usually called '.summarise_mail.cfg'. It only performs
 | 
						|
#               existence checks, nothing more fancy.
 | 
						|
#       THROWS: No exceptions
 | 
						|
#     COMMENTS: None
 | 
						|
#     SEE ALSO: N/A
 | 
						|
#===============================================================================
 | 
						|
sub checkConfig {
 | 
						|
    my ($config) = @_;
 | 
						|
 | 
						|
    die "Configuration file problem\n"
 | 
						|
        unless ( defined( $config->{gmane}->{url} )
 | 
						|
        && defined( $config->{gmane}->{template1} )
 | 
						|
        && defined( $config->{gmane}->{thread} )
 | 
						|
        && defined( $config->{gmane}->{template2} )
 | 
						|
        && defined( $config->{gmane}->{lookahead} )
 | 
						|
        && defined( $config->{cache}->{directory} )
 | 
						|
        && defined( $config->{cache}->{filename} )
 | 
						|
        && defined( $config->{cache}->{regex} ) );
 | 
						|
 | 
						|
    return;
 | 
						|
}
 | 
						|
 | 
						|
#===  FUNCTION  ================================================================
 | 
						|
#         NAME: initialiseCache
 | 
						|
#      PURPOSE: Load a new cache from Gmane
 | 
						|
#   PARAMETERS: $config         Configuration hashref
 | 
						|
#               $start          Start number to download
 | 
						|
#      RETURNS: If the GET succeeded then 1 otherwise 0
 | 
						|
#  DESCRIPTION: Creates a new cache file of mail downloaded from Gmane. The
 | 
						|
#               file is in MBOX format but has had Gmane's obscuration
 | 
						|
#               algorithms applied to it, messing up certain elements like
 | 
						|
#               message-ids. The download begins with message $start and
 | 
						|
#               finishes with $start+LOOKAHEAD where the lookahead value is
 | 
						|
#               defined in the configuration file; usually 100.
 | 
						|
#       THROWS: No exceptions
 | 
						|
#     COMMENTS: None
 | 
						|
#     SEE ALSO: N/A
 | 
						|
#===============================================================================
 | 
						|
sub initialiseCache {
 | 
						|
    my ( $config, $start ) = @_;
 | 
						|
 | 
						|
    #
 | 
						|
    # How much to look ahead in Gmane; defined in the configuration
 | 
						|
    #
 | 
						|
    my $lookahead = $config->{gmane}->{lookahead};
 | 
						|
 | 
						|
    #
 | 
						|
    # The name of the mail cache and its directory are in the configuration
 | 
						|
    #
 | 
						|
    my $mailfile
 | 
						|
        = join( '/', @{ $config->{cache} }{ 'directory', 'filename' } );
 | 
						|
 | 
						|
    #
 | 
						|
    # Make the URL for download
 | 
						|
    #
 | 
						|
    my $url = sprintf( $config->{gmane}->{template1},
 | 
						|
        $start, $start + $lookahead + 1 );
 | 
						|
    print "URL: $url\n";
 | 
						|
 | 
						|
    #
 | 
						|
    # Set up the HTTP GET
 | 
						|
    #
 | 
						|
    my $ua = LWP::UserAgent->new;
 | 
						|
    $ua->agent('HPR-Agent/0.1');
 | 
						|
    $ua->timeout(10);
 | 
						|
 | 
						|
    #
 | 
						|
    # Get the data (if any)
 | 
						|
    #
 | 
						|
    my $response = $ua->get($url);
 | 
						|
 | 
						|
    if ( $response->is_success ) {
 | 
						|
        #
 | 
						|
        # The GET succeeded, see what came back
 | 
						|
        #
 | 
						|
        if ( length( ${ $response->content_ref } ) > 0 ) {
 | 
						|
            #
 | 
						|
            # We got some new data. Append it to the rest
 | 
						|
            #
 | 
						|
            print "Mail messages found\n";
 | 
						|
            open( my $fh, ">", $mailfile )
 | 
						|
                or die "Unable to open $mailfile for writing\n";
 | 
						|
            print $fh $response->decoded_content;
 | 
						|
            close($fh);
 | 
						|
        }
 | 
						|
        else {
 | 
						|
            print "No messages found\n";
 | 
						|
        }
 | 
						|
 | 
						|
        return 1;
 | 
						|
    }
 | 
						|
    else {
 | 
						|
        #
 | 
						|
        # The GET failed in a nasty way
 | 
						|
        #
 | 
						|
        warn $response->status_line;
 | 
						|
 | 
						|
        return 0;
 | 
						|
    }
 | 
						|
}
 | 
						|
 | 
						|
#===  FUNCTION  ================================================================
 | 
						|
#         NAME: updateCache
 | 
						|
#      PURPOSE: Get the latest mail messages from Gmane and save them in the
 | 
						|
#               cache
 | 
						|
#   PARAMETERS: $config         configuration hash
 | 
						|
#      RETURNS: If the GET succeeded then 1 otherwise 0
 | 
						|
#  DESCRIPTION: Adds more messages downloaded from Gmane to the end of the
 | 
						|
#               current cache file. It needs to work out the number of the
 | 
						|
#               last message in the cache, which it does with a regex, looking
 | 
						|
#               at 'Archived-At:' headers. Then it adds the lookahead value
 | 
						|
#               from the configuration file to that and downloads a maximum of
 | 
						|
#               that number of messages using the Gmane API. These messages
 | 
						|
#               are appended to the existing MBOX file. There is no means of
 | 
						|
#               expiring older messages, and perhaps there should be.
 | 
						|
#       THROWS: No exceptions
 | 
						|
#     COMMENTS: None
 | 
						|
#     SEE ALSO: N/A
 | 
						|
#===============================================================================
 | 
						|
sub updateCache {
 | 
						|
    my ($config) = @_;
 | 
						|
 | 
						|
    #
 | 
						|
    # How much to look ahead in Gmane; defined in the configuration
 | 
						|
    #
 | 
						|
    my $lookahead = $config->{gmane}->{lookahead};
 | 
						|
 | 
						|
    #
 | 
						|
    # The regex to parse the permalink URL is in the configuration
 | 
						|
    #
 | 
						|
    my $re = qr{$config->{cache}->{regex}};
 | 
						|
 | 
						|
    #
 | 
						|
    # The name of the mail cache and its directory are in the configuration
 | 
						|
    #
 | 
						|
    my $mailfile
 | 
						|
        = join( '/', @{ $config->{cache} }{ 'directory', 'filename' } );
 | 
						|
 | 
						|
    #
 | 
						|
    # Open the mail file
 | 
						|
    #
 | 
						|
    my $mgr    = Mail::Box::Manager->new;
 | 
						|
    my $folder = $mgr->open($mailfile);
 | 
						|
 | 
						|
    #
 | 
						|
    # Walk all the messages, parsing the permalink URLs to get the start and
 | 
						|
    # end
 | 
						|
    #
 | 
						|
    my $plink;
 | 
						|
    my ( $msgno, $start, $end ) = ( 0, 0, 0 );
 | 
						|
    foreach my $message ( $folder->messages ) {
 | 
						|
        $plink = $message->get('Archived-At');
 | 
						|
        if ( ($msgno) = ( $plink =~ /$re/ ) ) {
 | 
						|
            if ( $start eq 0 ) {
 | 
						|
                $start = $msgno;
 | 
						|
            }
 | 
						|
            else {
 | 
						|
                $end = $msgno;
 | 
						|
            }
 | 
						|
        }
 | 
						|
    }
 | 
						|
    $folder->close;
 | 
						|
 | 
						|
    #
 | 
						|
    # Build the new URL to get more messages, starting from the last one we
 | 
						|
    # have plus 1 for $lookahead+1 messages
 | 
						|
    #
 | 
						|
    my $url = sprintf( $config->{gmane}->{template1},
 | 
						|
        $end + 1, $end + $lookahead + 1 );
 | 
						|
    print "URL: $url\n";
 | 
						|
 | 
						|
    #
 | 
						|
    # Set up the HTTP GET
 | 
						|
    #
 | 
						|
    my $ua = LWP::UserAgent->new;
 | 
						|
    $ua->agent('HPR-Agent/0.1');
 | 
						|
    $ua->timeout(10);
 | 
						|
 | 
						|
    #
 | 
						|
    # Get the data (if any)
 | 
						|
    #
 | 
						|
    my $response = $ua->get($url);
 | 
						|
 | 
						|
    if ( $response->is_success ) {
 | 
						|
        #
 | 
						|
        # The GET succeeded, see what came back
 | 
						|
        #
 | 
						|
        if ( length( ${ $response->content_ref } ) > 0 ) {
 | 
						|
            #
 | 
						|
            # We got some new data. Append it to the rest
 | 
						|
            #
 | 
						|
            print "New messages found\n";
 | 
						|
            open( my $fh, ">>", $mailfile )
 | 
						|
                or die "Unable to open $mailfile for appending\n";
 | 
						|
            print $fh $response->decoded_content;
 | 
						|
            close($fh);
 | 
						|
        }
 | 
						|
        else {
 | 
						|
            print "No new messages\n";
 | 
						|
        }
 | 
						|
 | 
						|
        return 1;
 | 
						|
    }
 | 
						|
    else {
 | 
						|
        #
 | 
						|
        # The GET failed in a nasty way
 | 
						|
        #
 | 
						|
        warn $response->status_line;
 | 
						|
 | 
						|
        return 0;
 | 
						|
    }
 | 
						|
}
 | 
						|
 | 
						|
#===  FUNCTION  ================================================================
 | 
						|
#         NAME: parseDate
 | 
						|
#      PURPOSE: Parse and check a date
 | 
						|
#   PARAMETERS: $date           Date string
 | 
						|
#      RETURNS: Date::Calc date or 'undef' if $date is undefined
 | 
						|
#  DESCRIPTION: Parses a date string using 'strptime' and converts it to
 | 
						|
#               a Date::Calc date list
 | 
						|
#       THROWS: No exceptions
 | 
						|
#     COMMENTS: None
 | 
						|
#     SEE ALSO: N/A
 | 
						|
#===============================================================================
 | 
						|
sub parseDate {
 | 
						|
    my ($date) = @_;
 | 
						|
 | 
						|
    if ( defined($date) ) {
 | 
						|
        #
 | 
						|
        # Parse and format the date
 | 
						|
        #
 | 
						|
        my @parsed = strptime($date);
 | 
						|
        @parsed[ 4, 5 ] = ( $parsed[4] + 1, $parsed[5] + 1900 );
 | 
						|
 | 
						|
        return ( @parsed[ 5, 4, 3 ] );
 | 
						|
    }
 | 
						|
    else {
 | 
						|
        return; # implicit undef
 | 
						|
    }
 | 
						|
}
 | 
						|
 | 
						|
#===  FUNCTION  ================================================================
 | 
						|
#         NAME: ISO8601_Date
 | 
						|
#      PURPOSE: Format a Date::Calc date in ISO8601 format
 | 
						|
#   PARAMETERS: @date   - a date in the Date::Calc format
 | 
						|
#      RETURNS: Text string containing a YYYY-MM-DD date
 | 
						|
#  DESCRIPTION: Just a convenience to allow a simple call like
 | 
						|
#               $str = ISO8601_Date(@date)
 | 
						|
#       THROWS: No exceptions
 | 
						|
#     COMMENTS: None
 | 
						|
#     SEE ALSO: N/A
 | 
						|
#===============================================================================
 | 
						|
sub ISO8601_Date {
 | 
						|
    my (@date) = (@_)[ 0, 1, 2 ];
 | 
						|
 | 
						|
    if ( check_date(@date) ) {
 | 
						|
        return sprintf( "%04d-%02d-%02d", @date );
 | 
						|
    }
 | 
						|
    else {
 | 
						|
        return "*Invalid Date*";
 | 
						|
    }
 | 
						|
}
 | 
						|
 | 
						|
#===  FUNCTION  ================================================================
 | 
						|
#         NAME: calcToDT
 | 
						|
#      PURPOSE: Convert a Date::Calc date to a UTC DateTime date
 | 
						|
#   PARAMETERS: $date           Date::Calc date
 | 
						|
#      RETURNS: A DateTime date in the UTC timezone
 | 
						|
#  DESCRIPTION: Reformats a Date::Calc date to a DateTime date based on UTC
 | 
						|
#               (for date comparison)
 | 
						|
#       THROWS: No exceptions
 | 
						|
#     COMMENTS: None
 | 
						|
#     SEE ALSO: N/A
 | 
						|
#===============================================================================
 | 
						|
sub calcToDT {
 | 
						|
    my (@date) = (@_)[ 0, 1, 2 ];
 | 
						|
 | 
						|
    #
 | 
						|
    # Transfer Date::Calc values into a hash for initialising a DateTime
 | 
						|
    # object.
 | 
						|
    #
 | 
						|
    my ( %dtargs, $dt );
 | 
						|
    @dtargs{ 'year', 'month', 'day', 'time_zone' } = ( @date, 'UTC' );
 | 
						|
    $dt = DateTime->new(%dtargs);
 | 
						|
    return $dt;
 | 
						|
}
 | 
						|
 | 
						|
#===  FUNCTION  ================================================================
 | 
						|
#         NAME: formatEmail
 | 
						|
#      PURPOSE: Trims an array of anonymised Gmane email addresss in
 | 
						|
#               Mail::Address objects
 | 
						|
#   PARAMETERS: $remails         The array of Mail::Address objects
 | 
						|
#      RETURNS: An array containing the trimmed addresses formatted as text
 | 
						|
#  DESCRIPTION: These addresses look like "Ken Fallon
 | 
						|
#               <ken-sq8tSfcSOvWzQB+pC5nmwQ@public.gmane.org>". The display
 | 
						|
#               name part (Ken Fallon) is anonymised if it contains an
 | 
						|
#               address-like specification. The local-part of the address is
 | 
						|
#               anonymised and the domain replaced by a Gmane domain. The
 | 
						|
#               front part of the local-part is unaffected. We want to build
 | 
						|
#               an address from the display name (if there is one), and the
 | 
						|
#               front of the local-part. So the above becomes:
 | 
						|
#               "Ken Fallon <ken@...>"
 | 
						|
#       THROWS: No exceptions
 | 
						|
#     COMMENTS: None
 | 
						|
#     SEE ALSO: N/A
 | 
						|
#===============================================================================
 | 
						|
sub formatEmail {
 | 
						|
    my ($remails) = @_;
 | 
						|
 | 
						|
    my ( $phrase, $address, $comment, @results );
 | 
						|
 | 
						|
    foreach my $email ( @{$remails} ) {
 | 
						|
        #
 | 
						|
        # Parse out the components
 | 
						|
        #
 | 
						|
        ( $phrase, $address, $comment )
 | 
						|
            = ( $email->phrase, $email->address, $email->comment );
 | 
						|
 | 
						|
        #
 | 
						|
        # Assume Gmane obscured any email address wherever it is. Strip out
 | 
						|
        # the obscuring part to make it more readable
 | 
						|
        #
 | 
						|
        foreach my $item ( $phrase, $address, $comment ) {
 | 
						|
            if ( $item =~ /^"?([^-]+).+@.+$/ ) {
 | 
						|
                $item = "$1\@...";
 | 
						|
            }
 | 
						|
        }
 | 
						|
 | 
						|
        #
 | 
						|
        # Let the module reassemble the address in the right format, and add
 | 
						|
        # it to the result list
 | 
						|
        #
 | 
						|
        my $obj = Mail::Address->new( $phrase, $address, $comment );
 | 
						|
        push( @results, $obj->format );
 | 
						|
    }
 | 
						|
 | 
						|
    return \@results;
 | 
						|
}
 | 
						|
 | 
						|
#===  FUNCTION  ================================================================
 | 
						|
#         NAME: formatID
 | 
						|
#      PURPOSE: Reformat a Message-ID: or In-Reply-To: value to undo the
 | 
						|
#               effects of Gmane's mail address obscuration which is applied
 | 
						|
#               to one but not the other.
 | 
						|
#   PARAMETERS: $header         The header for processing
 | 
						|
#               $gmane          Boolean to indicate whether we need to strip
 | 
						|
#                               a Gmane obscuration part
 | 
						|
#      RETURNS: The processed header
 | 
						|
#  DESCRIPTION: If $gmane is true recognises the possible Gmane obscuration
 | 
						|
#               patterns and extracts the important elements from the ID,
 | 
						|
#               otherwise just strips the '@' and what follows. This is
 | 
						|
#               because Gmane (presumably) detects the '@' in every header
 | 
						|
#               type, assumes it's an address and applies its obscuring
 | 
						|
#               technique to it. Shame they apply such a sledgehammer
 | 
						|
#               algorithm.
 | 
						|
#       THROWS: No exceptions
 | 
						|
#     COMMENTS: None
 | 
						|
#     SEE ALSO: N/A
 | 
						|
#===============================================================================
 | 
						|
sub formatID {
 | 
						|
    my ( $header, $gmane ) = @_;
 | 
						|
 | 
						|
    return $header unless defined($header);
 | 
						|
 | 
						|
    $header =~ s/(^<|>$)//g;
 | 
						|
    if ($gmane) {
 | 
						|
        if ( $header =~ /^(.+)-[^-]+@.+$/ ) {
 | 
						|
            $header = "$1";
 | 
						|
        }
 | 
						|
        elsif ( $header =~ /^(.+)@.+$/ ) {
 | 
						|
            $header = "$1";
 | 
						|
        }
 | 
						|
    }
 | 
						|
    else {
 | 
						|
        $header =~ s/@.+$//;
 | 
						|
    }
 | 
						|
 | 
						|
    return $header;
 | 
						|
}
 | 
						|
 | 
						|
#===  FUNCTION  ================================================================
 | 
						|
#         NAME: formatReferences
 | 
						|
#      PURPOSE: Reformat a list of references in the References: header
 | 
						|
#   PARAMETERS: $header         The header for processing
 | 
						|
#      RETURNS: The processed header as an arrayref
 | 
						|
#  DESCRIPTION: Removes the '<>' around each reference, but also removes the
 | 
						|
#               bit after and including the '@' since we need to compare it
 | 
						|
#               with the ID values we have had to tidy after Gmane got to
 | 
						|
#               them.
 | 
						|
#       THROWS: No exceptions
 | 
						|
#     COMMENTS: None
 | 
						|
#     SEE ALSO: N/A
 | 
						|
#===============================================================================
 | 
						|
sub formatReferences {
 | 
						|
    my ($header) = @_;
 | 
						|
 | 
						|
    return $header unless defined($header);
 | 
						|
 | 
						|
    my ( @refs, @results );
 | 
						|
 | 
						|
    @refs = split( /\s+/, $header );
 | 
						|
    foreach my $ref (@refs) {
 | 
						|
        $ref =~ s/(^<|>$)//g;
 | 
						|
        $ref =~ s/@.+$//;
 | 
						|
        push( @results, $ref );
 | 
						|
    }
 | 
						|
 | 
						|
    return \@results;
 | 
						|
}
 | 
						|
 | 
						|
#===  FUNCTION  ================================================================
 | 
						|
#         NAME: trimHeader
 | 
						|
#      PURPOSE: Trims an arbitrary header which is enclosed in "<..>"
 | 
						|
#   PARAMETERS: $header         The header for trimming
 | 
						|
#      RETURNS: The trimmed header
 | 
						|
#  DESCRIPTION: Certain Gmane headers like "Message-Id" are enclosed in "<>"
 | 
						|
#               marks. We want to strip these and return the result
 | 
						|
#       THROWS: No exceptions
 | 
						|
#     COMMENTS: None
 | 
						|
#     SEE ALSO: N/A
 | 
						|
#===============================================================================
 | 
						|
sub trimHeader {
 | 
						|
    my ($header) = @_;
 | 
						|
 | 
						|
    return $header unless defined($header);
 | 
						|
 | 
						|
    $header =~ s/(^<|>$)//g;
 | 
						|
 | 
						|
    return $header;
 | 
						|
}
 | 
						|
 | 
						|
#===  FUNCTION  ================================================================
 | 
						|
#         NAME: coalesce
 | 
						|
#      PURPOSE: To find the first defined argument and return it
 | 
						|
#   PARAMETERS: Arbitrary number of arguments
 | 
						|
#      RETURNS: The first defined argument or undef if there are none
 | 
						|
#  DESCRIPTION: Modelled on the SQL function of the same name. It takes a list
 | 
						|
#               of arguments, scans it for the first one that is not undefined
 | 
						|
#               and returns it. If an argument is defined and it's an arrayref
 | 
						|
#               then the referenced array is returned comma-delimited. This
 | 
						|
#               allows calls such as "coalesce($var,'undef')" which returns
 | 
						|
#               the value of $var if it's defined, and 'undef' if not and
 | 
						|
#               doesn't break anything along the way.
 | 
						|
#       THROWS: No exceptions
 | 
						|
#     COMMENTS: None
 | 
						|
#     SEE ALSO: N/A
 | 
						|
#===============================================================================
 | 
						|
sub coalesce {
 | 
						|
    foreach (@_) {
 | 
						|
        if ( defined($_) ) {
 | 
						|
            if ( ref($_) eq 'ARRAY' ) {
 | 
						|
                return join( ',', @{$_} );
 | 
						|
            }
 | 
						|
            else {
 | 
						|
                return $_;
 | 
						|
            }
 | 
						|
        }
 | 
						|
    }
 | 
						|
    return; # implicit undef
 | 
						|
}
 | 
						|
 | 
						|
#===  FUNCTION  ================================================================
 | 
						|
#         NAME: empty
 | 
						|
#      PURPOSE: Determine whether the argument contains data
 | 
						|
#   PARAMETERS: $arg            Argument
 | 
						|
#      RETURNS: 0 if data found, otherwise 1
 | 
						|
#  DESCRIPTION: If an argument is defined and it's an arrayref then the array
 | 
						|
#               must contain elements otherwise it's regarded as empty. An
 | 
						|
#               undefined argument is empty of course.
 | 
						|
#       THROWS: No exceptions
 | 
						|
#     COMMENTS: None
 | 
						|
#     SEE ALSO: N/A
 | 
						|
#===============================================================================
 | 
						|
sub empty {
 | 
						|
    my ($arg) = @_;
 | 
						|
 | 
						|
    if ( defined($arg) ) {
 | 
						|
        if ( ref($arg) eq 'ARRAY' ) {
 | 
						|
            return scalar(@$arg) == 0;
 | 
						|
        }
 | 
						|
        else {
 | 
						|
            return 0;
 | 
						|
        }
 | 
						|
    }
 | 
						|
    else {
 | 
						|
        return 1;
 | 
						|
    }
 | 
						|
}
 | 
						|
 | 
						|
#===  FUNCTION  ================================================================
 | 
						|
#         NAME: linkMessages
 | 
						|
#      PURPOSE: Link the collected messages using the 'In-Reply-To:' and
 | 
						|
#               'References' headers
 | 
						|
#   PARAMETERS: $msg            Hashref containing the messages
 | 
						|
#               $id             Hashref containing message-id to message key
 | 
						|
#                               links
 | 
						|
#               $silent         Boolean determining whether to emit error
 | 
						|
#                               messages
 | 
						|
#      RETURNS: Nothing
 | 
						|
#  DESCRIPTION: We want to build a structure where messages contain forward
 | 
						|
#               "child" links to other messages. So, given a message, we look
 | 
						|
#               for backward links in the 'In-Reply-To:' and 'References'
 | 
						|
#               headers, go to the referenced message(s) and make the current
 | 
						|
#               one a child. We don't do anything with multiple 'References',
 | 
						|
#               though we could.
 | 
						|
#       THROWS: No exceptions
 | 
						|
#     COMMENTS: None
 | 
						|
#     SEE ALSO: N/A
 | 
						|
#===============================================================================
 | 
						|
sub linkMessages {
 | 
						|
    my ( $msg, $id, $silent ) = @_;
 | 
						|
 | 
						|
    my $irt;
 | 
						|
 | 
						|
    foreach my $key ( sort( keys( %{$msg} ) ) ) {
 | 
						|
        $irt = $msg->{$key}->{'in-reply-to'};
 | 
						|
 | 
						|
        if ( defined($irt) ) {
 | 
						|
            #
 | 
						|
            # There's an In-Reply-To: header
 | 
						|
            #
 | 
						|
            if ( defined( $id->{$irt} ) ) {
 | 
						|
                #
 | 
						|
                # The parent of this message is the key of the back reference and
 | 
						|
                # we add this key to the children of the parent of this message
 | 
						|
                #
 | 
						|
                $msg->{$key}->{parent} = $id->{$irt};
 | 
						|
                push(
 | 
						|
                    @{ $msg->{ $msg->{$key}->{parent} }->{children} },
 | 
						|
                    $id->{ $msg->{$key}->{'message-id'} }
 | 
						|
                );
 | 
						|
            }
 | 
						|
            else {
 | 
						|
                emit( $silent,
 | 
						|
                    "Unable to find Message-ID: $irt (in message $key)\n" );
 | 
						|
            }
 | 
						|
        }
 | 
						|
        elsif ( defined( $msg->{$key}->{'references'} ) ) {
 | 
						|
            #
 | 
						|
            # There's no In-Reply-To: but we have References:, where the last one
 | 
						|
            # is the equivalent. We update the parent message
 | 
						|
            #
 | 
						|
            $ref = $msg->{$key}->{'references'}->[-1];
 | 
						|
            if ( defined($ref) && defined( $id->{$ref} ) ) {
 | 
						|
                #
 | 
						|
                # The parent of this message is the key of the back reference and
 | 
						|
                # we add this key to the children of the parent of this message
 | 
						|
                #
 | 
						|
                $msg->{$key}->{parent} = $id->{$ref};
 | 
						|
                push(
 | 
						|
                    @{ $msg->{ $msg->{$key}->{parent} }->{children} },
 | 
						|
                    $id->{ $msg->{$key}->{'message-id'} }
 | 
						|
                );
 | 
						|
            }
 | 
						|
        }
 | 
						|
    }
 | 
						|
 | 
						|
    return;
 | 
						|
}
 | 
						|
 | 
						|
#===  FUNCTION  ================================================================
 | 
						|
#         NAME: repairThreads
 | 
						|
#      PURPOSE: Repair broken threads where possible
 | 
						|
#   PARAMETERS: $mhash          Hashref of all messages
 | 
						|
#               $silent         Boolean determining whether to emit error
 | 
						|
#                               messages
 | 
						|
#      RETURNS: Nothing
 | 
						|
#  DESCRIPTION: We look for messages with a subject beginning with 'Re:' which
 | 
						|
#               don't have any back links. These look like broken threads
 | 
						|
#               where the sender has not used their mail client's ability to
 | 
						|
#               generate an 'In-Reply-To:' and/or 'References' header.  We try
 | 
						|
#               to find the thread root by looking for the subject in all of
 | 
						|
#               the messages and then make the linkages.
 | 
						|
#       THROWS: No exceptions
 | 
						|
#     COMMENTS: None
 | 
						|
#     SEE ALSO: N/A
 | 
						|
#===============================================================================
 | 
						|
sub repairThreads {
 | 
						|
    my ( $mhash, $silent ) = @_;
 | 
						|
 | 
						|
    foreach my $key ( sort( keys( %{$mhash} ) ) ) {
 | 
						|
        if ( $mhash->{$key}->{subject} =~ /^Re:/ ) {
 | 
						|
            if (!(     defined( $mhash->{$key}->{'in-reply-to'} )
 | 
						|
                    || defined( $mhash->{$key}->{'references'} )
 | 
						|
                )
 | 
						|
                )
 | 
						|
            {
 | 
						|
                #
 | 
						|
                # We have a message with a subject beginning with 'Re:' which
 | 
						|
                # doesn't have any back links. Looks like a broken thread.
 | 
						|
                # Find the thread root by looking for the subject.
 | 
						|
                #
 | 
						|
                my $parent = findParent( $mhash, $key );
 | 
						|
 | 
						|
                #
 | 
						|
                # The parent has to be defined to be meaningful
 | 
						|
                #
 | 
						|
                if ( defined($parent) ) {
 | 
						|
                    #
 | 
						|
                    # Make this message's parent the message we found in two
 | 
						|
                    # ways, via the 'parent' link and the 'in-reply-to'
 | 
						|
                    # header.
 | 
						|
                    #
 | 
						|
                    $mhash->{$key}->{parent} = $parent;
 | 
						|
                    $mhash->{$key}->{'in-reply-to'}
 | 
						|
                        = $mhash->{$parent}->{'message-id'};
 | 
						|
 | 
						|
                    #
 | 
						|
                    # Add this message to the parents children array, making
 | 
						|
                    # sure it's sorted.
 | 
						|
                    #
 | 
						|
                    push( @{ $mhash->{$parent}->{children} }, $key );
 | 
						|
                    $mhash->{$parent}->{children}
 | 
						|
                        = [ sort( @{ $mhash->{$parent}->{children} } ) ];
 | 
						|
 | 
						|
                    #print "Parent:\n";
 | 
						|
                    #dumpMessage( $mhash->{$parent} );
 | 
						|
                    #print "Child:\n";
 | 
						|
                    #dumpMessage( $mhash->{$key} );
 | 
						|
                }
 | 
						|
                else {
 | 
						|
                    emit( $silent, "Couldn't find parent of message $key\n" );
 | 
						|
                }
 | 
						|
            }
 | 
						|
        }
 | 
						|
    }
 | 
						|
 | 
						|
    return;
 | 
						|
}
 | 
						|
 | 
						|
#===  FUNCTION  ================================================================
 | 
						|
#         NAME: findParent
 | 
						|
#      PURPOSE: Given a particular element in the message hash find its parent
 | 
						|
#               by processing the subject
 | 
						|
#   PARAMETERS: $mhash          Hashref of all messages
 | 
						|
#               $key            Key into the message hash
 | 
						|
#      RETURNS: The key of the parent
 | 
						|
#  DESCRIPTION: Tries to join an "orphaned" message with its parent by looking
 | 
						|
#               for another instance of the subject. First the subject of this
 | 
						|
#               message must begin with "Re:". This string is removed in order
 | 
						|
#               to perform a search for the message this one is in reference
 | 
						|
#               to. The search is conducted through all of the stored messages
 | 
						|
#               and the matching message must not have a parent or any back
 | 
						|
#               references itself to be acceptable. This is a fairly vague set
 | 
						|
#               of criteria but it does work for a number of unlinked
 | 
						|
#               messages. [Note: The regex used in the search has been modified
 | 
						|
#               to be case insensitive having encountered an instance where
 | 
						|
#               someone reconstructed the subject line in different case.]
 | 
						|
#       THROWS: No exceptions
 | 
						|
#     COMMENTS: None
 | 
						|
#     SEE ALSO: N/A
 | 
						|
#===============================================================================
 | 
						|
sub findParent {
 | 
						|
    my ( $mhash, $key ) = @_;
 | 
						|
 | 
						|
    my $start_subj = $mhash->{$key}->{subject};
 | 
						|
    my $end_subj;
 | 
						|
    my $parent;
 | 
						|
 | 
						|
    if ( $start_subj =~ /^Re:/ ) {
 | 
						|
        ( $end_subj = $start_subj ) =~ s/^Re:\s*//;
 | 
						|
        foreach my $k ( keys( %{$mhash} ) ) {
 | 
						|
            #if ( $msg{$k}->{subject} eq $end_subj ) {
 | 
						|
            if (   $k ne $key
 | 
						|
                && $msg{$k}->{subject} =~ /(?:^Re:\s*)?(?i)$end_subj/ )
 | 
						|
            {
 | 
						|
                if (!(     defined( $msg{$k}->{'in-reply-to'} )
 | 
						|
                        || defined( $msg{$k}->{'references'} )
 | 
						|
                    )
 | 
						|
                    )
 | 
						|
                {
 | 
						|
                    $parent = $k;
 | 
						|
                    last;
 | 
						|
                }
 | 
						|
            }
 | 
						|
        }
 | 
						|
        return $parent;
 | 
						|
    }
 | 
						|
    else {
 | 
						|
        #
 | 
						|
        # Doesn't look like a message with a parent
 | 
						|
        #
 | 
						|
        return; # implicit undef
 | 
						|
    }
 | 
						|
}
 | 
						|
 | 
						|
#===  FUNCTION  ================================================================
 | 
						|
#         NAME: buildThreadSummary
 | 
						|
#      PURPOSE: Build a summary of the thread roots
 | 
						|
#   PARAMETERS: $dt_from        The from date as a DateTime object
 | 
						|
#               $dt_to          The to date as a DateTime object
 | 
						|
#               $config         Hashref of the configuration data
 | 
						|
#               $mhash          Hashref of all messages
 | 
						|
#               $threads        Hashref to contain the resulting thread data
 | 
						|
#               $silent         Boolean determining whether to emit error
 | 
						|
#                               messages
 | 
						|
#      RETURNS: Total number of messages in all eligible threads
 | 
						|
#  DESCRIPTION: We have built a structure containing all of the message
 | 
						|
#               threads we've seen. However, we're only interested in the
 | 
						|
#               threads for a certain period, so we need to work them out
 | 
						|
#               here. A thread root is a message which has no parents, and
 | 
						|
#               we're interested in those which were written in our time
 | 
						|
#               period. We collect the details we want for the final display
 | 
						|
#               and build a weblink to the thread on Gmane. We also count the
 | 
						|
#               number of messages per thread and the grand total. The
 | 
						|
#               reulting structure is what we'll pass to the template we use
 | 
						|
#               to generate the insertion for the Community News notes (and
 | 
						|
#               other stuff).
 | 
						|
#       THROWS: No exceptions
 | 
						|
#     COMMENTS: None
 | 
						|
#     SEE ALSO: N/A
 | 
						|
#===============================================================================
 | 
						|
sub buildThreadSummary {
 | 
						|
    my ( $dt_from, $dt_to, $config, $mhash, $threads, $silent ) = @_;
 | 
						|
 | 
						|
    my ( $count, $total );
 | 
						|
 | 
						|
    #
 | 
						|
    # Use the UTC seconds for comparison
 | 
						|
    #
 | 
						|
    my $from = $dt_from->utc_rd_as_seconds();
 | 
						|
    my $to   = $dt_to->utc_rd_as_seconds();
 | 
						|
 | 
						|
    #
 | 
						|
    # Adjust the regex from the config file for parsing the stored links.
 | 
						|
    # Remove the '<>' enclosing it.
 | 
						|
    #
 | 
						|
    my $re = $config->{cache}->{regex};
 | 
						|
    $re =~ s/^<|>$//g;
 | 
						|
    $re = qr{$re};
 | 
						|
 | 
						|
    #
 | 
						|
    # Find each thread root which has messages in the date range
 | 
						|
    #
 | 
						|
    $total = 0;
 | 
						|
    foreach my $key ( sort( keys( %{$mhash} ) ) ) {
 | 
						|
        if ( !defined( $mhash->{$key}->{parent} )
 | 
						|
            && eligibleThread( $from, $to, $mhash, $key ) )
 | 
						|
#            && ( $key ge $from && $key lt $to ) )
 | 
						|
        {
 | 
						|
            #
 | 
						|
            # Stash the headers we want
 | 
						|
            #
 | 
						|
            foreach my $k (qw{ from to cc date subject archived-at }) {
 | 
						|
                $threads{$key}->{$k} = $mhash->{$key}->{$k};
 | 
						|
            }
 | 
						|
 | 
						|
            #
 | 
						|
            # Make a weblink to the Gmane thread
 | 
						|
            # TODO What if the regex doesn't match?
 | 
						|
            #
 | 
						|
            if ( $mhash->{$key}->{'archived-at'} =~ /$re/ ) {
 | 
						|
                $threads{$key}->{thread}
 | 
						|
                    = sprintf( $config->{gmane}->{template2}, $1 );
 | 
						|
            }
 | 
						|
            else {
 | 
						|
                emit( $silent,
 | 
						|
                    "Unable to match the Gmane thread (in message $key)\n" );
 | 
						|
            }
 | 
						|
 | 
						|
            #
 | 
						|
            # Count the messages in the thread and keep a grand total
 | 
						|
            #
 | 
						|
            $count = 0;
 | 
						|
            threadLength( $from, $to, $mhash, $key, \$count );
 | 
						|
            $threads{$key}->{count} = $count;
 | 
						|
            $total += $count;
 | 
						|
        }
 | 
						|
    }
 | 
						|
 | 
						|
    return $total;
 | 
						|
 | 
						|
}
 | 
						|
 | 
						|
#===  FUNCTION  ================================================================
 | 
						|
#         NAME: eligibleThread
 | 
						|
#      PURPOSE: Determines whether a given thread is eligible to be included
 | 
						|
#               in a date range
 | 
						|
#   PARAMETERS: $from           The from date as UTC seconds
 | 
						|
#               $to             The to date as UTC seconds
 | 
						|
#               $mhash          Hashref of all messages
 | 
						|
#               $key            Key into the message hash for this particular
 | 
						|
#                               thread
 | 
						|
#      RETURNS: True (1) if the thread is eligible, otherwise false (0)
 | 
						|
#  DESCRIPTION: Checks the top level message (thread root) for elegibility
 | 
						|
#               (date in range) then recursively descends the thread checking
 | 
						|
#               each message in the same manner as it goes. The results are
 | 
						|
#               ORed together so that if any message is in range the entire
 | 
						|
#               thread is.
 | 
						|
#       THROWS: No exceptions
 | 
						|
#     COMMENTS: None
 | 
						|
#     SEE ALSO: N/A
 | 
						|
#===============================================================================
 | 
						|
sub eligibleThread {
 | 
						|
    my ( $from, $to, $mhash, $key ) = @_;
 | 
						|
 | 
						|
    my $res = ( $key ge $from && $key lt $to );
 | 
						|
 | 
						|
    foreach my $k ( @{ $mhash->{$key}->{children} } ) {
 | 
						|
        $res ||= ( $k ge $from && $k lt $to )
 | 
						|
            || eligibleThread( $from, $to, $mhash, $k );
 | 
						|
    }
 | 
						|
 | 
						|
    return $res;
 | 
						|
}
 | 
						|
 | 
						|
#===  FUNCTION  ================================================================
 | 
						|
#         NAME: threadLength
 | 
						|
#      PURPOSE: Count the number of eligible messages in a thread
 | 
						|
#   PARAMETERS: $from           The from date as UTC seconds
 | 
						|
#               $to             The to date as UTC seconds
 | 
						|
#               $mhash          Hashref of all messages
 | 
						|
#               $key            Key into the message hash
 | 
						|
#               $len            Scalar ref containing accumulated length
 | 
						|
#      RETURNS: Nothing
 | 
						|
#  DESCRIPTION: Recursively descends through a thread (a tree of linked
 | 
						|
#               messages) counting the elements but only if they are in range.
 | 
						|
#       THROWS: No exceptions
 | 
						|
#     COMMENTS: None
 | 
						|
#     SEE ALSO: N/A
 | 
						|
#===============================================================================
 | 
						|
sub threadLength {
 | 
						|
    my ( $from, $to, $mhash, $key, $len ) = @_;
 | 
						|
 | 
						|
    $$len++ if ( $key ge $from && $key lt $to );
 | 
						|
 | 
						|
    foreach my $k ( @{ $mhash->{$key}->{children} } ) {
 | 
						|
        threadLength( $from, $to, $mhash, $k, $len );
 | 
						|
    }
 | 
						|
 | 
						|
    return;
 | 
						|
}
 | 
						|
 | 
						|
#===  FUNCTION  ================================================================
 | 
						|
#         NAME: printThread
 | 
						|
#      PURPOSE: To walk a message thread and print it
 | 
						|
#   PARAMETERS: $mhash          Hashref of all messages
 | 
						|
#               $key            Key into the message hash
 | 
						|
#               $level          Integer denoting recursion level
 | 
						|
#               $html           1 for HTML output, 0 for text
 | 
						|
#      RETURNS: Nothing
 | 
						|
#  DESCRIPTION: Recursively descend through a thread (a tree of linked
 | 
						|
#               messages) printing each one. If $html is true the message is
 | 
						|
#               printed in HTML, otherwise it's plain text with indentation.
 | 
						|
#               The level of indentation is controlled by $level which
 | 
						|
#               increments as the tree is recursed.
 | 
						|
#       THROWS: No exceptions
 | 
						|
#     COMMENTS: None
 | 
						|
#     SEE ALSO: N/A
 | 
						|
#===============================================================================
 | 
						|
sub printThread {
 | 
						|
    my ( $mhash, $key, $level, $html ) = @_;
 | 
						|
 | 
						|
    $level = 0 unless $level;
 | 
						|
    $html  = 0 unless $html;
 | 
						|
 | 
						|
    printMessage( $mhash->{$key}, $level, $html );
 | 
						|
 | 
						|
    foreach my $k ( @{ $mhash->{$key}->{children} } ) {
 | 
						|
        printThread( $mhash, $k, $level + 1, $html );
 | 
						|
    }
 | 
						|
 | 
						|
    return;
 | 
						|
}
 | 
						|
 | 
						|
#===  FUNCTION  ================================================================
 | 
						|
#         NAME: printMessage
 | 
						|
#      PURPOSE: To print a message for debug purposes
 | 
						|
#   PARAMETERS: $msg            Hashref containing message attributes
 | 
						|
#               $level          Integer for indenting
 | 
						|
#               $html           1 for HTML output, 0 for text
 | 
						|
#      RETURNS: Nothing
 | 
						|
#  DESCRIPTION: Print a message. The details held consist only of a set of
 | 
						|
#               headers, and these are printed as a label and a value. If
 | 
						|
#               $html is true the output is HTML (an unnumbered list) and only
 | 
						|
#               headers with values are reported. If $html is false then plain
 | 
						|
#               text is used, empty values are reported and indentation is
 | 
						|
#               used, controlled by the value of $level. In plain text mode
 | 
						|
#               each item ends with a line of hyphens.
 | 
						|
#       THROWS: No exceptions
 | 
						|
#     COMMENTS: None
 | 
						|
#     SEE ALSO: N/A
 | 
						|
#===============================================================================
 | 
						|
sub printMessage {
 | 
						|
    my ( $msg, $level, $html ) = @_;
 | 
						|
 | 
						|
    $level = 0 unless $level;
 | 
						|
    $html  = 0 unless $html;
 | 
						|
 | 
						|
    # TODO Don't do it this way, use the global definition since it's used
 | 
						|
    # elsewhere
 | 
						|
    state %tags;
 | 
						|
    %tags = (
 | 
						|
        'from'        => 'From',
 | 
						|
        'to'          => 'To',
 | 
						|
        'cc'          => 'CC',
 | 
						|
        'date'        => 'Date',
 | 
						|
        'subject'     => 'Subject',
 | 
						|
        'archived-at' => 'Link',
 | 
						|
    );
 | 
						|
 | 
						|
    print "<ul>" if $html;
 | 
						|
 | 
						|
    foreach my $k (qw{ from to cc date subject archived-at }) {
 | 
						|
        if ($html) {
 | 
						|
            if ( !empty( $msg->{$k} ) ) {
 | 
						|
                printf "<li>%-8s %s</li>\n", "$tags{$k}:",
 | 
						|
                    coalesce( $msg->{$k}, '' );
 | 
						|
            }
 | 
						|
        }
 | 
						|
        else {
 | 
						|
            printf "%s%-8s %s\n", '  ' x $level, "$tags{$k}:",
 | 
						|
                coalesce( $msg->{$k}, '' );
 | 
						|
        }
 | 
						|
    }
 | 
						|
    if ($html) {
 | 
						|
        print "</ul>\n";
 | 
						|
    }
 | 
						|
    else {
 | 
						|
        print '-' x 80, "\n";
 | 
						|
    }
 | 
						|
 | 
						|
    return;
 | 
						|
}
 | 
						|
 | 
						|
#===  FUNCTION  ================================================================
 | 
						|
#         NAME: dumpMessage
 | 
						|
#      PURPOSE: To print all parts of a message hash for debugging
 | 
						|
#   PARAMETERS: $msg            Hashref containing message attributes
 | 
						|
#      RETURNS: Nothing
 | 
						|
#  DESCRIPTION: Produces a dump of a message for debugging purposes.
 | 
						|
#       THROWS: No exceptions
 | 
						|
#     COMMENTS: None
 | 
						|
#     SEE ALSO: N/A
 | 
						|
#===============================================================================
 | 
						|
sub dumpMessage {
 | 
						|
    my ($msg) = @_;
 | 
						|
 | 
						|
    foreach my $k ( sort( keys( %{$msg} ) ) ) {
 | 
						|
        printf "%12s: %s\n", $k, coalesce( $msg->{$k}, '' );
 | 
						|
    }
 | 
						|
    print '-' x 80, "\n";
 | 
						|
    return;
 | 
						|
}
 | 
						|
 | 
						|
#===  FUNCTION  ================================================================
 | 
						|
#         NAME: emit
 | 
						|
#      PURPOSE: Print text on STDERR unless silent mode has been selected
 | 
						|
#   PARAMETERS: - Boolean indicating whether to be silent or not
 | 
						|
#               - list of arguments to 'print'
 | 
						|
#      RETURNS: Nothing
 | 
						|
#  DESCRIPTION: This is a wrapper around 'print' to determine whether to send
 | 
						|
#               a message to STDERR depending on a boolean. We need this to be
 | 
						|
#               able to make the script silent when the -silent option is
 | 
						|
#               selected
 | 
						|
#       THROWS: No exceptions
 | 
						|
#     COMMENTS: None
 | 
						|
#     SEE ALSO: N/A
 | 
						|
#===============================================================================
 | 
						|
sub emit {
 | 
						|
    unless (shift) {
 | 
						|
        print STDERR @_;
 | 
						|
    }
 | 
						|
    return;
 | 
						|
}
 | 
						|
 | 
						|
#===  FUNCTION  ================================================================
 | 
						|
#         NAME: Options
 | 
						|
#      PURPOSE: Processes command-line options
 | 
						|
#   PARAMETERS: $optref     Hash reference to hold the options
 | 
						|
#      RETURNS: Undef
 | 
						|
#  DESCRIPTION:
 | 
						|
#       THROWS: no exceptions
 | 
						|
#     COMMENTS: none
 | 
						|
#     SEE ALSO: n/a
 | 
						|
#===============================================================================
 | 
						|
sub Options {
 | 
						|
    my ($optref) = @_;
 | 
						|
 | 
						|
    my @options = (
 | 
						|
        "help",    "debug=i",   "out=s",        "template=s",
 | 
						|
        "silent!", "checknew!", "initialise=i", "from=s",
 | 
						|
        "to=s",
 | 
						|
    );
 | 
						|
 | 
						|
    if ( !GetOptions( $optref, @options ) ) {
 | 
						|
        pod2usage( -msg => "$PROG version $VERSION\n", -exitval => 1 );
 | 
						|
    }
 | 
						|
 | 
						|
    return;
 | 
						|
}
 | 
						|
 | 
						|
__END__
 | 
						|
 | 
						|
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 | 
						|
#  Application Documentation
 | 
						|
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 | 
						|
#{{{
 | 
						|
 | 
						|
 | 
						|
=head1 NAME
 | 
						|
 | 
						|
summarise_mail - Generate a summary of messages on the HPR mailing list
 | 
						|
 | 
						|
=head1 VERSION
 | 
						|
 | 
						|
This documentation refers to summarise_mail version 0.0.5
 | 
						|
 | 
						|
 | 
						|
=head1 USAGE
 | 
						|
 | 
						|
    ./summarise_mail [-help] [-debug=N] [-from=DATE] [-to=DATE] [-out=FILE]
 | 
						|
        [-template=FILE] [-[no]silent] [-[no]checknew] [-initialise=N]
 | 
						|
 | 
						|
 | 
						|
=head1 OPTIONS
 | 
						|
 | 
						|
=over 8
 | 
						|
 | 
						|
=item B<-help>
 | 
						|
 | 
						|
Prints a brief help message describing the usage of the program, and then exits.
 | 
						|
 | 
						|
=item B<-initialise=N>
 | 
						|
 | 
						|
This option builds a new mailbox cache. The value B<N> defines the starting
 | 
						|
message to be downloaded from the Gmane site. The number defaults to 1, though
 | 
						|
this is not advisable since it potentially overloads the Gmane server. The
 | 
						|
number of messages downloaded is limited by the B<lookahead> value in the
 | 
						|
configuration file (see below).
 | 
						|
 | 
						|
If a cache file already exists the script will not run. It is necessary to
 | 
						|
delete this file before initialising a new one.
 | 
						|
 | 
						|
After the cache is initialised the script will process the messages it
 | 
						|
contains.
 | 
						|
 | 
						|
The B<-initialise=N> and the B<-[no]checknew> options are mutually exclusive.
 | 
						|
 | 
						|
=item B<-[no]checknew>
 | 
						|
 | 
						|
This option defines whether the script will check Gmane for updates to the
 | 
						|
mailing list. If updates are found then they will be incorporated into the
 | 
						|
cached mailbox file.
 | 
						|
 | 
						|
If omitted no check is made (B<-nochecknew>).
 | 
						|
 | 
						|
The B<-initialise=N> and the B<-[no]checknew> options are mutually exclusive.
 | 
						|
 | 
						|
=item B<-from=DATE>
 | 
						|
 | 
						|
Specifies the starting date for the mail summary.
 | 
						|
 | 
						|
The date format should be B<DD-Mon-YYYY> (e.g. 12-Jun-2014), B<DD-MM-YYYY>
 | 
						|
(e.g. 12-06-2014) or B<YYYY-MM-DD> (e.g. 2014-06-12).
 | 
						|
 | 
						|
If this option is omitted the current date is used.
 | 
						|
 | 
						|
=item B<-to=DATE>
 | 
						|
 | 
						|
Specifies the ending date for the mail summary.
 | 
						|
 | 
						|
The date format should be B<DD-Mon-YYYY> (e.g. 12-Jun-2014), B<DD-MM-YYYY>
 | 
						|
(e.g. 12-06-2014) or B<YYYY-MM-DD> (e.g. 2014-06-12).
 | 
						|
 | 
						|
If this option is omitted the last day of the month defined by the B<-from>
 | 
						|
date is used.
 | 
						|
 | 
						|
=item B<-template=FILE>
 | 
						|
 | 
						|
This option defines the template that will be used to format the report
 | 
						|
generated by the script.
 | 
						|
 | 
						|
If the option is not provided, the script will use a template called
 | 
						|
B<mailnote_template.tpl> in the directory which holds the script. This
 | 
						|
template generates HTML suitable for incorporation into the Community News
 | 
						|
show notes.
 | 
						|
 | 
						|
=item B<-out=FILE>
 | 
						|
 | 
						|
This option defines the output file for the report.  If the option is omitted
 | 
						|
the report is written to STDOUT, allowing it to be redirected if required.
 | 
						|
 | 
						|
The output file name may contain the characters 'B<%s>'. This denotes the point
 | 
						|
at which the year and month in the format B<YYYY-MM> are inserted. For example
 | 
						|
if the script is being run for July 2014 the option:
 | 
						|
 | 
						|
    -out=mailreport_%s.html
 | 
						|
 | 
						|
will cause the generation of the file:
 | 
						|
 | 
						|
    mailreport_2014-07.html
 | 
						|
 | 
						|
=item B<-[no]silent>
 | 
						|
 | 
						|
This option controls whether the script reports minimal details of what it is
 | 
						|
doing to STDERR. If the option is omitted the report is generated (B<-nosilent>).
 | 
						|
 | 
						|
The script reports: the starting and ending dates it is working on and the name of the output file
 | 
						|
(if appropriate).
 | 
						|
 | 
						|
=item B<-debug=N>
 | 
						|
 | 
						|
Runs the script in debug mode if the value of B<N> is greater than 0. The
 | 
						|
default setting is zero.
 | 
						|
 | 
						|
In debug mode the script reports various pieces of information about the
 | 
						|
parsing of the mail messages and building of threads.
 | 
						|
 | 
						|
=back
 | 
						|
 | 
						|
 | 
						|
=head1 DESCRIPTION
 | 
						|
 | 
						|
=head2 Overview
 | 
						|
 | 
						|
This script generates a summary of the mail sent to the HPR mailing list over
 | 
						|
a period.
 | 
						|
 | 
						|
Messages to the mailing list are copied to the free B<Gmane> service and the
 | 
						|
script uses this to prepare its report. The Gmane service offers
 | 
						|
a downloadable interface at
 | 
						|
http://download.gmane.org/gmane.network.syndication.podcast.hacker-public-radio
 | 
						|
where selected messages can be collected in B<MBOX> format.
 | 
						|
 | 
						|
=head2 Methodology
 | 
						|
 | 
						|
The script operates in three phases:
 | 
						|
 | 
						|
=over 4
 | 
						|
 | 
						|
=item -
 | 
						|
 | 
						|
Messages are collected from Gmane and stored in a local cache (a file in MBOX
 | 
						|
format). This is expected to be done once, and thereafter new messages are
 | 
						|
appended to this file.
 | 
						|
 | 
						|
=item -
 | 
						|
 | 
						|
New messages are appended to the cache file. This is done by determining the
 | 
						|
last message in the cache then requesting the next block of messages from
 | 
						|
Gmane. The downloaded messages are then added to the file.
 | 
						|
 | 
						|
=item -
 | 
						|
 | 
						|
The cache is processed to generate the report. Usually the script is given
 | 
						|
a start date (and optional end date, though this defaults to the end of the
 | 
						|
month in the start date).
 | 
						|
 | 
						|
The algorithm is as follows:
 | 
						|
 | 
						|
=over 4
 | 
						|
 | 
						|
=item -
 | 
						|
 | 
						|
The script parses out the important headers of all messages in the cache.
 | 
						|
 | 
						|
=item -
 | 
						|
 | 
						|
The script walks the header structure in time order joining messages based on
 | 
						|
their 'In-reference-to:' and 'References:' headers. Each message gets its
 | 
						|
parent recorded as a pointer, and its children as an array of pointers. If
 | 
						|
a reference is to a message that is not in the cache then it is not recorded.
 | 
						|
 | 
						|
=item -
 | 
						|
 | 
						|
At this point all messages without a parent are either the thread roots or
 | 
						|
cases where someone has not replied properly. The script performs a further
 | 
						|
join when an "orphaned" message has the same subject as another with a 'Re:'
 | 
						|
on the front.
 | 
						|
 | 
						|
=item -
 | 
						|
 | 
						|
Now all messages without parents are the best that can be done to detect
 | 
						|
thread roots. These are used to make the report.
 | 
						|
 | 
						|
=item -
 | 
						|
 | 
						|
The messages that cannot be threaded are those where the sender does not use
 | 
						|
'Reply' or where they reply to a digest. Digests are not threaded because they
 | 
						|
are blobs of multiple messages.
 | 
						|
 | 
						|
=item -
 | 
						|
 | 
						|
Some messages may be threaded erroneously. This happens when a sender, rather
 | 
						|
than generating a new message, replies to an existing message but changes the
 | 
						|
'Subject:' line. There is not much that can be done to correct this.
 | 
						|
 | 
						|
=back
 | 
						|
 | 
						|
=back
 | 
						|
 | 
						|
=head2 Report Template
 | 
						|
 | 
						|
The script uses the B<Template> module to format the report. See the default
 | 
						|
template B<mailnote_template.tpl> for an example.
 | 
						|
 | 
						|
The template is passed two variables:
 | 
						|
 | 
						|
=over 4
 | 
						|
 | 
						|
=item B<total>
 | 
						|
 | 
						|
This is a scalar variable containing a number: the count of messages in the
 | 
						|
specified range.
 | 
						|
 | 
						|
=item B<threads>
 | 
						|
 | 
						|
This is a hash indexed by the message date converted to UTC date then to the
 | 
						|
number of seconds since the Unix epoch. Each hash value is a hashref
 | 
						|
containing the following fields:
 | 
						|
 | 
						|
 Field          Contents
 | 
						|
 -----          --------
 | 
						|
 from           The contents of the 'From:' header
 | 
						|
 to             The contents of the 'To:' header
 | 
						|
 cc             The contents of the 'Cc:' header
 | 
						|
 date           The contents of the 'Date:' header
 | 
						|
 subject        The contents of the 'Subject:' header
 | 
						|
 archived-at    The contents of the 'Archived-at:' header (the Gmane
 | 
						|
                permalink URL)
 | 
						|
 count          The number of messages in the thread
 | 
						|
 | 
						|
These messages are the roots of all of the detectable threads in the period.
 | 
						|
Sorting them by the main hash key results in the threads being ordered by
 | 
						|
their timestamp.
 | 
						|
 | 
						|
=back
 | 
						|
 | 
						|
=head1 DIAGNOSTICS
 | 
						|
 | 
						|
A list of every error and warning message that the application can generate
 | 
						|
(even the ones that will "never happen"), with a full explanation of each
 | 
						|
problem, one or more likely causes, and any suggested remedies. If the
 | 
						|
application generates exit status codes (e.g. under Unix) then list the exit
 | 
						|
status associated with each error.
 | 
						|
 | 
						|
 | 
						|
=head1 CONFIGURATION AND ENVIRONMENT
 | 
						|
 | 
						|
The script obtains the information it requires to find the mail details from
 | 
						|
B<Gmane> from a configuration file. The name of the file it expects is
 | 
						|
B<.summarise_mail.cfg> in the
 | 
						|
directory holding the script. To change this will require changing the script.
 | 
						|
 | 
						|
The configuration file format is as follows:
 | 
						|
 | 
						|
 <gmane>
 | 
						|
     url = http://download.gmane.org/gmane.network.syndication.podcast.hacker-public-radio
 | 
						|
     template = "$url/%d/%d"
 | 
						|
     lookahead = 100
 | 
						|
 </gmane>
 | 
						|
 | 
						|
 <cache>
 | 
						|
     directory = /home/dave/Community_News/mail_cache
 | 
						|
     filename = gmane.mbox
 | 
						|
     regex = "<http://permalink\.gmane\.org/gmane\.network\.syndication\.podcast\.hacker-public-radio/(\d+)>"
 | 
						|
 </cache>
 | 
						|
 | 
						|
The elements of this configuration file are explained below. First the
 | 
						|
B<gmane> section:
 | 
						|
 | 
						|
=over 4
 | 
						|
 | 
						|
=item B<url>
 | 
						|
 | 
						|
This is the base URL on the Gmane site. It is used to construct URLs for
 | 
						|
collecting mail messages. This should not require any changes.
 | 
						|
 | 
						|
=item B<template>
 | 
						|
 | 
						|
This is a template which is used to generate the actual URL used to download
 | 
						|
messages. The script replaces each '%d' with a number. The first is the
 | 
						|
starting message number, and the second the ending number plus one.
 | 
						|
 | 
						|
This should not require any changes.
 | 
						|
 | 
						|
=item B<lookahead>
 | 
						|
 | 
						|
This defines the maximum number of messages to be collected. This number is
 | 
						|
added to the first number used when contructing the URL from the B<template>
 | 
						|
above. Normally just a few messages are downloaded at a time, but when the
 | 
						|
script is used to initialise the mailbox cache file this number of messages may
 | 
						|
be downloaded.
 | 
						|
 | 
						|
This value can be changed if desired, but it is set at 100 in order to avoid
 | 
						|
overloading the Gmane servers.
 | 
						|
 | 
						|
=back
 | 
						|
 | 
						|
The B<cache> section contains the following:
 | 
						|
 | 
						|
=over 4
 | 
						|
 | 
						|
=item B<directory>
 | 
						|
 | 
						|
This defines the full path to the directory holding the mail cache file.
 | 
						|
 | 
						|
This may be changed to reflect a change of location.
 | 
						|
 | 
						|
=item B<filename>
 | 
						|
 | 
						|
This defines the name of the mail cache. It is expected to be found in the
 | 
						|
directory defined above.
 | 
						|
 | 
						|
This may be changed to reflect a change of name.
 | 
						|
 | 
						|
=item B<regex>
 | 
						|
 | 
						|
Individual messages in the cache each hold an URL which provides a permanent
 | 
						|
link to the message on the Gmane site. The script uses the regular expression
 | 
						|
defined here to collect the message number from this URL. This is how the
 | 
						|
script determines the number of the oldest message in the cache when looking
 | 
						|
for new messages.
 | 
						|
 | 
						|
This expression should only be changed with great caution.
 | 
						|
 | 
						|
=back
 | 
						|
 | 
						|
=head1 DEPENDENCIES
 | 
						|
 | 
						|
 Getopt::Long
 | 
						|
 Pod::Usage
 | 
						|
 Config::General
 | 
						|
 LWP::UserAgent
 | 
						|
 Mail::Box::Manager
 | 
						|
 Date::Parse
 | 
						|
 Date::Calc
 | 
						|
 DateTime
 | 
						|
 DateTime::TimeZone
 | 
						|
 Template
 | 
						|
 Template::Filters
 | 
						|
 Data::Dumper
 | 
						|
 | 
						|
 | 
						|
=head1 BUGS AND LIMITATIONS
 | 
						|
 | 
						|
There are no known bugs in this module.
 | 
						|
Please report problems to Dave Morriss (Dave.Morriss@gmail.com)
 | 
						|
Patches are welcome.
 | 
						|
 | 
						|
=head1 AUTHOR
 | 
						|
 | 
						|
Dave Morriss (Dave.Morriss@gmail.com)
 | 
						|
 | 
						|
 | 
						|
=head1 LICENCE AND COPYRIGHT
 | 
						|
 | 
						|
Copyright (c) 2014, 2015 Dave Morriss (Dave.Morriss@gmail.com). All rights reserved.
 | 
						|
 | 
						|
This module is free software; you can redistribute it and/or
 | 
						|
modify it under the same terms as Perl itself. See perldoc perlartistic.
 | 
						|
 | 
						|
This program is distributed in the hope that it will be useful,
 | 
						|
but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
						|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 | 
						|
 | 
						|
=cut
 | 
						|
 | 
						|
#}}}
 | 
						|
 | 
						|
# [zo to open fold, zc to close]
 | 
						|
 | 
						|
# vim: syntax=perl:ts=8:sw=4:et:ai:tw=78:fo=tcrqn21:fdm=marker
 |