forked from HPR/hpr-tools
		
	
		
			
	
	
		
			1711 lines
		
	
	
		
			56 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
		
		
			
		
	
	
			1711 lines
		
	
	
		
			56 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
|   | #!/usr/bin/env perl | ||
|  | #=============================================================================== | ||
|  | # | ||
|  | #         FILE: summarise_mail | ||
|  | # | ||
|  | #        USAGE: ./summarise_mail [-help] [-debug=N] [-from=DATE] [-to=DATE] | ||
|  | #               [-out=FILE] [-template=FILE] [-[no]silent] [-[no]checknew] | ||
|  | #               [-initialise=N] | ||
|  | # | ||
|  | #  DESCRIPTION: Generates a summary of the HPR mailing list for the Community | ||
|  | #  		News | ||
|  | # | ||
|  | #      OPTIONS: --- | ||
|  | # REQUIREMENTS: --- | ||
|  | #         BUGS: --- | ||
|  | #        NOTES: --- | ||
|  | #       AUTHOR: Dave Morriss (djm), Dave.Morriss@gmail.com | ||
|  | #      VERSION: 0.0.5 | ||
|  | #      CREATED: 2015-02-15 15:06:11 | ||
|  | #     REVISION: 2015-10-04 20:21:44 | ||
|  | # | ||
|  | #------------------------------------------------------------------------------- | ||
|  | # TODO | ||
|  | # | ||
|  | # - Add a means of reporting all of the message bodies in a thread. They | ||
|  | #   ideally need formatting and should be printable so that a template can | ||
|  | #   generate plain text versions for use by the Community News hosts when | ||
|  | #   reviewing the last month's messages. This is quite a large task but maybe | ||
|  | #   there are mail formatters that can do it. | ||
|  | # | ||
|  | #=============================================================================== | ||
|  | 
 | ||
|  | use 5.010; | ||
|  | use strict; | ||
|  | use warnings; | ||
|  | use utf8; | ||
|  | 
 | ||
|  | use Getopt::Long; | ||
|  | use Pod::Usage; | ||
|  | use Config::General; | ||
|  | 
 | ||
|  | use LWP::UserAgent; | ||
|  | use Mail::Box::Manager; | ||
|  | 
 | ||
|  | use Date::Parse; | ||
|  | use Date::Calc qw{:all}; | ||
|  | use DateTime; | ||
|  | use DateTime::TimeZone; | ||
|  | 
 | ||
|  | use Template; | ||
|  | use Template::Filters; | ||
|  | Template::Filters->use_html_entities;    # Use HTML::Entities in the template | ||
|  | 
 | ||
|  | use Data::Dumper; | ||
|  | 
 | ||
|  | # | ||
|  | # Version number (manually incremented) | ||
|  | # | ||
|  | our $VERSION = '0.0.5'; | ||
|  | 
 | ||
|  | # | ||
|  | # Script and directory names | ||
|  | # | ||
|  | ( my $PROG = $0 ) =~ s|.*/||mx; | ||
|  | ( my $DIR  = $0 ) =~ s|/?[^/]*$||mx; | ||
|  | $DIR = '.' unless $DIR; | ||
|  | 
 | ||
|  | #------------------------------------------------------------------------------- | ||
|  | # Declarations | ||
|  | #------------------------------------------------------------------------------- | ||
|  | # | ||
|  | # Constants and other declarations | ||
|  | # | ||
|  | my $basedir    = "$ENV{HOME}/HPR/Community_News"; | ||
|  | my $configfile = "$basedir/.$PROG.cfg"; | ||
|  | my $bpfile     = "$basedir/mailnote_template.tpl"; | ||
|  | 
 | ||
|  | my (%dtargs, $dt,      $tz,    $rd,      $mid, | ||
|  |     $irt,    $ref,     $date,  @parsed,  %msg, | ||
|  |     %id,     $dt_from, $dt_to, %threads, $total, | ||
|  | ); | ||
|  | 
 | ||
|  | my %tags = ( | ||
|  |     'from'        => 'From', | ||
|  |     'to'          => 'To', | ||
|  |     'cc'          => 'CC', | ||
|  |     'date'        => 'Date', | ||
|  |     'subject'     => 'Subject', | ||
|  |     'archived-at' => 'Link', | ||
|  | ); | ||
|  | 
 | ||
|  | # | ||
|  | # Enable Unicode mode | ||
|  | # | ||
|  | binmode STDOUT, ":encoding(UTF-8)"; | ||
|  | binmode STDERR, ":encoding(UTF-8)"; | ||
|  | 
 | ||
|  | # | ||
|  | # Load configuration data | ||
|  | # | ||
|  | my $conf = Config::General->new( | ||
|  |     -ConfigFile      => $configfile, | ||
|  |     -InterPolateVars => 1, | ||
|  |     -ExtendedAccess  => 1, | ||
|  | ) or die "Unable to open $configfile\n"; | ||
|  | 
 | ||
|  | my %config = $conf->getall() or die "Unable to process $configfile\n"; | ||
|  | 
 | ||
|  | #------------------------------------------------------------------------------- | ||
|  | # Options and arguments | ||
|  | #------------------------------------------------------------------------------- | ||
|  | # | ||
|  | # Process options | ||
|  | # | ||
|  | my %options; | ||
|  | Options( \%options ); | ||
|  | 
 | ||
|  | # | ||
|  | # Default help | ||
|  | # | ||
|  | pod2usage( -msg => "$PROG version $VERSION\n", -exitval => 1 ) | ||
|  |     if ( $options{'help'} ); | ||
|  | 
 | ||
|  | # | ||
|  | # Collect options | ||
|  | # | ||
|  | my $debug      = ( defined( $options{debug} )      ? $options{debug}    : 0 ); | ||
|  | my $initialise = ( defined( $options{initialise} ) ? 1                  : 0 ); | ||
|  | my $start      = $options{initialise}; | ||
|  | my $silent     = ( defined( $options{silent} )     ? $options{silent}   : 0 ); | ||
|  | my $checknew   = ( defined( $options{checknew} )   ? $options{checknew} : 0 ); | ||
|  | my $outfile    = $options{out}; | ||
|  | 
 | ||
|  | my @fromdate; | ||
|  | if ( defined( $options{from} ) ) { | ||
|  |     @fromdate = parseDate( $options{from} ); | ||
|  | } | ||
|  | 
 | ||
|  | my @todate; | ||
|  | if ( defined( $options{to} ) ) { | ||
|  |     @todate = parseDate( $options{to} ); | ||
|  | } | ||
|  | 
 | ||
|  | my $template | ||
|  |     = ( defined( $options{template} ) ? $options{template} : $bpfile ); | ||
|  | 
 | ||
|  | # | ||
|  | # Check for illegal option combinations | ||
|  | # | ||
|  | die "Use only one of -initialise and -checknew\n" | ||
|  |     if ( $initialise && $checknew ); | ||
|  | 
 | ||
|  | # | ||
|  | # Sanity checks | ||
|  | # | ||
|  | die "Error: Unable to find template $template\n" unless -r $template; | ||
|  | 
 | ||
|  | #------------------------------------------------------------------------------- | ||
|  | # Default the from date to today and compute a to date if one wasn't given | ||
|  | #------------------------------------------------------------------------------- | ||
|  | if ( scalar(@fromdate) == 0 ) { | ||
|  |     @fromdate = Today(); | ||
|  | } | ||
|  | 
 | ||
|  | if ( scalar(@todate) == 0 ) { | ||
|  |     @todate = Add_Delta_Days( @fromdate[ 0, 1 ], | ||
|  |         1, Days_in_Month( @fromdate[ 0, 1 ] ) ); | ||
|  | } | ||
|  | 
 | ||
|  | # | ||
|  | # We need DateTime dates for comparison | ||
|  | # | ||
|  | emit( $silent, "From: ", ISO8601_Date(@fromdate), "\n" ); | ||
|  | emit( $silent, "To:   ", ISO8601_Date(@todate),   "\n" ); | ||
|  | 
 | ||
|  | $dt_from = calcToDT(@fromdate); | ||
|  | $dt_to   = calcToDT(@todate); | ||
|  | 
 | ||
|  | #------------------------------------------------------------------------------- | ||
|  | # Open the output file (or STDOUT) - we may need the date to do it | ||
|  | #------------------------------------------------------------------------------- | ||
|  | my $outfh; | ||
|  | if ($outfile) { | ||
|  |     $outfile = sprintf( $outfile, | ||
|  |         sprintf( "%d-%02d", $dt_from->year, $dt_from->month ) ) | ||
|  |         if ( $outfile =~ /%s/ ); | ||
|  |     emit( $silent, "Output: ", $outfile, "\n" ); | ||
|  | 
 | ||
|  |     open( $outfh, ">:encoding(UTF-8)", $outfile ) | ||
|  |         or die "Unable to open $outfile for writing: $!"; | ||
|  | } | ||
|  | else { | ||
|  |     open( $outfh, ">&", \*STDOUT ) | ||
|  |         or die "Unable to initialise for writing: $!"; | ||
|  | } | ||
|  | 
 | ||
|  | #------------------------------------------------------------------------------- | ||
|  | # Check the configuration file | ||
|  | #------------------------------------------------------------------------------- | ||
|  | checkConfig( \%config ); | ||
|  | 
 | ||
|  | # | ||
|  | # The cached mail file | ||
|  | # | ||
|  | my $mailfile = join( '/', @{ $config{cache} }{ 'directory', 'filename' } ); | ||
|  | 
 | ||
|  | #------------------------------------------------------------------------------- | ||
|  | # If asked to initialise the cache check it makes sense, otherwise look to see | ||
|  | # if we're to update an existing cache | ||
|  | #------------------------------------------------------------------------------- | ||
|  | if ($initialise) { | ||
|  |     if ( -e $mailfile ) { | ||
|  |         die "The mail cache $mailfile exists.\n" | ||
|  |             . "Delete it before initialising\n"; | ||
|  |     } | ||
|  |     unless (initialiseCache( \%config, $start )) { | ||
|  |         print "Failed to initialise the cache; can't continue\n"; | ||
|  |         exit; | ||
|  |     } | ||
|  | } | ||
|  | elsif ($checknew) { | ||
|  |     unless ( -e $mailfile ) { | ||
|  |         die "The mail cache $mailfile does not exist\n" | ||
|  |             . "Initialise it before checking for updates\n"; | ||
|  |     } | ||
|  |     unless (updateCache( \%config )) { | ||
|  |         print "Failed to update the cache; continuing with old mail\n"; | ||
|  |     } | ||
|  | } | ||
|  | 
 | ||
|  | # | ||
|  | # Open the mail file (as a folder in MBOX format) | ||
|  | # | ||
|  | my $mgr    = Mail::Box::Manager->new; | ||
|  | my $folder = $mgr->open($mailfile); | ||
|  | 
 | ||
|  | #------------------------------------------------------------------------------- | ||
|  | # Walk through the messages in the folder collecting message details into | ||
|  | # a hash structure indexed by the UTC timestamp. | ||
|  | #------------------------------------------------------------------------------- | ||
|  | foreach my $message ( $folder->messages ) { | ||
|  |     # | ||
|  |     # TODO This doesn't work for a reason I don't understand. Needs to be | ||
|  |     # fixed | ||
|  |     # | ||
|  |     #$message->unfold(); | ||
|  | 
 | ||
|  |     # | ||
|  |     # Parse and convert the message date. The @parsed array contains | ||
|  |     # $ss,$mm,$hh,$day,$month,$year,$zone. The year and month need adjustment. | ||
|  |     # The timezone value is number of seconds from UTC. | ||
|  |     # | ||
|  |     $date   = $message->get('Date'); | ||
|  |     @parsed = strptime($date); | ||
|  |     $parsed[5] += 1900; | ||
|  |     $parsed[4] += 1; | ||
|  | 
 | ||
|  |     # | ||
|  |     # Convert the parsed date to a DateTime | ||
|  |     # | ||
|  |     @dtargs{ 'second', 'minute', 'hour', 'day', 'month', 'year' } = (@parsed); | ||
|  |     $dt = DateTime->new(%dtargs); | ||
|  |     $tz = DateTime::TimeZone->new( | ||
|  |         name => DateTime::TimeZone->offset_as_string( $parsed[6] ) ); | ||
|  |     $dt->set_time_zone($tz); | ||
|  | 
 | ||
|  |     # | ||
|  |     # Stash mail parameters | ||
|  |     # | ||
|  |     $rd  = $dt->utc_rd_as_seconds(); | ||
|  |     $mid = formatID( $message->get('Message-ID'), 0 ); | ||
|  |     $irt = formatID( $message->get('In-Reply-To'), 1 ); | ||
|  | 
 | ||
|  |     my @from = $message->from; | ||
|  |     my @to   = $message->to; | ||
|  |     my @cc   = $message->cc; | ||
|  | 
 | ||
|  |     $msg{$rd} = { | ||
|  |         # '_date' => $date, | ||
|  |         # '_from' => \@ffrom, | ||
|  |         # '_to'   => \@fto, | ||
|  |         # '_cc'   => \@fcc, | ||
|  |         '_mid'    => $message->get('Message-ID'), | ||
|  |         '_irt'    => $message->get('In-Reply-To'), | ||
|  |         '_rdate'  => $rd, | ||
|  |         'date'    => $dt->ymd . ' ' . $dt->hms . ' ' . $dt->time_zone->name, | ||
|  |         'from'    => formatEmail( \@from ), | ||
|  |         'to'      => formatEmail( \@to ), | ||
|  |         'cc'      => formatEmail( \@cc ), | ||
|  |         'subject' => $message->get('Subject'), | ||
|  |         'archived-at' => trimHeader( $message->get('Archived-At') ), | ||
|  |         'message-id'  => $mid, | ||
|  |         'in-reply-to' => $irt, | ||
|  |         'references'  => formatReferences( $message->get('References') ), | ||
|  |         'parent'      => undef, | ||
|  |         'children'    => [], | ||
|  |     }; | ||
|  | 
 | ||
|  |     # | ||
|  |     # Stash the message id for easier linking | ||
|  |     # | ||
|  |     $id{$mid} = $rd; | ||
|  | } | ||
|  | 
 | ||
|  | $folder->close; | ||
|  | 
 | ||
|  | #------------------------------------------------------------------------------- | ||
|  | # Link the messages according to their internal back-references. Rescue any | ||
|  | # broken threads caused by people not using their mail clients properly. | ||
|  | #------------------------------------------------------------------------------- | ||
|  | linkMessages( \%msg, \%id, ( $debug == 0 ) ); | ||
|  | repairThreads( \%msg, ( $debug == 0 ) ); | ||
|  | 
 | ||
|  | #------------------------------------------------------------------------------- | ||
|  | # Generate the hash of message thread details that start in the selected | ||
|  | # period. Prepare the template and generate the final document | ||
|  | #------------------------------------------------------------------------------- | ||
|  | $total = buildThreadSummary( $dt_from, $dt_to, \%config, \%msg, \%threads, | ||
|  |     ( $debug == 0 ) ); | ||
|  | 
 | ||
|  | my $tt = Template->new( | ||
|  |     {   ABSOLUTE     => 1, | ||
|  |         ENCODING     => 'utf8', | ||
|  |         INCLUDE_PATH => $basedir, | ||
|  |     } | ||
|  | ); | ||
|  | my $vars = { | ||
|  |     total   => $total, | ||
|  |     threads => \%threads, | ||
|  | }; | ||
|  | my $document; | ||
|  | $tt->process( $template, $vars, \$document, { binmode => ':utf8' } ) | ||
|  |     || die $tt->error(), "\n"; | ||
|  | print $outfh $document; | ||
|  | close($outfh); | ||
|  | 
 | ||
|  | exit; | ||
|  | 
 | ||
|  | #===  FUNCTION  ================================================================ | ||
|  | #         NAME: checkConfig | ||
|  | #      PURPOSE: Check the configuration, aborting if it's messed up and | ||
|  | #               missing necessary items | ||
|  | #   PARAMETERS: $config         configuration hash | ||
|  | #      RETURNS: Nothing | ||
|  | #  DESCRIPTION: Checks that all of the required items are present in the | ||
|  | #               $config hash. These have been parsed from the configuration | ||
|  | #               file usually called '.summarise_mail.cfg'. It only performs | ||
|  | #               existence checks, nothing more fancy. | ||
|  | #       THROWS: No exceptions | ||
|  | #     COMMENTS: None | ||
|  | #     SEE ALSO: N/A | ||
|  | #=============================================================================== | ||
|  | sub checkConfig { | ||
|  |     my ($config) = @_; | ||
|  | 
 | ||
|  |     die "Configuration file problem\n" | ||
|  |         unless ( defined( $config->{gmane}->{url} ) | ||
|  |         && defined( $config->{gmane}->{template1} ) | ||
|  |         && defined( $config->{gmane}->{thread} ) | ||
|  |         && defined( $config->{gmane}->{template2} ) | ||
|  |         && defined( $config->{gmane}->{lookahead} ) | ||
|  |         && defined( $config->{cache}->{directory} ) | ||
|  |         && defined( $config->{cache}->{filename} ) | ||
|  |         && defined( $config->{cache}->{regex} ) ); | ||
|  | 
 | ||
|  |     return; | ||
|  | } | ||
|  | 
 | ||
|  | #===  FUNCTION  ================================================================ | ||
|  | #         NAME: initialiseCache | ||
|  | #      PURPOSE: Load a new cache from Gmane | ||
|  | #   PARAMETERS: $config         Configuration hashref | ||
|  | #               $start          Start number to download | ||
|  | #      RETURNS: If the GET succeeded then 1 otherwise 0 | ||
|  | #  DESCRIPTION: Creates a new cache file of mail downloaded from Gmane. The | ||
|  | #               file is in MBOX format but has had Gmane's obscuration | ||
|  | #               algorithms applied to it, messing up certain elements like | ||
|  | #               message-ids. The download begins with message $start and | ||
|  | #               finishes with $start+LOOKAHEAD where the lookahead value is | ||
|  | #               defined in the configuration file; usually 100. | ||
|  | #       THROWS: No exceptions | ||
|  | #     COMMENTS: None | ||
|  | #     SEE ALSO: N/A | ||
|  | #=============================================================================== | ||
|  | sub initialiseCache { | ||
|  |     my ( $config, $start ) = @_; | ||
|  | 
 | ||
|  |     # | ||
|  |     # How much to look ahead in Gmane; defined in the configuration | ||
|  |     # | ||
|  |     my $lookahead = $config->{gmane}->{lookahead}; | ||
|  | 
 | ||
|  |     # | ||
|  |     # The name of the mail cache and its directory are in the configuration | ||
|  |     # | ||
|  |     my $mailfile | ||
|  |         = join( '/', @{ $config->{cache} }{ 'directory', 'filename' } ); | ||
|  | 
 | ||
|  |     # | ||
|  |     # Make the URL for download | ||
|  |     # | ||
|  |     my $url = sprintf( $config->{gmane}->{template1}, | ||
|  |         $start, $start + $lookahead + 1 ); | ||
|  |     print "URL: $url\n"; | ||
|  | 
 | ||
|  |     # | ||
|  |     # Set up the HTTP GET | ||
|  |     # | ||
|  |     my $ua = LWP::UserAgent->new; | ||
|  |     $ua->agent('HPR-Agent/0.1'); | ||
|  |     $ua->timeout(10); | ||
|  | 
 | ||
|  |     # | ||
|  |     # Get the data (if any) | ||
|  |     # | ||
|  |     my $response = $ua->get($url); | ||
|  | 
 | ||
|  |     if ( $response->is_success ) { | ||
|  |         # | ||
|  |         # The GET succeeded, see what came back | ||
|  |         # | ||
|  |         if ( length( ${ $response->content_ref } ) > 0 ) { | ||
|  |             # | ||
|  |             # We got some new data. Append it to the rest | ||
|  |             # | ||
|  |             print "Mail messages found\n"; | ||
|  |             open( my $fh, ">", $mailfile ) | ||
|  |                 or die "Unable to open $mailfile for writing\n"; | ||
|  |             print $fh $response->decoded_content; | ||
|  |             close($fh); | ||
|  |         } | ||
|  |         else { | ||
|  |             print "No messages found\n"; | ||
|  |         } | ||
|  | 
 | ||
|  |         return 1; | ||
|  |     } | ||
|  |     else { | ||
|  |         # | ||
|  |         # The GET failed in a nasty way | ||
|  |         # | ||
|  |         warn $response->status_line; | ||
|  | 
 | ||
|  |         return 0; | ||
|  |     } | ||
|  | } | ||
|  | 
 | ||
|  | #===  FUNCTION  ================================================================ | ||
|  | #         NAME: updateCache | ||
|  | #      PURPOSE: Get the latest mail messages from Gmane and save them in the | ||
|  | #               cache | ||
|  | #   PARAMETERS: $config         configuration hash | ||
|  | #      RETURNS: If the GET succeeded then 1 otherwise 0 | ||
|  | #  DESCRIPTION: Adds more messages downloaded from Gmane to the end of the | ||
|  | #               current cache file. It needs to work out the number of the | ||
|  | #               last message in the cache, which it does with a regex, looking | ||
|  | #               at 'Archived-At:' headers. Then it adds the lookahead value | ||
|  | #               from the configuration file to that and downloads a maximum of | ||
|  | #               that number of messages using the Gmane API. These messages | ||
|  | #               are appended to the existing MBOX file. There is no means of | ||
|  | #               expiring older messages, and perhaps there should be. | ||
|  | #       THROWS: No exceptions | ||
|  | #     COMMENTS: None | ||
|  | #     SEE ALSO: N/A | ||
|  | #=============================================================================== | ||
|  | sub updateCache { | ||
|  |     my ($config) = @_; | ||
|  | 
 | ||
|  |     # | ||
|  |     # How much to look ahead in Gmane; defined in the configuration | ||
|  |     # | ||
|  |     my $lookahead = $config->{gmane}->{lookahead}; | ||
|  | 
 | ||
|  |     # | ||
|  |     # The regex to parse the permalink URL is in the configuration | ||
|  |     # | ||
|  |     my $re = qr{$config->{cache}->{regex}}; | ||
|  | 
 | ||
|  |     # | ||
|  |     # The name of the mail cache and its directory are in the configuration | ||
|  |     # | ||
|  |     my $mailfile | ||
|  |         = join( '/', @{ $config->{cache} }{ 'directory', 'filename' } ); | ||
|  | 
 | ||
|  |     # | ||
|  |     # Open the mail file | ||
|  |     # | ||
|  |     my $mgr    = Mail::Box::Manager->new; | ||
|  |     my $folder = $mgr->open($mailfile); | ||
|  | 
 | ||
|  |     # | ||
|  |     # Walk all the messages, parsing the permalink URLs to get the start and | ||
|  |     # end | ||
|  |     # | ||
|  |     my $plink; | ||
|  |     my ( $msgno, $start, $end ) = ( 0, 0, 0 ); | ||
|  |     foreach my $message ( $folder->messages ) { | ||
|  |         $plink = $message->get('Archived-At'); | ||
|  |         if ( ($msgno) = ( $plink =~ /$re/ ) ) { | ||
|  |             if ( $start eq 0 ) { | ||
|  |                 $start = $msgno; | ||
|  |             } | ||
|  |             else { | ||
|  |                 $end = $msgno; | ||
|  |             } | ||
|  |         } | ||
|  |     } | ||
|  |     $folder->close; | ||
|  | 
 | ||
|  |     # | ||
|  |     # Build the new URL to get more messages, starting from the last one we | ||
|  |     # have plus 1 for $lookahead+1 messages | ||
|  |     # | ||
|  |     my $url = sprintf( $config->{gmane}->{template1}, | ||
|  |         $end + 1, $end + $lookahead + 1 ); | ||
|  |     print "URL: $url\n"; | ||
|  | 
 | ||
|  |     # | ||
|  |     # Set up the HTTP GET | ||
|  |     # | ||
|  |     my $ua = LWP::UserAgent->new; | ||
|  |     $ua->agent('HPR-Agent/0.1'); | ||
|  |     $ua->timeout(10); | ||
|  | 
 | ||
|  |     # | ||
|  |     # Get the data (if any) | ||
|  |     # | ||
|  |     my $response = $ua->get($url); | ||
|  | 
 | ||
|  |     if ( $response->is_success ) { | ||
|  |         # | ||
|  |         # The GET succeeded, see what came back | ||
|  |         # | ||
|  |         if ( length( ${ $response->content_ref } ) > 0 ) { | ||
|  |             # | ||
|  |             # We got some new data. Append it to the rest | ||
|  |             # | ||
|  |             print "New messages found\n"; | ||
|  |             open( my $fh, ">>", $mailfile ) | ||
|  |                 or die "Unable to open $mailfile for appending\n"; | ||
|  |             print $fh $response->decoded_content; | ||
|  |             close($fh); | ||
|  |         } | ||
|  |         else { | ||
|  |             print "No new messages\n"; | ||
|  |         } | ||
|  | 
 | ||
|  |         return 1; | ||
|  |     } | ||
|  |     else { | ||
|  |         # | ||
|  |         # The GET failed in a nasty way | ||
|  |         # | ||
|  |         warn $response->status_line; | ||
|  | 
 | ||
|  |         return 0; | ||
|  |     } | ||
|  | } | ||
|  | 
 | ||
|  | #===  FUNCTION  ================================================================ | ||
|  | #         NAME: parseDate | ||
|  | #      PURPOSE: Parse and check a date | ||
|  | #   PARAMETERS: $date           Date string | ||
|  | #      RETURNS: Date::Calc date or 'undef' if $date is undefined | ||
|  | #  DESCRIPTION: Parses a date string using 'strptime' and converts it to | ||
|  | #               a Date::Calc date list | ||
|  | #       THROWS: No exceptions | ||
|  | #     COMMENTS: None | ||
|  | #     SEE ALSO: N/A | ||
|  | #=============================================================================== | ||
|  | sub parseDate { | ||
|  |     my ($date) = @_; | ||
|  | 
 | ||
|  |     if ( defined($date) ) { | ||
|  |         # | ||
|  |         # Parse and format the date | ||
|  |         # | ||
|  |         my @parsed = strptime($date); | ||
|  |         @parsed[ 4, 5 ] = ( $parsed[4] + 1, $parsed[5] + 1900 ); | ||
|  | 
 | ||
|  |         return ( @parsed[ 5, 4, 3 ] ); | ||
|  |     } | ||
|  |     else { | ||
|  |         return; # implicit undef | ||
|  |     } | ||
|  | } | ||
|  | 
 | ||
|  | #===  FUNCTION  ================================================================ | ||
|  | #         NAME: ISO8601_Date | ||
|  | #      PURPOSE: Format a Date::Calc date in ISO8601 format | ||
|  | #   PARAMETERS: @date   - a date in the Date::Calc format | ||
|  | #      RETURNS: Text string containing a YYYY-MM-DD date | ||
|  | #  DESCRIPTION: Just a convenience to allow a simple call like | ||
|  | #               $str = ISO8601_Date(@date) | ||
|  | #       THROWS: No exceptions | ||
|  | #     COMMENTS: None | ||
|  | #     SEE ALSO: N/A | ||
|  | #=============================================================================== | ||
|  | sub ISO8601_Date { | ||
|  |     my (@date) = (@_)[ 0, 1, 2 ]; | ||
|  | 
 | ||
|  |     if ( check_date(@date) ) { | ||
|  |         return sprintf( "%04d-%02d-%02d", @date ); | ||
|  |     } | ||
|  |     else { | ||
|  |         return "*Invalid Date*"; | ||
|  |     } | ||
|  | } | ||
|  | 
 | ||
|  | #===  FUNCTION  ================================================================ | ||
|  | #         NAME: calcToDT | ||
|  | #      PURPOSE: Convert a Date::Calc date to a UTC DateTime date | ||
|  | #   PARAMETERS: $date           Date::Calc date | ||
|  | #      RETURNS: A DateTime date in the UTC timezone | ||
|  | #  DESCRIPTION: Reformats a Date::Calc date to a DateTime date based on UTC | ||
|  | #               (for date comparison) | ||
|  | #       THROWS: No exceptions | ||
|  | #     COMMENTS: None | ||
|  | #     SEE ALSO: N/A | ||
|  | #=============================================================================== | ||
|  | sub calcToDT { | ||
|  |     my (@date) = (@_)[ 0, 1, 2 ]; | ||
|  | 
 | ||
|  |     # | ||
|  |     # Transfer Date::Calc values into a hash for initialising a DateTime | ||
|  |     # object. | ||
|  |     # | ||
|  |     my ( %dtargs, $dt ); | ||
|  |     @dtargs{ 'year', 'month', 'day', 'time_zone' } = ( @date, 'UTC' ); | ||
|  |     $dt = DateTime->new(%dtargs); | ||
|  |     return $dt; | ||
|  | } | ||
|  | 
 | ||
|  | #===  FUNCTION  ================================================================ | ||
|  | #         NAME: formatEmail | ||
|  | #      PURPOSE: Trims an array of anonymised Gmane email addresss in | ||
|  | #               Mail::Address objects | ||
|  | #   PARAMETERS: $remails         The array of Mail::Address objects | ||
|  | #      RETURNS: An array containing the trimmed addresses formatted as text | ||
|  | #  DESCRIPTION: These addresses look like "Ken Fallon | ||
|  | #               <ken-sq8tSfcSOvWzQB+pC5nmwQ@public.gmane.org>". The display | ||
|  | #               name part (Ken Fallon) is anonymised if it contains an | ||
|  | #               address-like specification. The local-part of the address is | ||
|  | #               anonymised and the domain replaced by a Gmane domain. The | ||
|  | #               front part of the local-part is unaffected. We want to build | ||
|  | #               an address from the display name (if there is one), and the | ||
|  | #               front of the local-part. So the above becomes: | ||
|  | #               "Ken Fallon <ken@...>" | ||
|  | #       THROWS: No exceptions | ||
|  | #     COMMENTS: None | ||
|  | #     SEE ALSO: N/A | ||
|  | #=============================================================================== | ||
|  | sub formatEmail { | ||
|  |     my ($remails) = @_; | ||
|  | 
 | ||
|  |     my ( $phrase, $address, $comment, @results ); | ||
|  | 
 | ||
|  |     foreach my $email ( @{$remails} ) { | ||
|  |         # | ||
|  |         # Parse out the components | ||
|  |         # | ||
|  |         ( $phrase, $address, $comment ) | ||
|  |             = ( $email->phrase, $email->address, $email->comment ); | ||
|  | 
 | ||
|  |         # | ||
|  |         # Assume Gmane obscured any email address wherever it is. Strip out | ||
|  |         # the obscuring part to make it more readable | ||
|  |         # | ||
|  |         foreach my $item ( $phrase, $address, $comment ) { | ||
|  |             if ( $item =~ /^"?([^-]+).+@.+$/ ) { | ||
|  |                 $item = "$1\@..."; | ||
|  |             } | ||
|  |         } | ||
|  | 
 | ||
|  |         # | ||
|  |         # Let the module reassemble the address in the right format, and add | ||
|  |         # it to the result list | ||
|  |         # | ||
|  |         my $obj = Mail::Address->new( $phrase, $address, $comment ); | ||
|  |         push( @results, $obj->format ); | ||
|  |     } | ||
|  | 
 | ||
|  |     return \@results; | ||
|  | } | ||
|  | 
 | ||
|  | #===  FUNCTION  ================================================================ | ||
|  | #         NAME: formatID | ||
|  | #      PURPOSE: Reformat a Message-ID: or In-Reply-To: value to undo the | ||
|  | #               effects of Gmane's mail address obscuration which is applied | ||
|  | #               to one but not the other. | ||
|  | #   PARAMETERS: $header         The header for processing | ||
|  | #               $gmane          Boolean to indicate whether we need to strip | ||
|  | #                               a Gmane obscuration part | ||
|  | #      RETURNS: The processed header | ||
|  | #  DESCRIPTION: If $gmane is true recognises the possible Gmane obscuration | ||
|  | #               patterns and extracts the important elements from the ID, | ||
|  | #               otherwise just strips the '@' and what follows. This is | ||
|  | #               because Gmane (presumably) detects the '@' in every header | ||
|  | #               type, assumes it's an address and applies its obscuring | ||
|  | #               technique to it. Shame they apply such a sledgehammer | ||
|  | #               algorithm. | ||
|  | #       THROWS: No exceptions | ||
|  | #     COMMENTS: None | ||
|  | #     SEE ALSO: N/A | ||
|  | #=============================================================================== | ||
|  | sub formatID { | ||
|  |     my ( $header, $gmane ) = @_; | ||
|  | 
 | ||
|  |     return $header unless defined($header); | ||
|  | 
 | ||
|  |     $header =~ s/(^<|>$)//g; | ||
|  |     if ($gmane) { | ||
|  |         if ( $header =~ /^(.+)-[^-]+@.+$/ ) { | ||
|  |             $header = "$1"; | ||
|  |         } | ||
|  |         elsif ( $header =~ /^(.+)@.+$/ ) { | ||
|  |             $header = "$1"; | ||
|  |         } | ||
|  |     } | ||
|  |     else { | ||
|  |         $header =~ s/@.+$//; | ||
|  |     } | ||
|  | 
 | ||
|  |     return $header; | ||
|  | } | ||
|  | 
 | ||
|  | #===  FUNCTION  ================================================================ | ||
|  | #         NAME: formatReferences | ||
|  | #      PURPOSE: Reformat a list of references in the References: header | ||
|  | #   PARAMETERS: $header         The header for processing | ||
|  | #      RETURNS: The processed header as an arrayref | ||
|  | #  DESCRIPTION: Removes the '<>' around each reference, but also removes the | ||
|  | #               bit after and including the '@' since we need to compare it | ||
|  | #               with the ID values we have had to tidy after Gmane got to | ||
|  | #               them. | ||
|  | #       THROWS: No exceptions | ||
|  | #     COMMENTS: None | ||
|  | #     SEE ALSO: N/A | ||
|  | #=============================================================================== | ||
|  | sub formatReferences { | ||
|  |     my ($header) = @_; | ||
|  | 
 | ||
|  |     return $header unless defined($header); | ||
|  | 
 | ||
|  |     my ( @refs, @results ); | ||
|  | 
 | ||
|  |     @refs = split( /\s+/, $header ); | ||
|  |     foreach my $ref (@refs) { | ||
|  |         $ref =~ s/(^<|>$)//g; | ||
|  |         $ref =~ s/@.+$//; | ||
|  |         push( @results, $ref ); | ||
|  |     } | ||
|  | 
 | ||
|  |     return \@results; | ||
|  | } | ||
|  | 
 | ||
|  | #===  FUNCTION  ================================================================ | ||
|  | #         NAME: trimHeader | ||
|  | #      PURPOSE: Trims an arbitrary header which is enclosed in "<..>" | ||
|  | #   PARAMETERS: $header         The header for trimming | ||
|  | #      RETURNS: The trimmed header | ||
|  | #  DESCRIPTION: Certain Gmane headers like "Message-Id" are enclosed in "<>" | ||
|  | #               marks. We want to strip these and return the result | ||
|  | #       THROWS: No exceptions | ||
|  | #     COMMENTS: None | ||
|  | #     SEE ALSO: N/A | ||
|  | #=============================================================================== | ||
|  | sub trimHeader { | ||
|  |     my ($header) = @_; | ||
|  | 
 | ||
|  |     return $header unless defined($header); | ||
|  | 
 | ||
|  |     $header =~ s/(^<|>$)//g; | ||
|  | 
 | ||
|  |     return $header; | ||
|  | } | ||
|  | 
 | ||
|  | #===  FUNCTION  ================================================================ | ||
|  | #         NAME: coalesce | ||
|  | #      PURPOSE: To find the first defined argument and return it | ||
|  | #   PARAMETERS: Arbitrary number of arguments | ||
|  | #      RETURNS: The first defined argument or undef if there are none | ||
|  | #  DESCRIPTION: Modelled on the SQL function of the same name. It takes a list | ||
|  | #               of arguments, scans it for the first one that is not undefined | ||
|  | #               and returns it. If an argument is defined and it's an arrayref | ||
|  | #               then the referenced array is returned comma-delimited. This | ||
|  | #               allows calls such as "coalesce($var,'undef')" which returns | ||
|  | #               the value of $var if it's defined, and 'undef' if not and | ||
|  | #               doesn't break anything along the way. | ||
|  | #       THROWS: No exceptions | ||
|  | #     COMMENTS: None | ||
|  | #     SEE ALSO: N/A | ||
|  | #=============================================================================== | ||
|  | sub coalesce { | ||
|  |     foreach (@_) { | ||
|  |         if ( defined($_) ) { | ||
|  |             if ( ref($_) eq 'ARRAY' ) { | ||
|  |                 return join( ',', @{$_} ); | ||
|  |             } | ||
|  |             else { | ||
|  |                 return $_; | ||
|  |             } | ||
|  |         } | ||
|  |     } | ||
|  |     return; # implicit undef | ||
|  | } | ||
|  | 
 | ||
|  | #===  FUNCTION  ================================================================ | ||
|  | #         NAME: empty | ||
|  | #      PURPOSE: Determine whether the argument contains data | ||
|  | #   PARAMETERS: $arg            Argument | ||
|  | #      RETURNS: 0 if data found, otherwise 1 | ||
|  | #  DESCRIPTION: If an argument is defined and it's an arrayref then the array | ||
|  | #               must contain elements otherwise it's regarded as empty. An | ||
|  | #               undefined argument is empty of course. | ||
|  | #       THROWS: No exceptions | ||
|  | #     COMMENTS: None | ||
|  | #     SEE ALSO: N/A | ||
|  | #=============================================================================== | ||
|  | sub empty { | ||
|  |     my ($arg) = @_; | ||
|  | 
 | ||
|  |     if ( defined($arg) ) { | ||
|  |         if ( ref($arg) eq 'ARRAY' ) { | ||
|  |             return scalar(@$arg) == 0; | ||
|  |         } | ||
|  |         else { | ||
|  |             return 0; | ||
|  |         } | ||
|  |     } | ||
|  |     else { | ||
|  |         return 1; | ||
|  |     } | ||
|  | } | ||
|  | 
 | ||
|  | #===  FUNCTION  ================================================================ | ||
|  | #         NAME: linkMessages | ||
|  | #      PURPOSE: Link the collected messages using the 'In-Reply-To:' and | ||
|  | #               'References' headers | ||
|  | #   PARAMETERS: $msg            Hashref containing the messages | ||
|  | #               $id             Hashref containing message-id to message key | ||
|  | #                               links | ||
|  | #               $silent         Boolean determining whether to emit error | ||
|  | #                               messages | ||
|  | #      RETURNS: Nothing | ||
|  | #  DESCRIPTION: We want to build a structure where messages contain forward | ||
|  | #               "child" links to other messages. So, given a message, we look | ||
|  | #               for backward links in the 'In-Reply-To:' and 'References' | ||
|  | #               headers, go to the referenced message(s) and make the current | ||
|  | #               one a child. We don't do anything with multiple 'References', | ||
|  | #               though we could. | ||
|  | #       THROWS: No exceptions | ||
|  | #     COMMENTS: None | ||
|  | #     SEE ALSO: N/A | ||
|  | #=============================================================================== | ||
|  | sub linkMessages { | ||
|  |     my ( $msg, $id, $silent ) = @_; | ||
|  | 
 | ||
|  |     my $irt; | ||
|  | 
 | ||
|  |     foreach my $key ( sort( keys( %{$msg} ) ) ) { | ||
|  |         $irt = $msg->{$key}->{'in-reply-to'}; | ||
|  | 
 | ||
|  |         if ( defined($irt) ) { | ||
|  |             # | ||
|  |             # There's an In-Reply-To: header | ||
|  |             # | ||
|  |             if ( defined( $id->{$irt} ) ) { | ||
|  |                 # | ||
|  |                 # The parent of this message is the key of the back reference and | ||
|  |                 # we add this key to the children of the parent of this message | ||
|  |                 # | ||
|  |                 $msg->{$key}->{parent} = $id->{$irt}; | ||
|  |                 push( | ||
|  |                     @{ $msg->{ $msg->{$key}->{parent} }->{children} }, | ||
|  |                     $id->{ $msg->{$key}->{'message-id'} } | ||
|  |                 ); | ||
|  |             } | ||
|  |             else { | ||
|  |                 emit( $silent, | ||
|  |                     "Unable to find Message-ID: $irt (in message $key)\n" ); | ||
|  |             } | ||
|  |         } | ||
|  |         elsif ( defined( $msg->{$key}->{'references'} ) ) { | ||
|  |             # | ||
|  |             # There's no In-Reply-To: but we have References:, where the last one | ||
|  |             # is the equivalent. We update the parent message | ||
|  |             # | ||
|  |             $ref = $msg->{$key}->{'references'}->[-1]; | ||
|  |             if ( defined($ref) && defined( $id->{$ref} ) ) { | ||
|  |                 # | ||
|  |                 # The parent of this message is the key of the back reference and | ||
|  |                 # we add this key to the children of the parent of this message | ||
|  |                 # | ||
|  |                 $msg->{$key}->{parent} = $id->{$ref}; | ||
|  |                 push( | ||
|  |                     @{ $msg->{ $msg->{$key}->{parent} }->{children} }, | ||
|  |                     $id->{ $msg->{$key}->{'message-id'} } | ||
|  |                 ); | ||
|  |             } | ||
|  |         } | ||
|  |     } | ||
|  | 
 | ||
|  |     return; | ||
|  | } | ||
|  | 
 | ||
|  | #===  FUNCTION  ================================================================ | ||
|  | #         NAME: repairThreads | ||
|  | #      PURPOSE: Repair broken threads where possible | ||
|  | #   PARAMETERS: $mhash          Hashref of all messages | ||
|  | #               $silent         Boolean determining whether to emit error | ||
|  | #                               messages | ||
|  | #      RETURNS: Nothing | ||
|  | #  DESCRIPTION: We look for messages with a subject beginning with 'Re:' which | ||
|  | #               don't have any back links. These look like broken threads | ||
|  | #               where the sender has not used their mail client's ability to | ||
|  | #               generate an 'In-Reply-To:' and/or 'References' header.  We try | ||
|  | #               to find the thread root by looking for the subject in all of | ||
|  | #               the messages and then make the linkages. | ||
|  | #       THROWS: No exceptions | ||
|  | #     COMMENTS: None | ||
|  | #     SEE ALSO: N/A | ||
|  | #=============================================================================== | ||
|  | sub repairThreads { | ||
|  |     my ( $mhash, $silent ) = @_; | ||
|  | 
 | ||
|  |     foreach my $key ( sort( keys( %{$mhash} ) ) ) { | ||
|  |         if ( $mhash->{$key}->{subject} =~ /^Re:/ ) { | ||
|  |             if (!(     defined( $mhash->{$key}->{'in-reply-to'} ) | ||
|  |                     || defined( $mhash->{$key}->{'references'} ) | ||
|  |                 ) | ||
|  |                 ) | ||
|  |             { | ||
|  |                 # | ||
|  |                 # We have a message with a subject beginning with 'Re:' which | ||
|  |                 # doesn't have any back links. Looks like a broken thread. | ||
|  |                 # Find the thread root by looking for the subject. | ||
|  |                 # | ||
|  |                 my $parent = findParent( $mhash, $key ); | ||
|  | 
 | ||
|  |                 # | ||
|  |                 # The parent has to be defined to be meaningful | ||
|  |                 # | ||
|  |                 if ( defined($parent) ) { | ||
|  |                     # | ||
|  |                     # Make this message's parent the message we found in two | ||
|  |                     # ways, via the 'parent' link and the 'in-reply-to' | ||
|  |                     # header. | ||
|  |                     # | ||
|  |                     $mhash->{$key}->{parent} = $parent; | ||
|  |                     $mhash->{$key}->{'in-reply-to'} | ||
|  |                         = $mhash->{$parent}->{'message-id'}; | ||
|  | 
 | ||
|  |                     # | ||
|  |                     # Add this message to the parents children array, making | ||
|  |                     # sure it's sorted. | ||
|  |                     # | ||
|  |                     push( @{ $mhash->{$parent}->{children} }, $key ); | ||
|  |                     $mhash->{$parent}->{children} | ||
|  |                         = [ sort( @{ $mhash->{$parent}->{children} } ) ]; | ||
|  | 
 | ||
|  |                     #print "Parent:\n"; | ||
|  |                     #dumpMessage( $mhash->{$parent} ); | ||
|  |                     #print "Child:\n"; | ||
|  |                     #dumpMessage( $mhash->{$key} ); | ||
|  |                 } | ||
|  |                 else { | ||
|  |                     emit( $silent, "Couldn't find parent of message $key\n" ); | ||
|  |                 } | ||
|  |             } | ||
|  |         } | ||
|  |     } | ||
|  | 
 | ||
|  |     return; | ||
|  | } | ||
|  | 
 | ||
|  | #===  FUNCTION  ================================================================ | ||
|  | #         NAME: findParent | ||
|  | #      PURPOSE: Given a particular element in the message hash find its parent | ||
|  | #               by processing the subject | ||
|  | #   PARAMETERS: $mhash          Hashref of all messages | ||
|  | #               $key            Key into the message hash | ||
|  | #      RETURNS: The key of the parent | ||
|  | #  DESCRIPTION: Tries to join an "orphaned" message with its parent by looking | ||
|  | #               for another instance of the subject. First the subject of this | ||
|  | #               message must begin with "Re:". This string is removed in order | ||
|  | #               to perform a search for the message this one is in reference | ||
|  | #               to. The search is conducted through all of the stored messages | ||
|  | #               and the matching message must not have a parent or any back | ||
|  | #               references itself to be acceptable. This is a fairly vague set | ||
|  | #               of criteria but it does work for a number of unlinked | ||
|  | #               messages. [Note: The regex used in the search has been modified | ||
|  | #               to be case insensitive having encountered an instance where | ||
|  | #               someone reconstructed the subject line in different case.] | ||
|  | #       THROWS: No exceptions | ||
|  | #     COMMENTS: None | ||
|  | #     SEE ALSO: N/A | ||
|  | #=============================================================================== | ||
|  | sub findParent { | ||
|  |     my ( $mhash, $key ) = @_; | ||
|  | 
 | ||
|  |     my $start_subj = $mhash->{$key}->{subject}; | ||
|  |     my $end_subj; | ||
|  |     my $parent; | ||
|  | 
 | ||
|  |     if ( $start_subj =~ /^Re:/ ) { | ||
|  |         ( $end_subj = $start_subj ) =~ s/^Re:\s*//; | ||
|  |         foreach my $k ( keys( %{$mhash} ) ) { | ||
|  |             #if ( $msg{$k}->{subject} eq $end_subj ) { | ||
|  |             if (   $k ne $key | ||
|  |                 && $msg{$k}->{subject} =~ /(?:^Re:\s*)?(?i)$end_subj/ ) | ||
|  |             { | ||
|  |                 if (!(     defined( $msg{$k}->{'in-reply-to'} ) | ||
|  |                         || defined( $msg{$k}->{'references'} ) | ||
|  |                     ) | ||
|  |                     ) | ||
|  |                 { | ||
|  |                     $parent = $k; | ||
|  |                     last; | ||
|  |                 } | ||
|  |             } | ||
|  |         } | ||
|  |         return $parent; | ||
|  |     } | ||
|  |     else { | ||
|  |         # | ||
|  |         # Doesn't look like a message with a parent | ||
|  |         # | ||
|  |         return; # implicit undef | ||
|  |     } | ||
|  | } | ||
|  | 
 | ||
|  | #===  FUNCTION  ================================================================ | ||
|  | #         NAME: buildThreadSummary | ||
|  | #      PURPOSE: Build a summary of the thread roots | ||
|  | #   PARAMETERS: $dt_from        The from date as a DateTime object | ||
|  | #               $dt_to          The to date as a DateTime object | ||
|  | #               $config         Hashref of the configuration data | ||
|  | #               $mhash          Hashref of all messages | ||
|  | #               $threads        Hashref to contain the resulting thread data | ||
|  | #               $silent         Boolean determining whether to emit error | ||
|  | #                               messages | ||
|  | #      RETURNS: Total number of messages in all eligible threads | ||
|  | #  DESCRIPTION: We have built a structure containing all of the message | ||
|  | #               threads we've seen. However, we're only interested in the | ||
|  | #               threads for a certain period, so we need to work them out | ||
|  | #               here. A thread root is a message which has no parents, and | ||
|  | #               we're interested in those which were written in our time | ||
|  | #               period. We collect the details we want for the final display | ||
|  | #               and build a weblink to the thread on Gmane. We also count the | ||
|  | #               number of messages per thread and the grand total. The | ||
|  | #               reulting structure is what we'll pass to the template we use | ||
|  | #               to generate the insertion for the Community News notes (and | ||
|  | #               other stuff). | ||
|  | #       THROWS: No exceptions | ||
|  | #     COMMENTS: None | ||
|  | #     SEE ALSO: N/A | ||
|  | #=============================================================================== | ||
|  | sub buildThreadSummary { | ||
|  |     my ( $dt_from, $dt_to, $config, $mhash, $threads, $silent ) = @_; | ||
|  | 
 | ||
|  |     my ( $count, $total ); | ||
|  | 
 | ||
|  |     # | ||
|  |     # Use the UTC seconds for comparison | ||
|  |     # | ||
|  |     my $from = $dt_from->utc_rd_as_seconds(); | ||
|  |     my $to   = $dt_to->utc_rd_as_seconds(); | ||
|  | 
 | ||
|  |     # | ||
|  |     # Adjust the regex from the config file for parsing the stored links. | ||
|  |     # Remove the '<>' enclosing it. | ||
|  |     # | ||
|  |     my $re = $config->{cache}->{regex}; | ||
|  |     $re =~ s/^<|>$//g; | ||
|  |     $re = qr{$re}; | ||
|  | 
 | ||
|  |     # | ||
|  |     # Find each thread root which has messages in the date range | ||
|  |     # | ||
|  |     $total = 0; | ||
|  |     foreach my $key ( sort( keys( %{$mhash} ) ) ) { | ||
|  |         if ( !defined( $mhash->{$key}->{parent} ) | ||
|  |             && eligibleThread( $from, $to, $mhash, $key ) ) | ||
|  | #            && ( $key ge $from && $key lt $to ) ) | ||
|  |         { | ||
|  |             # | ||
|  |             # Stash the headers we want | ||
|  |             # | ||
|  |             foreach my $k (qw{ from to cc date subject archived-at }) { | ||
|  |                 $threads{$key}->{$k} = $mhash->{$key}->{$k}; | ||
|  |             } | ||
|  | 
 | ||
|  |             # | ||
|  |             # Make a weblink to the Gmane thread | ||
|  |             # TODO What if the regex doesn't match? | ||
|  |             # | ||
|  |             if ( $mhash->{$key}->{'archived-at'} =~ /$re/ ) { | ||
|  |                 $threads{$key}->{thread} | ||
|  |                     = sprintf( $config->{gmane}->{template2}, $1 ); | ||
|  |             } | ||
|  |             else { | ||
|  |                 emit( $silent, | ||
|  |                     "Unable to match the Gmane thread (in message $key)\n" ); | ||
|  |             } | ||
|  | 
 | ||
|  |             # | ||
|  |             # Count the messages in the thread and keep a grand total | ||
|  |             # | ||
|  |             $count = 0; | ||
|  |             threadLength( $from, $to, $mhash, $key, \$count ); | ||
|  |             $threads{$key}->{count} = $count; | ||
|  |             $total += $count; | ||
|  |         } | ||
|  |     } | ||
|  | 
 | ||
|  |     return $total; | ||
|  | 
 | ||
|  | } | ||
|  | 
 | ||
|  | #===  FUNCTION  ================================================================ | ||
|  | #         NAME: eligibleThread | ||
|  | #      PURPOSE: Determines whether a given thread is eligible to be included | ||
|  | #               in a date range | ||
|  | #   PARAMETERS: $from           The from date as UTC seconds | ||
|  | #               $to             The to date as UTC seconds | ||
|  | #               $mhash          Hashref of all messages | ||
|  | #               $key            Key into the message hash for this particular | ||
|  | #                               thread | ||
|  | #      RETURNS: True (1) if the thread is eligible, otherwise false (0) | ||
|  | #  DESCRIPTION: Checks the top level message (thread root) for elegibility | ||
|  | #               (date in range) then recursively descends the thread checking | ||
|  | #               each message in the same manner as it goes. The results are | ||
|  | #               ORed together so that if any message is in range the entire | ||
|  | #               thread is. | ||
|  | #       THROWS: No exceptions | ||
|  | #     COMMENTS: None | ||
|  | #     SEE ALSO: N/A | ||
|  | #=============================================================================== | ||
|  | sub eligibleThread { | ||
|  |     my ( $from, $to, $mhash, $key ) = @_; | ||
|  | 
 | ||
|  |     my $res = ( $key ge $from && $key lt $to ); | ||
|  | 
 | ||
|  |     foreach my $k ( @{ $mhash->{$key}->{children} } ) { | ||
|  |         $res ||= ( $k ge $from && $k lt $to ) | ||
|  |             || eligibleThread( $from, $to, $mhash, $k ); | ||
|  |     } | ||
|  | 
 | ||
|  |     return $res; | ||
|  | } | ||
|  | 
 | ||
|  | #===  FUNCTION  ================================================================ | ||
|  | #         NAME: threadLength | ||
|  | #      PURPOSE: Count the number of eligible messages in a thread | ||
|  | #   PARAMETERS: $from           The from date as UTC seconds | ||
|  | #               $to             The to date as UTC seconds | ||
|  | #               $mhash          Hashref of all messages | ||
|  | #               $key            Key into the message hash | ||
|  | #               $len            Scalar ref containing accumulated length | ||
|  | #      RETURNS: Nothing | ||
|  | #  DESCRIPTION: Recursively descends through a thread (a tree of linked | ||
|  | #               messages) counting the elements but only if they are in range. | ||
|  | #       THROWS: No exceptions | ||
|  | #     COMMENTS: None | ||
|  | #     SEE ALSO: N/A | ||
|  | #=============================================================================== | ||
|  | sub threadLength { | ||
|  |     my ( $from, $to, $mhash, $key, $len ) = @_; | ||
|  | 
 | ||
|  |     $$len++ if ( $key ge $from && $key lt $to ); | ||
|  | 
 | ||
|  |     foreach my $k ( @{ $mhash->{$key}->{children} } ) { | ||
|  |         threadLength( $from, $to, $mhash, $k, $len ); | ||
|  |     } | ||
|  | 
 | ||
|  |     return; | ||
|  | } | ||
|  | 
 | ||
|  | #===  FUNCTION  ================================================================ | ||
|  | #         NAME: printThread | ||
|  | #      PURPOSE: To walk a message thread and print it | ||
|  | #   PARAMETERS: $mhash          Hashref of all messages | ||
|  | #               $key            Key into the message hash | ||
|  | #               $level          Integer denoting recursion level | ||
|  | #               $html           1 for HTML output, 0 for text | ||
|  | #      RETURNS: Nothing | ||
|  | #  DESCRIPTION: Recursively descend through a thread (a tree of linked | ||
|  | #               messages) printing each one. If $html is true the message is | ||
|  | #               printed in HTML, otherwise it's plain text with indentation. | ||
|  | #               The level of indentation is controlled by $level which | ||
|  | #               increments as the tree is recursed. | ||
|  | #       THROWS: No exceptions | ||
|  | #     COMMENTS: None | ||
|  | #     SEE ALSO: N/A | ||
|  | #=============================================================================== | ||
|  | sub printThread { | ||
|  |     my ( $mhash, $key, $level, $html ) = @_; | ||
|  | 
 | ||
|  |     $level = 0 unless $level; | ||
|  |     $html  = 0 unless $html; | ||
|  | 
 | ||
|  |     printMessage( $mhash->{$key}, $level, $html ); | ||
|  | 
 | ||
|  |     foreach my $k ( @{ $mhash->{$key}->{children} } ) { | ||
|  |         printThread( $mhash, $k, $level + 1, $html ); | ||
|  |     } | ||
|  | 
 | ||
|  |     return; | ||
|  | } | ||
|  | 
 | ||
|  | #===  FUNCTION  ================================================================ | ||
|  | #         NAME: printMessage | ||
|  | #      PURPOSE: To print a message for debug purposes | ||
|  | #   PARAMETERS: $msg            Hashref containing message attributes | ||
|  | #               $level          Integer for indenting | ||
|  | #               $html           1 for HTML output, 0 for text | ||
|  | #      RETURNS: Nothing | ||
|  | #  DESCRIPTION: Print a message. The details held consist only of a set of | ||
|  | #               headers, and these are printed as a label and a value. If | ||
|  | #               $html is true the output is HTML (an unnumbered list) and only | ||
|  | #               headers with values are reported. If $html is false then plain | ||
|  | #               text is used, empty values are reported and indentation is | ||
|  | #               used, controlled by the value of $level. In plain text mode | ||
|  | #               each item ends with a line of hyphens. | ||
|  | #       THROWS: No exceptions | ||
|  | #     COMMENTS: None | ||
|  | #     SEE ALSO: N/A | ||
|  | #=============================================================================== | ||
|  | sub printMessage { | ||
|  |     my ( $msg, $level, $html ) = @_; | ||
|  | 
 | ||
|  |     $level = 0 unless $level; | ||
|  |     $html  = 0 unless $html; | ||
|  | 
 | ||
|  |     # TODO Don't do it this way, use the global definition since it's used | ||
|  |     # elsewhere | ||
|  |     state %tags; | ||
|  |     %tags = ( | ||
|  |         'from'        => 'From', | ||
|  |         'to'          => 'To', | ||
|  |         'cc'          => 'CC', | ||
|  |         'date'        => 'Date', | ||
|  |         'subject'     => 'Subject', | ||
|  |         'archived-at' => 'Link', | ||
|  |     ); | ||
|  | 
 | ||
|  |     print "<ul>" if $html; | ||
|  | 
 | ||
|  |     foreach my $k (qw{ from to cc date subject archived-at }) { | ||
|  |         if ($html) { | ||
|  |             if ( !empty( $msg->{$k} ) ) { | ||
|  |                 printf "<li>%-8s %s</li>\n", "$tags{$k}:", | ||
|  |                     coalesce( $msg->{$k}, '' ); | ||
|  |             } | ||
|  |         } | ||
|  |         else { | ||
|  |             printf "%s%-8s %s\n", '  ' x $level, "$tags{$k}:", | ||
|  |                 coalesce( $msg->{$k}, '' ); | ||
|  |         } | ||
|  |     } | ||
|  |     if ($html) { | ||
|  |         print "</ul>\n"; | ||
|  |     } | ||
|  |     else { | ||
|  |         print '-' x 80, "\n"; | ||
|  |     } | ||
|  | 
 | ||
|  |     return; | ||
|  | } | ||
|  | 
 | ||
|  | #===  FUNCTION  ================================================================ | ||
|  | #         NAME: dumpMessage | ||
|  | #      PURPOSE: To print all parts of a message hash for debugging | ||
|  | #   PARAMETERS: $msg            Hashref containing message attributes | ||
|  | #      RETURNS: Nothing | ||
|  | #  DESCRIPTION: Produces a dump of a message for debugging purposes. | ||
|  | #       THROWS: No exceptions | ||
|  | #     COMMENTS: None | ||
|  | #     SEE ALSO: N/A | ||
|  | #=============================================================================== | ||
|  | sub dumpMessage { | ||
|  |     my ($msg) = @_; | ||
|  | 
 | ||
|  |     foreach my $k ( sort( keys( %{$msg} ) ) ) { | ||
|  |         printf "%12s: %s\n", $k, coalesce( $msg->{$k}, '' ); | ||
|  |     } | ||
|  |     print '-' x 80, "\n"; | ||
|  |     return; | ||
|  | } | ||
|  | 
 | ||
|  | #===  FUNCTION  ================================================================ | ||
|  | #         NAME: emit | ||
|  | #      PURPOSE: Print text on STDERR unless silent mode has been selected | ||
|  | #   PARAMETERS: - Boolean indicating whether to be silent or not | ||
|  | #               - list of arguments to 'print' | ||
|  | #      RETURNS: Nothing | ||
|  | #  DESCRIPTION: This is a wrapper around 'print' to determine whether to send | ||
|  | #               a message to STDERR depending on a boolean. We need this to be | ||
|  | #               able to make the script silent when the -silent option is | ||
|  | #               selected | ||
|  | #       THROWS: No exceptions | ||
|  | #     COMMENTS: None | ||
|  | #     SEE ALSO: N/A | ||
|  | #=============================================================================== | ||
|  | sub emit { | ||
|  |     unless (shift) { | ||
|  |         print STDERR @_; | ||
|  |     } | ||
|  |     return; | ||
|  | } | ||
|  | 
 | ||
|  | #===  FUNCTION  ================================================================ | ||
|  | #         NAME: Options | ||
|  | #      PURPOSE: Processes command-line options | ||
|  | #   PARAMETERS: $optref     Hash reference to hold the options | ||
|  | #      RETURNS: Undef | ||
|  | #  DESCRIPTION: | ||
|  | #       THROWS: no exceptions | ||
|  | #     COMMENTS: none | ||
|  | #     SEE ALSO: n/a | ||
|  | #=============================================================================== | ||
|  | sub Options { | ||
|  |     my ($optref) = @_; | ||
|  | 
 | ||
|  |     my @options = ( | ||
|  |         "help",    "debug=i",   "out=s",        "template=s", | ||
|  |         "silent!", "checknew!", "initialise=i", "from=s", | ||
|  |         "to=s", | ||
|  |     ); | ||
|  | 
 | ||
|  |     if ( !GetOptions( $optref, @options ) ) { | ||
|  |         pod2usage( -msg => "$PROG version $VERSION\n", -exitval => 1 ); | ||
|  |     } | ||
|  | 
 | ||
|  |     return; | ||
|  | } | ||
|  | 
 | ||
|  | __END__ | ||
|  | 
 | ||
|  | #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% | ||
|  | #  Application Documentation | ||
|  | #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% | ||
|  | #{{{ | ||
|  | 
 | ||
|  | 
 | ||
|  | =head1 NAME | ||
|  | 
 | ||
|  | summarise_mail - Generate a summary of messages on the HPR mailing list | ||
|  | 
 | ||
|  | =head1 VERSION | ||
|  | 
 | ||
|  | This documentation refers to summarise_mail version 0.0.5 | ||
|  | 
 | ||
|  | 
 | ||
|  | =head1 USAGE | ||
|  | 
 | ||
|  |     ./summarise_mail [-help] [-debug=N] [-from=DATE] [-to=DATE] [-out=FILE] | ||
|  |         [-template=FILE] [-[no]silent] [-[no]checknew] [-initialise=N] | ||
|  | 
 | ||
|  | 
 | ||
|  | =head1 OPTIONS | ||
|  | 
 | ||
|  | =over 8 | ||
|  | 
 | ||
|  | =item B<-help> | ||
|  | 
 | ||
|  | Prints a brief help message describing the usage of the program, and then exits. | ||
|  | 
 | ||
|  | =item B<-initialise=N> | ||
|  | 
 | ||
|  | This option builds a new mailbox cache. The value B<N> defines the starting | ||
|  | message to be downloaded from the Gmane site. The number defaults to 1, though | ||
|  | this is not advisable since it potentially overloads the Gmane server. The | ||
|  | number of messages downloaded is limited by the B<lookahead> value in the | ||
|  | configuration file (see below). | ||
|  | 
 | ||
|  | If a cache file already exists the script will not run. It is necessary to | ||
|  | delete this file before initialising a new one. | ||
|  | 
 | ||
|  | After the cache is initialised the script will process the messages it | ||
|  | contains. | ||
|  | 
 | ||
|  | The B<-initialise=N> and the B<-[no]checknew> options are mutually exclusive. | ||
|  | 
 | ||
|  | =item B<-[no]checknew> | ||
|  | 
 | ||
|  | This option defines whether the script will check Gmane for updates to the | ||
|  | mailing list. If updates are found then they will be incorporated into the | ||
|  | cached mailbox file. | ||
|  | 
 | ||
|  | If omitted no check is made (B<-nochecknew>). | ||
|  | 
 | ||
|  | The B<-initialise=N> and the B<-[no]checknew> options are mutually exclusive. | ||
|  | 
 | ||
|  | =item B<-from=DATE> | ||
|  | 
 | ||
|  | Specifies the starting date for the mail summary. | ||
|  | 
 | ||
|  | The date format should be B<DD-Mon-YYYY> (e.g. 12-Jun-2014), B<DD-MM-YYYY> | ||
|  | (e.g. 12-06-2014) or B<YYYY-MM-DD> (e.g. 2014-06-12). | ||
|  | 
 | ||
|  | If this option is omitted the current date is used. | ||
|  | 
 | ||
|  | =item B<-to=DATE> | ||
|  | 
 | ||
|  | Specifies the ending date for the mail summary. | ||
|  | 
 | ||
|  | The date format should be B<DD-Mon-YYYY> (e.g. 12-Jun-2014), B<DD-MM-YYYY> | ||
|  | (e.g. 12-06-2014) or B<YYYY-MM-DD> (e.g. 2014-06-12). | ||
|  | 
 | ||
|  | If this option is omitted the last day of the month defined by the B<-from> | ||
|  | date is used. | ||
|  | 
 | ||
|  | =item B<-template=FILE> | ||
|  | 
 | ||
|  | This option defines the template that will be used to format the report | ||
|  | generated by the script. | ||
|  | 
 | ||
|  | If the option is not provided, the script will use a template called | ||
|  | B<mailnote_template.tpl> in the directory which holds the script. This | ||
|  | template generates HTML suitable for incorporation into the Community News | ||
|  | show notes. | ||
|  | 
 | ||
|  | =item B<-out=FILE> | ||
|  | 
 | ||
|  | This option defines the output file for the report.  If the option is omitted | ||
|  | the report is written to STDOUT, allowing it to be redirected if required. | ||
|  | 
 | ||
|  | The output file name may contain the characters 'B<%s>'. This denotes the point | ||
|  | at which the year and month in the format B<YYYY-MM> are inserted. For example | ||
|  | if the script is being run for July 2014 the option: | ||
|  | 
 | ||
|  |     -out=mailreport_%s.html | ||
|  | 
 | ||
|  | will cause the generation of the file: | ||
|  | 
 | ||
|  |     mailreport_2014-07.html | ||
|  | 
 | ||
|  | =item B<-[no]silent> | ||
|  | 
 | ||
|  | This option controls whether the script reports minimal details of what it is | ||
|  | doing to STDERR. If the option is omitted the report is generated (B<-nosilent>). | ||
|  | 
 | ||
|  | The script reports: the starting and ending dates it is working on and the name of the output file | ||
|  | (if appropriate). | ||
|  | 
 | ||
|  | =item B<-debug=N> | ||
|  | 
 | ||
|  | Runs the script in debug mode if the value of B<N> is greater than 0. The | ||
|  | default setting is zero. | ||
|  | 
 | ||
|  | In debug mode the script reports various pieces of information about the | ||
|  | parsing of the mail messages and building of threads. | ||
|  | 
 | ||
|  | =back | ||
|  | 
 | ||
|  | 
 | ||
|  | =head1 DESCRIPTION | ||
|  | 
 | ||
|  | =head2 Overview | ||
|  | 
 | ||
|  | This script generates a summary of the mail sent to the HPR mailing list over | ||
|  | a period. | ||
|  | 
 | ||
|  | Messages to the mailing list are copied to the free B<Gmane> service and the | ||
|  | script uses this to prepare its report. The Gmane service offers | ||
|  | a downloadable interface at | ||
|  | http://download.gmane.org/gmane.network.syndication.podcast.hacker-public-radio | ||
|  | where selected messages can be collected in B<MBOX> format. | ||
|  | 
 | ||
|  | =head2 Methodology | ||
|  | 
 | ||
|  | The script operates in three phases: | ||
|  | 
 | ||
|  | =over 4 | ||
|  | 
 | ||
|  | =item - | ||
|  | 
 | ||
|  | Messages are collected from Gmane and stored in a local cache (a file in MBOX | ||
|  | format). This is expected to be done once, and thereafter new messages are | ||
|  | appended to this file. | ||
|  | 
 | ||
|  | =item - | ||
|  | 
 | ||
|  | New messages are appended to the cache file. This is done by determining the | ||
|  | last message in the cache then requesting the next block of messages from | ||
|  | Gmane. The downloaded messages are then added to the file. | ||
|  | 
 | ||
|  | =item - | ||
|  | 
 | ||
|  | The cache is processed to generate the report. Usually the script is given | ||
|  | a start date (and optional end date, though this defaults to the end of the | ||
|  | month in the start date). | ||
|  | 
 | ||
|  | The algorithm is as follows: | ||
|  | 
 | ||
|  | =over 4 | ||
|  | 
 | ||
|  | =item - | ||
|  | 
 | ||
|  | The script parses out the important headers of all messages in the cache. | ||
|  | 
 | ||
|  | =item - | ||
|  | 
 | ||
|  | The script walks the header structure in time order joining messages based on | ||
|  | their 'In-reference-to:' and 'References:' headers. Each message gets its | ||
|  | parent recorded as a pointer, and its children as an array of pointers. If | ||
|  | a reference is to a message that is not in the cache then it is not recorded. | ||
|  | 
 | ||
|  | =item - | ||
|  | 
 | ||
|  | At this point all messages without a parent are either the thread roots or | ||
|  | cases where someone has not replied properly. The script performs a further | ||
|  | join when an "orphaned" message has the same subject as another with a 'Re:' | ||
|  | on the front. | ||
|  | 
 | ||
|  | =item - | ||
|  | 
 | ||
|  | Now all messages without parents are the best that can be done to detect | ||
|  | thread roots. These are used to make the report. | ||
|  | 
 | ||
|  | =item - | ||
|  | 
 | ||
|  | The messages that cannot be threaded are those where the sender does not use | ||
|  | 'Reply' or where they reply to a digest. Digests are not threaded because they | ||
|  | are blobs of multiple messages. | ||
|  | 
 | ||
|  | =item - | ||
|  | 
 | ||
|  | Some messages may be threaded erroneously. This happens when a sender, rather | ||
|  | than generating a new message, replies to an existing message but changes the | ||
|  | 'Subject:' line. There is not much that can be done to correct this. | ||
|  | 
 | ||
|  | =back | ||
|  | 
 | ||
|  | =back | ||
|  | 
 | ||
|  | =head2 Report Template | ||
|  | 
 | ||
|  | The script uses the B<Template> module to format the report. See the default | ||
|  | template B<mailnote_template.tpl> for an example. | ||
|  | 
 | ||
|  | The template is passed two variables: | ||
|  | 
 | ||
|  | =over 4 | ||
|  | 
 | ||
|  | =item B<total> | ||
|  | 
 | ||
|  | This is a scalar variable containing a number: the count of messages in the | ||
|  | specified range. | ||
|  | 
 | ||
|  | =item B<threads> | ||
|  | 
 | ||
|  | This is a hash indexed by the message date converted to UTC date then to the | ||
|  | number of seconds since the Unix epoch. Each hash value is a hashref | ||
|  | containing the following fields: | ||
|  | 
 | ||
|  |  Field          Contents | ||
|  |  -----          -------- | ||
|  |  from           The contents of the 'From:' header | ||
|  |  to             The contents of the 'To:' header | ||
|  |  cc             The contents of the 'Cc:' header | ||
|  |  date           The contents of the 'Date:' header | ||
|  |  subject        The contents of the 'Subject:' header | ||
|  |  archived-at    The contents of the 'Archived-at:' header (the Gmane | ||
|  |                 permalink URL) | ||
|  |  count          The number of messages in the thread | ||
|  | 
 | ||
|  | These messages are the roots of all of the detectable threads in the period. | ||
|  | Sorting them by the main hash key results in the threads being ordered by | ||
|  | their timestamp. | ||
|  | 
 | ||
|  | =back | ||
|  | 
 | ||
|  | =head1 DIAGNOSTICS | ||
|  | 
 | ||
|  | A list of every error and warning message that the application can generate | ||
|  | (even the ones that will "never happen"), with a full explanation of each | ||
|  | problem, one or more likely causes, and any suggested remedies. If the | ||
|  | application generates exit status codes (e.g. under Unix) then list the exit | ||
|  | status associated with each error. | ||
|  | 
 | ||
|  | 
 | ||
|  | =head1 CONFIGURATION AND ENVIRONMENT | ||
|  | 
 | ||
|  | The script obtains the information it requires to find the mail details from | ||
|  | B<Gmane> from a configuration file. The name of the file it expects is | ||
|  | B<.summarise_mail.cfg> in the | ||
|  | directory holding the script. To change this will require changing the script. | ||
|  | 
 | ||
|  | The configuration file format is as follows: | ||
|  | 
 | ||
|  |  <gmane> | ||
|  |      url = http://download.gmane.org/gmane.network.syndication.podcast.hacker-public-radio | ||
|  |      template = "$url/%d/%d" | ||
|  |      lookahead = 100 | ||
|  |  </gmane> | ||
|  | 
 | ||
|  |  <cache> | ||
|  |      directory = /home/dave/Community_News/mail_cache | ||
|  |      filename = gmane.mbox | ||
|  |      regex = "<http://permalink\.gmane\.org/gmane\.network\.syndication\.podcast\.hacker-public-radio/(\d+)>" | ||
|  |  </cache> | ||
|  | 
 | ||
|  | The elements of this configuration file are explained below. First the | ||
|  | B<gmane> section: | ||
|  | 
 | ||
|  | =over 4 | ||
|  | 
 | ||
|  | =item B<url> | ||
|  | 
 | ||
|  | This is the base URL on the Gmane site. It is used to construct URLs for | ||
|  | collecting mail messages. This should not require any changes. | ||
|  | 
 | ||
|  | =item B<template> | ||
|  | 
 | ||
|  | This is a template which is used to generate the actual URL used to download | ||
|  | messages. The script replaces each '%d' with a number. The first is the | ||
|  | starting message number, and the second the ending number plus one. | ||
|  | 
 | ||
|  | This should not require any changes. | ||
|  | 
 | ||
|  | =item B<lookahead> | ||
|  | 
 | ||
|  | This defines the maximum number of messages to be collected. This number is | ||
|  | added to the first number used when contructing the URL from the B<template> | ||
|  | above. Normally just a few messages are downloaded at a time, but when the | ||
|  | script is used to initialise the mailbox cache file this number of messages may | ||
|  | be downloaded. | ||
|  | 
 | ||
|  | This value can be changed if desired, but it is set at 100 in order to avoid | ||
|  | overloading the Gmane servers. | ||
|  | 
 | ||
|  | =back | ||
|  | 
 | ||
|  | The B<cache> section contains the following: | ||
|  | 
 | ||
|  | =over 4 | ||
|  | 
 | ||
|  | =item B<directory> | ||
|  | 
 | ||
|  | This defines the full path to the directory holding the mail cache file. | ||
|  | 
 | ||
|  | This may be changed to reflect a change of location. | ||
|  | 
 | ||
|  | =item B<filename> | ||
|  | 
 | ||
|  | This defines the name of the mail cache. It is expected to be found in the | ||
|  | directory defined above. | ||
|  | 
 | ||
|  | This may be changed to reflect a change of name. | ||
|  | 
 | ||
|  | =item B<regex> | ||
|  | 
 | ||
|  | Individual messages in the cache each hold an URL which provides a permanent | ||
|  | link to the message on the Gmane site. The script uses the regular expression | ||
|  | defined here to collect the message number from this URL. This is how the | ||
|  | script determines the number of the oldest message in the cache when looking | ||
|  | for new messages. | ||
|  | 
 | ||
|  | This expression should only be changed with great caution. | ||
|  | 
 | ||
|  | =back | ||
|  | 
 | ||
|  | =head1 DEPENDENCIES | ||
|  | 
 | ||
|  |  Getopt::Long | ||
|  |  Pod::Usage | ||
|  |  Config::General | ||
|  |  LWP::UserAgent | ||
|  |  Mail::Box::Manager | ||
|  |  Date::Parse | ||
|  |  Date::Calc | ||
|  |  DateTime | ||
|  |  DateTime::TimeZone | ||
|  |  Template | ||
|  |  Template::Filters | ||
|  |  Data::Dumper | ||
|  | 
 | ||
|  | 
 | ||
|  | =head1 BUGS AND LIMITATIONS | ||
|  | 
 | ||
|  | There are no known bugs in this module. | ||
|  | Please report problems to Dave Morriss (Dave.Morriss@gmail.com) | ||
|  | Patches are welcome. | ||
|  | 
 | ||
|  | =head1 AUTHOR | ||
|  | 
 | ||
|  | Dave Morriss (Dave.Morriss@gmail.com) | ||
|  | 
 | ||
|  | 
 | ||
|  | =head1 LICENCE AND COPYRIGHT | ||
|  | 
 | ||
|  | Copyright (c) 2014, 2015 Dave Morriss (Dave.Morriss@gmail.com). All rights reserved. | ||
|  | 
 | ||
|  | This module is free software; you can redistribute it and/or | ||
|  | modify it under the same terms as Perl itself. See perldoc perlartistic. | ||
|  | 
 | ||
|  | This program is distributed in the hope that it will be useful, | ||
|  | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
|  | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | ||
|  | 
 | ||
|  | =cut | ||
|  | 
 | ||
|  | #}}} | ||
|  | 
 | ||
|  | # [zo to open fold, zc to close] | ||
|  | 
 | ||
|  | # vim: syntax=perl:ts=8:sw=4:et:ai:tw=78:fo=tcrqn21:fdm=marker |