forked from HPR/hpr-tools
		
	
		
			
	
	
		
			2196 lines
		
	
	
		
			66 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
		
		
			
		
	
	
			2196 lines
		
	
	
		
			66 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
|  | #!/usr/bin/env perl | ||
|  | #=============================================================================== | ||
|  | # | ||
|  | #         FILE: feedWatcher | ||
|  | # | ||
|  | #        USAGE: ./feedWatcher [-help] [-load=FILE] [-delete=FILE] [-[no]scan] | ||
|  | #               [-[no]report] [-[no]check] [-out=FILE] [-json[=FILE]] | ||
|  | #               [-opml[=FILE]] [-template[=FILE]] [-[no]silent] [-debug=N] | ||
|  | #               [URL ...] | ||
|  | # | ||
|  | #  DESCRIPTION: A rewrite of Ken Fallon's script to collect data about Linux | ||
|  | #               podcasts. Ken's script was inspired by Dann Washko's site at | ||
|  | #               http://www.thelinuxlink.net/ and prepared for OggCamp 12 in | ||
|  | #               August 2012 where it was used to generate handouts. | ||
|  | # | ||
|  | #               The script has not been developed since 2014, but is now in | ||
|  | #               use in 2020 helping to prepare podcast information for | ||
|  | #               a FOSDEM visit under the heading of "Free Culture Podcasts". | ||
|  | #               See the files 'Journal.adoc' (AsciiDoctor) and 'Journal.html' | ||
|  | #               (created with Pandoc) in the same directory as this script for | ||
|  | #               details of what has been done to develop the original design. | ||
|  | # | ||
|  | #      OPTIONS: --- | ||
|  | # REQUIREMENTS: --- | ||
|  | #         BUGS: --- | ||
|  | #        NOTES: --- | ||
|  | #       AUTHOR: Dave Morriss (djm), Dave.Morriss@gmail.com | ||
|  | #      VERSION: 0.0.14 | ||
|  | #      CREATED: 2013-12-25 12:40:33 | ||
|  | #     REVISION: 2020-02-18 22:45:28 | ||
|  | # | ||
|  | #------------------------------------------------------------------------------- | ||
|  | # Released under the terms of the GNU Affero General Public License (AGPLv3) | ||
|  | # http://www.gnu.org/licenses/agpl-3.0.html | ||
|  | #------------------------------------------------------------------------------- | ||
|  | # | ||
|  | #=============================================================================== | ||
|  | 
 | ||
|  | use 5.030; | ||
|  | use strict; | ||
|  | use warnings; | ||
|  | use utf8; | ||
|  | 
 | ||
|  | use Getopt::Long; | ||
|  | use Pod::Usage; | ||
|  | use Config::General; | ||
|  | use List::MoreUtils qw{uniq}; | ||
|  | use Log::Handler; | ||
|  | 
 | ||
|  | use Try::Tiny; | ||
|  | 
 | ||
|  | use URI; | ||
|  | use Net::DNS; | ||
|  | use IO::Socket; | ||
|  | use LWP::UserAgent; | ||
|  | use WWW::RobotRules; | ||
|  | use XML::RSS::Parser; | ||
|  | use XML::Feed; | ||
|  | use Feed::Find; | ||
|  | 
 | ||
|  | use Template; | ||
|  | use Template::Filters; | ||
|  | Template::Filters->use_html_entities;    # Use HTML::Entities in the template | ||
|  | 
 | ||
|  | use HTML::Entities; | ||
|  | 
 | ||
|  | use IO::Prompter; | ||
|  | 
 | ||
|  | use JSON; | ||
|  | 
 | ||
|  | use DBI; | ||
|  | use DateTime::Format::SQLite; | ||
|  | use DateTime::Format::ISO8601; | ||
|  | use SQL::Abstract; | ||
|  | 
 | ||
|  | use Data::Dumper; | ||
|  | 
 | ||
#
# Version number (manually incremented)
#
our $VERSION = '0.0.14';

#
# Script name (path stripped from $0)
#
( my $PROG = $0 ) =~ s|.*/||mx;

#
# Declarations
#
# @new_urls   URLs to be added (from the command line and/or the -load file)
# @deletions  URLs to be removed (from the -delete file)
# $rules      WWW::RobotRules object (created further down)
# $robot_name name/version string this script presents when reading robots.txt
# $sth1/$h1/$rv  shared DBI statement handle, fetched row and execute() result
#
my ( @new_urls, @deletions );
my ( $rules,    $robot_name ) = ( undef, "$PROG/$VERSION" );
my ( $sth1, $h1, $rv );

# Arrayref of per-feed hashrefs; filled by collectData() when any of
# -json/-opml/-template is requested
my $feeds;
|  | 
 | ||
#
# File and directory paths
#
my $basedir     = "$ENV{HOME}/HPR/feed_watcher";
my $tempdir     = "$basedir/tmp";
my $configfile  = "$basedir/$PROG.cfg";
my $logfile     = "$basedir/${PROG}.log";
my $deftemplate = "$basedir/${PROG}.tpl";

#
# Hash key map to database field names (chevron comments are to stop Perltidy
# messing with the layout). Keys are the internal %uridata names collected
# while probing a URL; values are the matching columns of the 'urls' table.
# Commented-out keys are collected during a probe but not stored through this
# map.
#
                                                #<<<
my %keymap = (
    AUTHOR            => 'author',
    COPYRIGHT         => 'copyright',
    DESCRIPTION       => 'description',
    DNS               => 'dns',
#   ENCLOSURES        => undef,
#   ENCLOSURE_COUNT   => undef,
    FORMAT            => 'feedformat',
    GENERATOR         => 'generator',
#   HOST              => undef,
    HOSTUP            => 'host_up',
    HTTP_CONTENT_TYPE => 'content_type',
    HTTP_STATUS       => 'http_status',
    IMAGE             => 'image',
    LANGUAGE          => 'language',
    LINK              => 'link',
    MODIFIED          => 'modified',
#   PORT              => undef,
#   ROBOTS            => undef,
#   SAVE              => undef,
    TITLE             => 'title',
    TYPE              => 'urltype',
    URI               => 'url',
#   URI_ID            => undef,
);
                                                #>>>

#
# Headers for LWP::UserAgent — presents a browser-like User-Agent,
# presumably so feeds behind user-agent filtering still respond
#
my %headers = (
    'User-Agent' => 'Mozilla/5.0 (X11; Ubuntu; Linux i686; '
        . 'rv:15.0) Gecko/20100101 Firefox/15.0.1',
    'Accept' => 'image/gif, image/x-xbitmap, image/jpeg, '
        . 'image/pjpeg, image/png, */*',
    'Accept-Charset'  => 'iso-8859-1,*,utf-8',
    'Accept-Language' => 'en-US',
);
|  | 
 | ||
#
# Enable Unicode mode on the standard streams
#
binmode STDOUT, ":encoding(UTF-8)";
binmode STDERR, ":encoding(UTF-8)";

#
# Options and arguments (Options() is defined later in the file — not
# visible in this chunk)
#
my %options;
Options( \%options );

#
# Any remaining arguments are taken to be URLs
#
@new_urls = @ARGV;

#
# Default help: brief usage from the POD, exit status 1
#
pod2usage( -msg => "Version $VERSION\n", -exitval => 1 )
    if ( $options{'help'} );

#
# Detailed help: full manual page from the POD
#
pod2usage( -msg => "$PROG version $VERSION\n", -verbose => 2, -exitval => 1, -noperldoc => 0 )
    if ( $options{'manpage'} );
|  | 
 | ||
#
# Collect options, falling back to sensible defaults. The '//' (defined-or)
# operator is equivalent to the original defined() ternaries; -debug takes
# an integer so '|| 0' matches the original truth test exactly.
#
my $DEBUG = $options{'debug'} || 0;

my $cfgfile    = $options{config} // $configfile;
my $silent     = $options{silent} // 0;
my $loadfile   = $options{'load'};
my $deletefile = $options{'delete'};

my $scan   = $options{scan}   // 0;
my $report = $options{report} // 0;
my $check  = $options{check}  // 0;

my $outfile  = $options{out};
my $json     = $options{json};
my $opml     = $options{opml};
my $template = $options{template};
|  | 
 | ||
#
# The configuration file must exist
#
die "Unable to find configuration file $cfgfile\n" unless -e $cfgfile;

#
# The load and delete files, when given, must exist and be readable. The
# checks (and their messages) are identical for both, so share them; the
# load file is checked first, as before.
#
foreach my $file ( $loadfile, $deletefile ) {
    next unless $file;
    die "File $file does not exist\n"  unless -e $file;
    die "File $file is not readable\n" unless -r $file;
}

#
# We accept -json or -json=filename. In the former case we make a default
# name, otherwise we use the one provided.
#
$json = "$PROG.json" if defined($json) && $json =~ /^$/;

#
# We accept -opml or -opml=filename, defaulting the name in the same way.
#
$opml = "$PROG.opml" if defined($opml) && $opml =~ /^$/;

#
# Similarly -template or -template=filename; the default is $deftemplate,
# and whichever template is chosen must be readable.
#
if ( defined($template) ) {
    $template = "$deftemplate" if $template =~ /^$/;

    die "Error: Unable to find template $template\n" unless -r $template;
}
|  | 
 | ||
#
# Load configuration data
#
# NOTE(review): 'new Config::General(...)' is indirect object syntax;
# Config::General->new(...) is the safer modern spelling — confirm before
# changing.
#
my $conf = new Config::General(
    -ConfigFile      => $cfgfile,
    -InterPolateVars => 1,
    -ExtendedAccess  => 1
);
my %config = $conf->getall();

#
# Connect to the database. Type, file and credentials all come from the
# 'database' section of the configuration file.
#
my $dbtype = $config{database}->{type};
my $dbfile = $config{database}->{file};
my $dbuser = $config{database}->{user};
my $dbpwd  = $config{database}->{password};
my $dbh
    = DBI->connect( "dbi:$dbtype:dbname=$dbfile", $dbuser, $dbpwd,
    { AutoCommit => 1, sqlite_unicode => 1, } )
    or die $DBI::errstr;

#
# Enable SQLite's foreign keys (necessary to enable any ON DELETE CASCADE
# foreign key constraints to function)
#
$dbh->do('PRAGMA foreign_keys = ON');
|  | 
 | ||
#
# Check we have something to do: there must be URLs to add or delete, or one
# of the reporting/output actions must be requested.
#
# NOTE(review): '&&' binds tighter than '||', so the '$rows > 0' guard
# applies only to the '$scan' alternative — -report/-json/-opml/-template
# count as work even with an empty 'urls' table. Confirm this is intended.
#
my $rows = countRows( $dbh, 'SELECT count(*) FROM urls' );
my $work = (
           scalar(@new_urls) > 0
        || defined($loadfile)
        || defined($deletefile)
        || ( $report
        || defined($json)
        || defined($opml)
        || defined($template)
        || $scan && $rows > 0 )
);

die "Nothing to do!\n" unless $work;
|  | 
 | ||
#-------------------------------------------------------------------------------
# Set up logging keeping the default log layout except for the date
#-------------------------------------------------------------------------------
my $LOG = Log::Handler->new();

# minlevel 0 .. maxlevel 7 accepts every level from 'emergency' to 'debug'
$LOG->add(
    file => {
        timeformat => "%Y/%m/%d %H:%M:%S",
        filename   => $logfile,
        minlevel   => 0,
        maxlevel   => 7,
    }
);

#-------------------------------------------------------------------------------
# Open the output file (or STDOUT) - we may need the date to do it
#-------------------------------------------------------------------------------
my $outfh;
if ($outfile) {
    open( $outfh, ">:encoding(UTF-8)", $outfile )
        or warn "Unable to open $outfile for writing: $!";
}
else {
    # Duplicate STDOUT so the rest of the script can always write to $outfh
    open( $outfh, ">&:encoding(UTF-8)", \*STDOUT )
        or warn "Unable to initialise for writing: $!";
}


#
# Set up a robots.txt rules parser, identifying this robot as
# "<script>/<version>"
#
$rules = WWW::RobotRules->new($robot_name);
|  | 
 | ||
#
# Slurp the load file if there is one
#
if ($loadfile) {
    #
    # Load the input file, one URL per line
    #
    open( my $in, '<:encoding(utf8)', $loadfile )
        or die "$PROG : failed to open load file '$loadfile' : $!\n";
    chomp( my @loaded = <$in> );
    close($in)
        or warn "$PROG : failed to close load file '$loadfile' : $!\n";

    $LOG->info("Loaded URLs from $loadfile");

    #
    # Add the loaded URLs to the array (after any command-line URLs)
    #
    push( @new_urls, @loaded );
}

#
# Now, we either have URLs from the command line, or from the load file, so we
# process these.
#
if (@new_urls) {
    #
    # Remove duplicates
    #
    @new_urls = uniq(@new_urls);

    $LOG->info( "Adding ", scalar(@new_urls), " URLs to the database" );

    #
    # Load these URLs as appropriate, returning any more that we find by
    # following HTML urls.
    #
    # NOTE(review): loadUrls currently ends with a bare 'return;', so this
    # assignment empties @new_urls — harmless today, but the TODO below
    # depends on it one day returning discovered feed URLs.
    #
    @new_urls = loadUrls( $dbh, \@new_urls, $rules, \%keymap );

    #
    # Now process any URLs that came back. Since we are explicitly looking for
    # feeds we can assume that's what we have so don't need to recurse again.
    #
    # TODO
}
|  | 
 | ||
#
# Process the delete file if there is one
#
if ($deletefile) {
    #
    # Load the delete file, one URL per line
    #
    open( my $del, '<:encoding(utf8)', $deletefile )
        or die "$PROG : failed to open load file '$deletefile' : $!\n";
    chomp( @deletions = <$del> );
    close($del)
        or warn "$PROG : failed to close load file '$deletefile' : $!\n";

    #
    # Remove duplicates
    #
    @deletions = uniq(@deletions);

    $LOG->info( "Deleting ", scalar(@deletions), " URLs from the database" );

    #
    # There are URLs to delete. Process them one by one.
    #
    if (@deletions) {
        $sth1 = $dbh->prepare(q{DELETE from urls WHERE url = ?});
        foreach my $rec (@deletions) {
            # execute() returns the number of rows affected; DBI returns the
            # zero-but-true string '0E0' when no rows matched, which the
            # numeric '!= 0' test below correctly treats as a failure
            $rv = $sth1->execute($rec);
            if ( $dbh->err ) {
                warn $dbh->errstr;
            }
            if ( $rv != 0 ) {
                emit ( $silent, "Deleted $rec ($rv rows)\n" );
            }
            else {
                emit ( $silent, "Failed to delete $rec\n" );
            }
        }

    }

}
|  | 
 | ||
#-------------------------------------------------------------------------------
# Perform a database scan if requested
#-------------------------------------------------------------------------------
if ($scan) {
    $LOG->info( "Scan is not fully implemented yet" );
    scanDB($dbh, \%keymap);
}

#-------------------------------------------------------------------------------
# Report database contents if requested (written to $outfh: the -out file or
# STDOUT)
#-------------------------------------------------------------------------------
if ($report) {
    reportDB($dbh, $outfh);
    $LOG->info( "Report generated" );
}
|  | 
 | ||
#
# Collect the feed data needed by the -json/-opml/-template outputs, and do
# any image repairs we need, but temporarily
#
if ( $json || $opml || $template ) {
    $feeds = collectData($dbh);

    #
    # Clean up the image references (until we have a proper fix): strip any
    # leading stringified arrayref ("ARRAY(0x...)") from the image URL. The
    # substitution itself is the test — it only succeeds when the prefix is
    # present.
    #
    foreach my $feed_row ( @{$feeds} ) {
        next unless defined $feed_row->{urls_image};
        if ( $feed_row->{urls_image} =~ s/^ARRAY\([^)]+\)// ) {
            $LOG->info( "Fixed image for $feed_row->{urls_url}" );
        }
    }

}
|  | 
 | ||
#-------------------------------------------------------------------------------
# Output all feeds to a JSON file if requested
#-------------------------------------------------------------------------------
if ($json) {
    #
    # FIX: do not enable JSON's 'utf8' flag here. With it set, encode()
    # returns UTF-8 octets, and printing those through the handle's
    # ':encoding(UTF-8)' layer encoded them a second time, garbling any
    # non-ASCII characters in the feed data. With the flag off, encode()
    # returns Perl characters which the I/O layer encodes exactly once.
    # 'canonical' sorts keys; 'pretty' gives a human-readable layout.
    #
    my $js = JSON->new->canonical->pretty;
    open( my $out, '>:encoding(UTF-8)', $json )
        or die "Unable to open output file $json $!\n";

    print $out $js->encode($feeds), "\n";
    close($out);
    emit ( $silent, "JSON data written to $json\n" );
    $LOG->info( "JSON data written to $json" );
}
|  | 
 | ||
#-------------------------------------------------------------------------------
# Requesting the -opml option means to dump the entire feed table as OPML
#-------------------------------------------------------------------------------
if ($opml) {
    # Loaded lazily since it is only needed for this option
    require XML::OPML;

    open( my $out, '>:encoding(UTF-8)', $opml )
        or die "Unable to open output file $opml $!\n";

    #
    # Start building OPML. Mandatory attributes are 'type', 'text' and
    # 'xmlURL'. The 'title' attribute is the same as 'text'. The 'type'
    # attribute is poorly defined; this module uses 'rss' as the type and
    # 'RSS' as the value of the 'version' attribute. This is not linked to the
    # type of the feed.
    #
    my $OPML = XML::OPML->new(version => '1.1');

    #
    # Create the 'head' element, stamping both dates with the current time
    # (normaliseDT is defined elsewhere in this file)
    #
    my $DT = normaliseDT(DateTime->now());
    $OPML->head(
        title => 'Free Culture Podcasts',
        dateCreated => $DT,
        dateModified => $DT,
    );

    #
    # Walk the feeds generating one OPML outline element per feed
    #
    for (my $i = 0; $i < scalar(@$feeds); $i++) {
        my $f = $feeds->[$i];

        $OPML->add_outline(
            title       => $f->{urls_title},
            text        => $f->{urls_title},
            description => $f->{urls_description},
            xmlUrl      => $f->{urls_url},
            htmlUrl     => $f->{urls_link},
        );
    }

    #
    # Output OPML to the nominated file
    #
    print $out $OPML->as_string;

    close($out);
    emit ( $silent, "OPML data written to $opml\n" );
    $LOG->info( "OPML data written to $opml" );
}
|  | 
 | ||
|  | 
 | ||
#-------------------------------------------------------------------------------
# Fill and print the template if requested
#-------------------------------------------------------------------------------
if ($template) {
    # ABSOLUTE allows the -template option to be a full path; otherwise the
    # template is looked up relative to $basedir
    my $tt = Template->new(
        {   ABSOLUTE     => 1,
            ENCODING     => 'utf8',
            INCLUDE_PATH => $basedir,
        }
    );

    #
    # Make the structure the template needs
    #
    my $vars = {
        feeds => $feeds,
    };

#   print Dumper($vars),"\n";

    # Render into $document, then write it to $outfh (-out file or STDOUT)
    my $document;
    $tt->process( $template, $vars, \$document, { binmode => ':utf8' } )
        || die $tt->error(), "\n";
    print $outfh $document;

    emit ( $silent, "Data processed with $template\n" );
    $LOG->info( "Data processed with $template" );
}

#$dbh->disconnect;

exit;
|  | 
 | ||
#===  FUNCTION  ================================================================
#         NAME: loadUrls
#      PURPOSE: To load URLs read from the input file into the database
#   PARAMETERS: $dbh            database handle
#               $new_urls       arrayref containing URLs
#               $rules          WWW::RobotRules object
#               $keymap         hashref containing a map of key names to
#                               database field names
#      RETURNS: Currently nothing. The plan is to return any new URLs
#               discovered by investigating non-feed URLs.
#  DESCRIPTION: Each URL is validated, checked against the database for
#               duplicates, then checked in DNS, for server availability,
#               against the site's robots.txt rules and for its HTTP content
#               type. URLs of type 'Feed' are then downloaded and parsed.
#               The 'continue' block runs on every iteration (including ones
#               left early with 'next') and writes any URL whose
#               $uridata{SAVE} flag is still set to the database, along with
#               its enclosures.
#       THROWS: No exceptions
#     COMMENTS: Uses the file-level globals $silent, $check, $DEBUG, $LOG
#               and %headers.
#     SEE ALSO: N/A
#===============================================================================
sub loadUrls {
    my ( $dbh, $new_urls, $rules, $keymap ) = @_;

    my ( $stream, $feed );
    my %uridata;
    my $roboturl;

    #
    # Work through the list of URLs.
    #
    # FIX: iterate the $new_urls arrayref that was passed in. The previous
    # code looped over the file-level global @new_urls (captured as
    # a closure), silently ignoring this parameter; that only worked because
    # the sole caller happened to pass a reference to that same array.
    #
    foreach my $rec ( @{$new_urls} ) {
        %uridata = ();

        #
        # By default save the collected data
        #
        $uridata{SAVE} = 1;

        #
        # Check that we have a valid URL. We don't save them if they are
        # invalid ($uridata{SAVE} is set to 0 in the routine).
        #
        my $uri = validateURI($rec,\%uridata);
        next unless $uri;

        #
        # Check whether we already have this URI in the database
        #
        if (dbSearch(
                $dbh, 'SELECT * FROM urls WHERE url = ?',
                $uridata{URI}
            )
            )
        {
            emit( $silent, "$uri is already in the database\n" );
            $uridata{SAVE} = 0;
            next;
        }

        #
        # Check the hostname resolves in DNS
        #
        if ( checkDNS( $uri, \%uridata ) ) {
            $uridata{DNS} = join( ", ", @{ $uridata{DNS} } );
            emit( $silent, "DNS: $uridata{DNS}\n" );
        }
        else {
            emit( $silent, "$uri has no DNS entry\n" );
            $uridata{SAVE} = 0;
            next;
        }

        #
        # Check the server is available. Don't save if it's not (probably will
        # not get the feed anyway).
        #
        if ( serverUp( $uri, \%uridata ) ) {
            emit( $silent, sprintf( "Host: %s is up\n", $uridata{HOST} ) );
        }
        else {
            emit( $silent, sprintf( "Host: %s is down\n", $uridata{HOST} ) );
            $uridata{SAVE} = 0;
            next;
        }

        #
        # Check the site's robots.txt. If there's a block then don't save.
        # A missing robots.txt (404) is not treated as a block.
        #
        if ( robotRulesOK( $uri, $rules, \%uridata ) ) {
            emit( $silent, "Check of robots.txt rules succeeded\n" );
        }
        else {
            if ( $uridata{ROBOTS} =~ /404/ ) {
                emit( $silent, "Search for robots.txt file failed\n" );
            }
            else {
                emit( $silent, "Check of robots.txt rules failed\n" );
                $uridata{SAVE} = 0;
                next;
            }
        }

        #
        # Look for the HTTP content type. Don't save if the request failed.
        #
        if ( checkContentType( $uri, \%uridata, \%headers, $LOG ) ) {
            emit( $silent, "HTTP request OK\n" );
        }
        else {
            emit( $silent, "HTTP request failed\n" );
            $uridata{SAVE} = 0;
            next;
        }

        #
        # Parse the feed
        #
        if ( $uridata{TYPE} eq 'Feed' ) {
            $LOG->info('Processing feed: ',$uridata{URI});
            $stream = getFeed( $uridata{URI} );
            if ($stream) {
                $feed = parseFeed( $uridata{URI}, $stream );
                unless ( $feed ) {
                    $uridata{SAVE} = 0;
                    next;
                }

                #
                # Provide a means of examining the returned feed object. There
                # are some weird behaviours in there (partly because of the
                # weirdness of RSS and poor adherence to what standards there
                # are).
                #
                print Dumper($feed), "\n" if ( $DEBUG > 2 );
                storeFeed($feed,\%uridata);

                #
                # Perform a check on the copyright. Routine sets
                # $uridata{SAVE} = 0 if the copyright is not acceptable.
                #
                if ($check) {
                    next unless checkCopyright(\%uridata);
                }

            }
            else {
                emit($silent, "Problem collecting feed");
                $uridata{SAVE} = 0;
                next;
            }
        }
        elsif ( $uridata{TYPE} eq 'HTML' ) {
            #
            # TODO Save the URL and find any feeds it contains
            #
            emit( $silent,
                "Type $uridata{TYPE} not implemented yet; skipped\n" );
            next;
        }
        else {
            emit( $silent,
                "Not a known type '$uridata{HTTP_CONTENT_TYPE}'; skipped\n" );
            $uridata{SAVE} = 0;
            next;
        }

    }
    continue {
        # { This 'continue' block is executed for each iteration or 'next' }

        #
        # Decide whether to save what we have collected
        #
        if ( $uridata{SAVE} ) {
            if ( addURI( $dbh, \%uridata, $keymap ) ) {
                emit( $silent, "$uridata{URI} added to the database\n" );
                $LOG->info('Feed ',$uridata{URI},' added to the database');

                #
                # Get the id the database allocated for the row we added
                #
                $uridata{URI_ID}
                    = $dbh->last_insert_id( undef, undef, 'urls', undef );

                #
                # Add any enclosures we found
                #
                if ( defined( $uridata{ENCLOSURE_COUNT} )
                    && $uridata{ENCLOSURE_COUNT} > 0 )
                {
                    if ( addEnclosures( $dbh, \%uridata ) ) {
                        emit( $silent, $uridata{ENCLOSURE_COUNT},
                            " enclosures for $uridata{URI} added to the database\n"
                        );
                    }
                }
            }
            else {
                emit( $silent,
                    "$uridata{URI} was not added to the database\n" );
                $LOG->info('Feed ',$uridata{URI},' not added to the database');
            }
        }

        #
        # Dump what we have if requested
        #
        print Dumper( \%uridata ), "\n" if ( $DEBUG > 1 );

        emit( $silent, '-' x 80, "\n" );
    }

    #
    # Temporary: no discovered URLs are returned yet (empty list in list
    # context)
    #
    return;    # undef

}
|  | 
 | ||
|  | #===  FUNCTION  ================================================================ | ||
|  | #         NAME: scanDB | ||
|  | #      PURPOSE: To scan the URLs in the database and update the stored data | ||
|  | #   PARAMETERS: $dbh            database handle | ||
|  | #               $keymap         hashref containing a map of key names to | ||
|  | #                               database field names | ||
|  | #      RETURNS: Nothing | ||
|  | #  DESCRIPTION: | ||
|  | #       THROWS: No exceptions | ||
|  | #     COMMENTS: None | ||
|  | #     SEE ALSO: N/A | ||
|  | #=============================================================================== | ||
sub scanDB {
    my ($dbh, $keymap) = @_;

    my ( $sql1, $sth1, $rv1, $h1 );
    my ( $aref, @urls, $DT, $stream, $feed );
    my ( %uridata, $urichanges, $enc_changes );

    #
    # Query to return all feed URLs
    #
    $sql1 = q{SELECT id, url FROM urls WHERE urltype = 'Feed' order by title};

    $sth1 = $dbh->prepare($sql1);
    $rv1  = $sth1->execute();
    if ( $dbh->err ) {
        warn $dbh->errstr;
    }

    #
    # Collect everything as an arrayref pointing to a bunch of arrayrefs
    # containing the column details requested
    #
    $aref = $sth1->fetchall_arrayref;

    #
    # Extract just the URL strings (the second column of each row)
    #
    @urls = map { $_->[1] } @{$aref};

    #
    # Now process these URLs from the database one at a time
    #
    $sql1 = q{SELECT * FROM urls WHERE url = ?};
    $sth1 = $dbh->prepare($sql1);

    my $count = 0;

    foreach my $url (@urls) {
        %uridata = ();

        $count++;

        #
        # Record the scan for this URL
        #
        $DT = normaliseDT( DateTime->now() );
        $uridata{SCANNED_ON} = $DT;

        #
        # Default to OK
        #
        $uridata{SCAN_OK} = 1;

        emit( $silent, "Scanning '$url'\n" );
        $rv1 = $sth1->execute($url);
        if ( $dbh->err ) {
            warn $dbh->errstr;
        }
        $h1 = $sth1->fetchrow_hashref;
        emit( $silent, $h1->{title}, "\n" );

        #
        # The URL should be valid already from the point at which it was added
        # to the database, but if we do this we get a canonical version (it
        # sets $uridata{SAVE} which makes no sense here, but we'll just ignore
        # it. It also sets $uridata{URI}, which is useful.)
        #
        my $uri = validateURI( $url, \%uridata );

        #
        # [Fix] validateURI returns undef when the URL fails to validate, and
        # previously we carried straight on and called methods on the
        # undefined value in checkDNS, crashing the whole scan. Record the
        # failure and move on to the next feed instead. $uridata{URI} is only
        # set on success, so fill it in for the 'continue' block's messages.
        #
        unless ( defined($uri) ) {
            $uridata{URI}     = $url;
            $uridata{SCAN_OK} = 0;
            next;
        }

        #
        # Check the hostname resolves in DNS
        #
        if ( checkDNS( $uri, \%uridata ) ) {
            $uridata{DNS} = join( ", ", @{ $uridata{DNS} } );
            emit( $silent, "DNS: $uridata{DNS}\n" );
        }
        else {
            emit( $silent, "$uri has no DNS entry\n" );
            $uridata{SCAN_OK} = 0;
            next;
        }

        #
        # Check the server is available
        #
        if ( serverUp( $uri, \%uridata ) ) {
            emit( $silent, sprintf( "Host: %s is up\n", $uridata{HOST} ) );
        }
        else {
            emit( $silent, sprintf( "Host: %s is down\n", $uridata{HOST} ) );
            $uridata{SCAN_OK} = 0;
            next;
        }

        #
        # Look for the HTTP content type.
        #
        if ( checkContentType( $uri, \%uridata, \%headers, $LOG ) ) {
            emit( $silent, "HTTP request OK\n" );
        }
        else {
            emit( $silent, "HTTP request failed\n" );
            $uridata{SCAN_OK} = 0;
            next;
        }

        #
        # Note: not doing the robots.txt check since it was done at load time
        #

        #
        # We know we have a feed, so go get it
        #
        $stream = getFeed( $uridata{URI} );
        if ($stream) {
            #
            # Parse the feed. The routine generates its own error messages
            #
            $feed = parseFeed( $uridata{URI}, $stream );
            unless ( $feed ) {
                $uridata{SCAN_OK} = 0;
                next;
            }

            #
            # Save the important feed components in the %uridata hash
            #
            print Dumper($feed), "\n" if ( $DEBUG > 2 );
            storeFeed( $feed, \%uridata );
        }
        else {
            emit( $silent, "Problem collecting feed" );
            $uridata{SCAN_OK} = 0;
            next;
        }

    }
    continue {
        # { This 'continue' block is executed for each iteration or 'next' }

        #
        # If the scan went OK then perform a comparison between the new feed
        # data and that which is stored
        #
        if ( $uridata{SCAN_OK} ) {
                $urichanges = updateURI( $dbh, \%uridata, $keymap );
                $LOG->info( 'Feed \'', $uridata{URI}, '\' URL changes = ',
                    $urichanges )
                    if $urichanges > 0;

                # TODO Update enclosures
        }
        else {
            emit( $silent, "$uridata{URI} was not scanned successfully\n" );
            $LOG->info( 'Feed ', $uridata{URI}, ' not scanned successfully' );
        }

        #
        # Finished scanning this feed
        #
        emit( $silent, '-' x 80, "\n" );

        #
        # Temporarily stop after the first N feeds
        # FIXME(review): this debugging limiter caps every scan at 2 feeds;
        # remove or make configurable before production use
        #
        last if $count == 2;
    }

    return;
}
|  | 
 | ||
|  | #===  FUNCTION  ================================================================ | ||
|  | #         NAME: reportDB | ||
|  | #      PURPOSE: To generate a printed report from the database | ||
|  | #   PARAMETERS: $dbh            database handle | ||
|  | #               $fh             output file handle | ||
|  | #      RETURNS: Nothing | ||
#  DESCRIPTION: Prints every feed's details as "key: value" lines to $fh,
#               one blank-line-separated block per feed, keys sorted
|  | #       THROWS: No exceptions | ||
|  | #     COMMENTS: Perhaps we can get fancy with stored reports in the future | ||
|  | #     SEE ALSO: N/A | ||
|  | #=============================================================================== | ||
sub reportDB {
    my ( $dbh, $fh ) = @_;

    #
    # Report the contents of the 'urls' table joined to the 'all_episodes'
    # view, adding the date of the most recent episode for each feed
    #
    my $query = q{
        SELECT urls.id, ae.*, max(coalesce(ae.ep_issued,ae.ep_modified)) AS latest_ep
            FROM all_episodes ae
            JOIN urls ON (ae.urls_id = urls.id)
            GROUP by urls.id
            HAVING ae.urls_urltype = 'Feed'
    };

    my $sth = $dbh->prepare($query);
    $sth->execute();
    warn $dbh->errstr if $dbh->err;

    #
    # One "key: value" line per column, keys alphabetical, a blank line
    # between feeds; undef values are shown as the literal string 'undef'
    #
    while ( my $row = $sth->fetchrow_hashref() ) {
        for my $key ( sort keys %{$row} ) {
            printf {$fh} "%20s: %s\n", $key, coalesce( $row->{$key}, 'undef' );
        }
        print {$fh} "\n";
    }

    $sth->finish;

    return;
}
|  | 
 | ||
|  | #===  FUNCTION  ================================================================ | ||
|  | #         NAME: collectData | ||
|  | #      PURPOSE: Collects data from the database for generating a report | ||
|  | #   PARAMETERS: $dbh            database handle | ||
|  | #      RETURNS: An array of hashrefs from the query | ||
|  | #  DESCRIPTION: Runs a fixed query looking for feed details in the 'urls' | ||
|  | #               table and the 'all_episodes' view, showing the date of the | ||
|  | #               latest episode. The result is an array of rows, each | ||
|  | #               represented as a hash, all sorted by the feed title. | ||
|  | #       THROWS: No exceptions | ||
|  | #     COMMENTS: None | ||
|  | #     SEE ALSO: N/A | ||
|  | #=============================================================================== | ||
sub collectData {
    my ($dbh) = @_;

    #
    # Feed details from 'urls' plus the date of the latest episode from the
    # 'all_episodes' view, ordered by feed title
    #
    my $query = q{
        SELECT urls.id, ae.*, max(coalesce(ae.ep_issued,ae.ep_modified)) AS latest_ep
            FROM all_episodes ae
            JOIN urls ON (ae.urls_id = urls.id)
            GROUP by urls.id
            HAVING ae.urls_urltype = 'Feed'
            ORDER BY ae.urls_title
    };

    my $sth = $dbh->prepare($query);
    $sth->execute();
    warn $dbh->errstr if $dbh->err;

    #
    # The empty {} slice argument makes fetchall_arrayref return each row
    # as a hashref, so the result is an arrayref of hashrefs
    #
    my $rows = $sth->fetchall_arrayref( {} );

    $sth->finish;

    return $rows;
}
|  | 
 | ||
|  | #===  FUNCTION  ================================================================ | ||
|  | #         NAME: countRows | ||
|  | #      PURPOSE: To count the rows in a table | ||
|  | #   PARAMETERS: $dbh            database handle | ||
|  | #               $sql            SQL expression to use | ||
|  | #      RETURNS: Number of rows found (note that zero is returned as 0E0) | ||
#  DESCRIPTION: Runs the supplied SQL (expected to yield a single-row,
#               single-column count) and returns the first column of the
#               first row fetched
|  | #       THROWS: No exceptions | ||
|  | #     COMMENTS: None | ||
|  | #     SEE ALSO: N/A | ||
|  | #=============================================================================== | ||
#
# Run a counting query and return the value in the first column of the
# first row. Returns undef if the query yielded no rows at all.
#
sub countRows {
    my ( $dbh, $sql ) = @_;

    my $sth1 = $dbh->prepare($sql);
    my $rv   = $sth1->execute();
    if ( $dbh->err ) {
        warn $dbh->errstr;
    }
    my $h1 = $sth1->fetch;
    $sth1->finish;

    #
    # [Fix] Guard against a query that returns no row: previously
    # '@$h1[0]' dereferenced undef and died. A COUNT(*) always yields one
    # row, but arbitrary SQL passed in here need not.
    #
    return defined($h1) ? $h1->[0] : undef;
}
|  | 
 | ||
|  | #===  FUNCTION  ================================================================ | ||
|  | #         NAME: dbSearch | ||
|  | #      PURPOSE: To perform a simple search in the database | ||
|  | #   PARAMETERS: $dbh            database handle | ||
|  | #               $sql            SQL expression to use (expected to be | ||
|  | #                               a SELECT) | ||
|  | #               @args           arguments for the 'execute' | ||
|  | #      RETURNS: True (1) if the row exists, otherwise false (0). | ||
|  | #  DESCRIPTION: Uses 'prepare_cached' to allow repeated calls with the same | ||
|  | #               SQL without incurring the overhead of repeated 'prepare' | ||
|  | #               calls. Only the first row is fetched (we expect there to be | ||
|  | #               only one) and the success or failure is determined by its | ||
|  | #               existence. | ||
|  | #       THROWS: No exceptions | ||
|  | #     COMMENTS: None | ||
|  | #     SEE ALSO: N/A | ||
|  | #=============================================================================== | ||
sub dbSearch {
    my ( $dbh, $sql, @args ) = @_;

    #
    # prepare_cached avoids the cost of re-preparing when this is called
    # repeatedly with the same statement text
    #
    my $sth = $dbh->prepare_cached($sql);
    $sth->execute(@args);
    warn $dbh->errstr if $dbh->err;

    #
    # Only the first row matters here; its existence is the answer
    #
    my $row = $sth->fetchrow_hashref();
    $sth->finish;

    return defined($row);
}
|  | 
 | ||
|  | #===  FUNCTION  ================================================================ | ||
|  | #         NAME: execSQL | ||
|  | #      PURPOSE: To perform a non-SELECT query | ||
|  | #   PARAMETERS: $dbh            database handle | ||
|  | #               $sql            SQL expression to use | ||
|  | #               @args           arguments for the 'execute' | ||
|  | #      RETURNS: True (1) if the query succeeded, otherwise false (0). | ||
|  | #  DESCRIPTION: Uses 'prepare_cached' to allow repeated calls with the same | ||
|  | #               SQL without incurring the overhead of repeated 'prepare' | ||
|  | #               calls. | ||
|  | #       THROWS: No exceptions | ||
|  | #     COMMENTS: None | ||
|  | #     SEE ALSO: N/A | ||
|  | #=============================================================================== | ||
sub execSQL {
    my ( $dbh, $sql, @args ) = @_;

    #
    # prepare_cached keeps the handle around for repeated calls with the
    # same SQL; the execute result (row count or true value) is what the
    # caller gets back
    #
    my $sth    = $dbh->prepare_cached($sql);
    my $result = $sth->execute(@args);
    warn $dbh->errstr if $dbh->err;
    $sth->finish;

    return $result;
}
|  | 
 | ||
|  | #===  FUNCTION  ================================================================ | ||
|  | #         NAME: validateURI | ||
|  | #      PURPOSE: Checks a URL for validity | ||
|  | #   PARAMETERS: $rec            the URL handed to the script | ||
|  | #               $uridata        hashref containing data for this URI | ||
|  | #      RETURNS: A URI object if valid otherwise undef | ||
|  | #  DESCRIPTION: The URL string is validated with the URI module. A canonical | ||
|  | #               string version is stored in the hash referenced by $uridata | ||
|  | #               hash if valid otherwise the URL is marked as invalid. | ||
|  | #       THROWS: No exceptions | ||
|  | #     COMMENTS: None | ||
|  | #     SEE ALSO: N/A | ||
|  | #=============================================================================== | ||
sub validateURI {
    my ( $rec, $uridata ) = @_;

    #
    # Parse the string, assuming 'http' when no scheme is present. An
    # unparsable URL yields an object with no scheme, which is our test
    # for validity. Invalid URLs are flagged so they do not get saved.
    #
    my $uri = URI->new( $rec, 'http' );

    unless ( $uri->scheme ) {
        emit( $silent, "URI $uri is not valid\n" );
        $uridata->{SAVE} = 0;
        return;
    }

    emit( $silent, "URI $uri is valid\n" );

    # Store the canonical string form for later use
    $uridata->{URI} = $uri->canonical->as_string;
    return $uri;
}
|  | 
 | ||
|  | #===  FUNCTION  ================================================================ | ||
|  | #         NAME: checkDNS | ||
|  | #      PURPOSE: Looks up a host DNS entry | ||
|  | #   PARAMETERS: $uri            URI object | ||
|  | #               $uridata        hashref containing data for this URI | ||
|  | #      RETURNS: True (1) if the DNS query was successful, otherwise false (0) | ||
|  | #  DESCRIPTION: The host name is extracted from the URI (and stored). The | ||
|  | #               hostname is searched for in the DNS and if successful, an | ||
|  | #               array of addresses from the 'A' records is built. This is | ||
|  | #               stored in the hash referenced by $uridata. | ||
|  | #       THROWS: No exceptions | ||
|  | #     COMMENTS: None | ||
|  | #     SEE ALSO: N/A | ||
|  | #=============================================================================== | ||
sub checkDNS {
    my ( $uri, $uridata ) = @_;

    my $hostname = $uri->host;
    $uridata->{HOST} = $hostname;

    my $resolver = Net::DNS::Resolver->new;
    my $reply    = $resolver->search($hostname);

    unless ($reply) {
        warn "Query failed: ", $resolver->errorstring, "\n";
        return 0;
    }

    #
    # Keep just the addresses from the 'A' records in the answer section
    # and store them for the caller
    #
    my @addresses
        = map { $_->address } grep { $_->type eq "A" } $reply->answer;
    $uridata->{DNS} = \@addresses;

    return 1;
}
|  | 
 | ||
|  | #===  FUNCTION  ================================================================ | ||
|  | #         NAME: serverUp | ||
|  | #      PURPOSE: Checks that a given host is responding | ||
|  | #   PARAMETERS: $uri            URI object | ||
|  | #               $uridata        hashref containing data for this URI | ||
|  | #      RETURNS: True (1) if the host responds to a TCP connection, false (0) | ||
|  | #               otherwise | ||
|  | #  DESCRIPTION: Given an URL parses out the hostname and the port (defaulting | ||
|  | #               to the appropriate default for the scheme, such as 80 for | ||
|  | #               http). Attempts to connect to this host and port. If the | ||
|  | #               connect fails then details are written to the data structure | ||
|  | #               pointed to by $uridata. | ||
|  | #       THROWS: No exceptions | ||
|  | #     COMMENTS: None | ||
|  | #     SEE ALSO: N/A | ||
|  | #=============================================================================== | ||
sub serverUp {
    my ( $uri, $uridata ) = @_;

    my $hostname = $uri->host();
    my $port     = $uri->port();
    $uridata->{PORT} = $port;

    #
    # Attempt a plain TCP connection to the host and port, giving up
    # after 10 seconds; success means the server is reachable
    #
    my $conn = IO::Socket::INET->new(
        Proto    => "tcp",
        PeerAddr => $hostname,
        PeerPort => $port,
        Reuse    => 1,
        Timeout  => 10
    );

    unless ($conn) {
        $uridata->{HOSTUP} = 0;
        return 0;
    }

    $conn->close;
    $uridata->{HOSTUP} = 1;
    return 1;
}
|  | 
 | ||
|  | #===  FUNCTION  ================================================================ | ||
|  | #         NAME: robotRulesOK | ||
|  | #      PURPOSE: To check the intended URL against the site's robots.txt rules | ||
|  | #   PARAMETERS: $uri            URI object | ||
|  | #               $rules          WWW::RobotRules object | ||
|  | #               $uridata        hashref containing data for this URI | ||
|  | #      RETURNS: True (1) if the GET of the robots.txt file succeeded and the | ||
|  | #               rules allow the URI object to be fetched, false (0) otherwise. | ||
#  DESCRIPTION: Downloads the site's robots.txt, feeds it to the
#               WWW::RobotRules object, and asks whether fetching $uri is
#               permitted. Records the outcome in $uridata->{ROBOTS}.
|  | #       THROWS: No exceptions | ||
|  | #     COMMENTS: None | ||
|  | #     SEE ALSO: N/A | ||
|  | #=============================================================================== | ||
sub robotRulesOK {
    my ( $uri, $rules, $uridata ) = @_;

    #
    # Build the URL of robots.txt relative to the feed URL.
    # [Fix] The previous scheme + '://' + host concatenation dropped any
    # explicit port in the URL (e.g. http://example.com:8080/feed), so the
    # wrong server could be consulted. URI->new_abs keeps scheme, host and
    # port intact.
    #
    my $roboturl = URI->new_abs( '/robots.txt', $uri )->as_string;
    my $robots_txt;

    #
    # Use LWP::UserAgent to get the robots.txt file and handle errors
    #
    my $ua = LWP::UserAgent->new;
    $ua->timeout(10);
    $ua->agent("$PROG/$VERSION");

    my $response = $ua->get($roboturl);
    if ( $response->is_success ) {
        # Record the find, parse the rules and ask whether our URL is allowed
        $uridata->{ROBOTS} = 'Found';
        $robots_txt = $response->decoded_content;
        $rules->parse( $roboturl, $robots_txt );
        return $rules->allowed("$uri");
    }
    else {
        # Record the HTTP failure; undef tells the caller we could not decide
        $uridata->{ROBOTS} = $response->status_line;
        warn "Failed to get $roboturl\n";
        warn $response->status_line . "\n";
        return; # undef
    }

}
|  | 
 | ||
|  | #===  FUNCTION  ================================================================ | ||
|  | #         NAME: checkContentType | ||
|  | #      PURPOSE: Check the content_type of the url | ||
|  | #   PARAMETERS: $uri            URI object | ||
|  | #               $uridata        hashref containing data for this URI | ||
|  | #               $headers        hashref containing query headers | ||
|  | #               $log            Log::Handler object | ||
|  | #      RETURNS: True (1) if all was well, otherwise false (0) | ||
|  | #  DESCRIPTION: Ensures that we are pulling txt/html/xml. We get the headers | ||
|  | #               for the URI object using the LWP::UserAgent head method. Then | ||
|  | #               we examine the 'content-type' header looking for the string | ||
|  | #               'xml' or 'html' in it. The former denotes a feed, and the | ||
|  | #               latter a normal HTML page. | ||
|  | #       THROWS: No exceptions | ||
|  | #     COMMENTS: None | ||
|  | #     SEE ALSO: N/A | ||
|  | #=============================================================================== | ||
sub checkContentType {
    my ( $uri, $uridata, $headers, $log ) = @_;

    my @feeds;

    # Assume the worst until the HEAD request tells us otherwise
    $uridata->{HTTP_STATUS} = 'Unknown';

    my $browser = LWP::UserAgent->new or return 0;

    # HEAD request only -- we want the headers, not the body
    my $response = $browser->head( $uri->as_string, %{$headers} )
        or return 0;

    $uridata->{HTTP_STATUS} = $response->status_line;

    if ( $response->is_success ) {
        $uridata->{HTTP_CONTENT_TYPE} = $response->header('content-type');

        #
        # Decode the content-type we received: anything mentioning 'xml'
        # is treated as a feed, 'html' as an ordinary web page
        #
        if ( $uridata->{HTTP_CONTENT_TYPE} =~ m|xml|i ) {
            $uridata->{TYPE} = 'Feed';
        }
        elsif ( $uridata->{HTTP_CONTENT_TYPE} =~ m|html|i ) {
            $uridata->{TYPE} = 'HTML';
        }
        else {
            $uridata->{TYPE} = 'Unknown';
        }

        #
        # Sometimes perfectly valid feeds misleadingly return text/html so we
        # try to spot such cases here and adjust the internal type accordingly
        #
        if ( $uridata->{TYPE} eq 'HTML' ) {
            # If auto-discovery finds exactly one feed and it is this very
            # URL then the 'HTML' label was wrong: this is really a feed
            @feeds = Feed::Find->find( $uri->as_string );
            if ( scalar(@feeds) == 1 && $feeds[0] eq $uri->as_string ) {
                emit( $silent, "Feed found with wrong content-type\n" );
                $uridata->{TYPE} = 'Feed';
            }
            else {
                # Genuine HTML page; report how many feeds it links to
                emit( $silent, "Found ", scalar(@feeds),
                    " feeds within this HTML page\n" );
                print Dumper( \@feeds ), "\n" if $DEBUG > 0;
            }
        }

        $log->info( "URL content classified as: ", $uridata->{TYPE} );
        emit( $silent, "URL content classified as: ", $uridata->{TYPE}, "\n" );
        return 1;
    }
    else {
        # The HEAD request failed; record that we know nothing about the type
        $uridata->{HTTP_CONTENT_TYPE} = $uridata->{TYPE} = 'Unknown';
        return 0;
    }
}
|  | 
 | ||
|  | #===  FUNCTION  ================================================================ | ||
|  | #         NAME: getFeed | ||
|  | #      PURPOSE: Download the contents of a feed URL | ||
|  | #   PARAMETERS: $feed_url       URL of the feed to download | ||
|  | #      RETURNS: String representation of the feed contents or undef if the | ||
|  | #               download failed. | ||
|  | #  DESCRIPTION: Issues a GET on the URL which is expected to be a feed (but | ||
|  | #               need not be). If successful the contents are decoded and | ||
|  | #               returned, otherwise undef is returned. | ||
|  | #       THROWS: No exceptions | ||
|  | #     COMMENTS: None | ||
|  | #     SEE ALSO: | ||
|  | #=============================================================================== | ||
sub getFeed {
    my ($feed_url) = @_;

    #
    # Fetch the URL with LWP::UserAgent, identifying this script in the
    # User-Agent header and capping the wait at 10 seconds
    #
    my $agent = LWP::UserAgent->new;
    $agent->timeout(10);
    $agent->agent("$PROG/$VERSION");

    my $resp = $agent->get($feed_url);

    unless ( $resp->is_success ) {
        warn "Failed to get $feed_url\n";
        warn $resp->status_line, "\n";
        return; # undef
    }

    # Return the body with any transfer/content encodings undone
    return $resp->decoded_content;
}
|  | 
 | ||
|  | #===  FUNCTION  ================================================================ | ||
|  | #         NAME: parseFeed | ||
|  | #      PURPOSE: Parse a podcast feed that has already been downloaded | ||
|  | #   PARAMETERS: $feed_url       URL of the feed previously downloaded | ||
|  | #               $feed_content   String containing the content of the feed, for | ||
|  | #                               parsing | ||
|  | #      RETURNS: An XML::Feed object containing the parsed feed or undef if the | ||
|  | #               parse failed | ||
#  DESCRIPTION: Hands the downloaded content to XML::Feed, which detects
#               whether it is RSS or Atom; reports and returns undef on
#               parse failure
|  | #       THROWS: No exceptions | ||
|  | #     COMMENTS: None | ||
|  | #     SEE ALSO: N/A | ||
|  | #=============================================================================== | ||
sub parseFeed {
    my ( $feed_url, $feed_content ) = @_;

    # XML::Feed works out for itself whether the content is RSS or Atom
    my $parsed = XML::Feed->parse( \$feed_content );

    return $parsed if $parsed;

    #
    # Something went wrong. Abort this feed
    #
    warn "Failed to parse $feed_url: ", XML::Feed->errstr, "\n";
    return; # undef
}
|  | 
 | ||
|  | #===  FUNCTION  ================================================================ | ||
|  | #         NAME: storeFeed | ||
|  | #      PURPOSE: Stores feed attributes in a hash | ||
|  | #   PARAMETERS: $feed           XML::Feed object returned from parsing the | ||
|  | #                               feed | ||
|  | #               $uridata        hashref containing data for this URI | ||
|  | #      RETURNS: Nothing | ||
|  | #  DESCRIPTION: | ||
|  | #       THROWS: No exceptions | ||
|  | #     COMMENTS: None | ||
|  | #     SEE ALSO: N/A | ||
|  | #=============================================================================== | ||
sub storeFeed {
    my ($feed, $uridata) = @_;

    #
    # Trim leading/trailing whitespace from the title and format.
    # [Fix] A feed with no <title> (or an unrecognised format) returns
    # undef here, and running s/// on undef raised 'uninitialized value'
    # warnings; default to the empty string first via the script's own
    # coalesce() helper.
    #
    ( $uridata->{TITLE}  = coalesce( $feed->title,  '' ) ) =~ s/(^\s+|\s+$)//g;
    ( $uridata->{FORMAT} = coalesce( $feed->format, '' ) ) =~ s/(^\s+|\s+$)//g;

    #
    # Copy the channel-level attributes, normalising the modification
    # date and flattening the image structure for storage
    #
    $uridata->{LINK}        = $feed->link;
    $uridata->{DESCRIPTION} = clean_string( $feed->tagline );
    $uridata->{AUTHOR}      = $feed->author;
    $uridata->{MODIFIED}    = normaliseDT( $feed->modified );
    $uridata->{IMAGE}       = flattenArray( $feed->image );
    $uridata->{COPYRIGHT}   = $feed->copyright;
    $uridata->{GENERATOR}   = $feed->generator;
    $uridata->{LANGUAGE}    = $feed->language;
    #print coalesce($feed->webMaster,'No webMaster'),"\n";

    #
    # Collect the per-entry enclosure details and count them
    #
    $uridata->{ENCLOSURES} = extractEnclosures($feed);
    $uridata->{ENCLOSURE_COUNT}
        = scalar( @{ $uridata->{ENCLOSURES} } );

}
|  | 
 | ||
|  | #===  FUNCTION  ================================================================ | ||
|  | #         NAME: checkCopyright | ||
|  | #      PURPOSE: Ask the user to check the copyright of a feed | ||
#   PARAMETERS: $uridata        hashref containing data for this URI
#      RETURNS: True (1) if the user accepted the feed, false (0) otherwise
#  DESCRIPTION: Shows the feed's copyright string and prompts the user to
#               accept or reject the feed; the decision is stored in
#               $uridata->{SAVE}
|  | #       THROWS: No exceptions | ||
|  | #     COMMENTS: None | ||
|  | #     SEE ALSO: N/A | ||
|  | #=============================================================================== | ||
sub checkCopyright {
    my ($uridata) = @_;

    my $decision;
    $LOG->info('Checking copyright of feed');

    #
    # Prompt the user, failing gracefully if there's
    # a problem. If the user types 'Y' or 'y' we accept the
    # feed, otherwise we do not (thus a blank return = 'no').
    #
    try {
        # Show the feed title and whatever copyright string the feed
        # declared (may be absent; coalesce guards against undef)
        printf STDERR
            "Feed '%s' has the copyright string:\n%s\n",
            $uridata->{TITLE},
            coalesce( $uridata->{COPYRIGHT}, '' );
        # NOTE(review): 'prompt' presumably comes from IO::Prompter (loaded
        # elsewhere in the file); '-yes' appears to make it return true only
        # for a y/Y answer -- confirm against the module actually imported
        $decision = prompt(
            -in     => *STDIN,
            -prompt => 'OK to add this feed?',
            -style  => 'bold red underlined',
            -yes
        );
    }
    catch {
        # Any failure while prompting (e.g. no TTY) counts as a 'no'
        warn "Problem processing copyright decision: $_";
        $decision = 0;
    };

    #
    # Take action on the decision (or default); SAVE records it so the
    # caller knows whether to keep this feed
    #
    $uridata->{SAVE} = $decision;
    if ($decision) {
        emit( $silent, "Feed added\n" );
        $LOG->info('Copyright OK');
        return 1;
    }
    else {
        emit( $silent, "Feed not added\n" );
        $LOG->info('Copyright not OK');
        return 0;
    }
}
|  | 
 | ||
|  | #===  FUNCTION  ================================================================ | ||
|  | #         NAME: parseRSS | ||
|  | #      PURPOSE: Attempt to parse a feed as RSS | ||
|  | #   PARAMETERS: $feed_url       URL of the feed previously downloaded | ||
|  | #               $feed_content   String containing the content of the feed, for | ||
|  | #                               parsing | ||
|  | #      RETURNS: An XML::RSS object containing the parsed feed or undef if the | ||
|  | #               parse failed | ||
|  | #  DESCRIPTION: ** Incomplete ** | ||
|  | #       THROWS: No exceptions | ||
|  | #     COMMENTS: None | ||
|  | #     SEE ALSO: N/A | ||
|  | #=============================================================================== | ||
sub parseRSS {
    my ( $feed_url, $feed_content ) = @_;

    #
    # [Fix] This stub previously ignored failures and fell off the end
    # without a return value. Implement the documented contract: return
    # the parsed object, or warn and return undef on failure. XML::RSS
    # dies on malformed XML, so trap that with eval.
    # NOTE(review): XML::RSS normally requires an instance (->new->parse)
    # taking a plain string; the class-method/ref form is kept from the
    # original -- confirm against the XML::RSS documentation.
    #
    my $rss = eval { XML::RSS->parse( \$feed_content ) };
    unless ($rss) {
        warn "Failed to parse $feed_url as RSS", ( $@ ? ": $@" : '' ), "\n";
        return; # undef
    }

    return $rss;
}
|  | 
 | ||
|  | #===  FUNCTION  ================================================================ | ||
|  | #         NAME: addURI | ||
|  | #      PURPOSE: Adds the data for a URI to the database | ||
|  | #   PARAMETERS: $dbh            database handle | ||
|  | #               $uridata        hashref containing data for the current URI | ||
|  | #               $keymap         hashref containing a map of key names to | ||
|  | #                               database field names | ||
|  | #      RETURNS: True (1) if the insert succeeded, false (0) otherwise | ||
|  | #  DESCRIPTION: The hash keys are defined as an array to make it easy to slice | ||
|  | #               the hash and the SQL is defined internally using the size of | ||
|  | #               the key array as a guide to the number of '?' placeholders. | ||
|  | #               These are passed to execSQL to do the work. | ||
|  | #       THROWS: No exceptions | ||
|  | #     COMMENTS: None | ||
|  | #     SEE ALSO: N/A | ||
|  | #=============================================================================== | ||
sub addURI {
    my ( $dbh, $uridata, $keymap ) = @_;

    #
    # The fields to insert, in a fixed order so that the two hash slices
    # below (column names from $keymap, values from $uridata) line up
    # with the '?' placeholders
    #
    my @keys = (
        'URI',               'DNS',
        'HOSTUP',            'HTTP_STATUS',
        'HTTP_CONTENT_TYPE', 'TYPE',
        'FORMAT',            'TITLE',
        'DESCRIPTION',       'AUTHOR',
        'MODIFIED',          'LINK',
        'IMAGE',             'COPYRIGHT',
        'GENERATOR',         'LANGUAGE',
    );

    # Translate internal key names into database column names and build
    # a matching list of placeholders
    my $columns      = join( ",", @{$keymap}{@keys} );
    my $placeholders = join( ',', ('?') x scalar(@keys) );

    my $sql = "INSERT INTO urls ($columns) VALUES($placeholders)";

    print "addURI query: $sql\n" if $DEBUG > 0;

    return execSQL( $dbh, $sql, @{$uridata}{@keys} );
}
|  | 
 | ||
#===  FUNCTION  ================================================================
#         NAME: extractEnclosures
#      PURPOSE: Builds an array of hashes containing enclosure data from an
#               XML::Feed object
#   PARAMETERS: $feed           XML::Feed object
#      RETURNS: A reference to the array of anonymous hashes built from the
#               feed.
#  DESCRIPTION: Walks the entries of the XML::Feed object, turning each one
#               into a hash whose reference is collected into the result
#               array. The two DateTime components are normalised to ISO8601
#               strings via normaliseDT. If an entry carries an enclosure its
#               url, type and length are saved too. A feed item may in
#               principle hold several enclosures, but XML::Feed only exposes
#               more than one on request, so only the first is handled here.
#       THROWS: No exceptions
#     COMMENTS: None
#     SEE ALSO: N/A
#===============================================================================
sub extractEnclosures {
    my ($feed) = @_;

    my @episodes;

    for my $entry ( $feed->entries ) {
        my %item = (
            title    => $entry->title,
            base     => $entry->base,
            link     => $entry->link,
            category => join( ", ", $entry->category ),
            author   => $entry->author,
            id       => $entry->id,
            issued   => normaliseDT( $entry->issued ),
            modified => normaliseDT( $entry->modified ),
        );

        #
        # Work around a bug in XML::Feed where the source method is only
        # available for Atom feeds. TODO report this.
        #
        $item{source} = $entry->source
            if $entry->isa('XML::Feed::Entry::Format::Atom');

        #
        # Record the (first) enclosure, if there is one
        #
        my ($enclosure) = $entry->enclosure;
        if ( defined $enclosure ) {
            $item{url}    = $enclosure->url;
            $item{type}   = $enclosure->type;
            $item{length} = $enclosure->length;
        }

        push @episodes, \%item;
    }

    return \@episodes;
}
|  | 
 | ||
#===  FUNCTION  ================================================================
#         NAME: addEnclosures
#      PURPOSE: Adds episodes extracted from a feed into the database
#   PARAMETERS: $dbh            database handle
#               $uridata        hashref containing data for the current URI
#                               including an arrayref of hashrefs of episode
#                               data
#      RETURNS: True (1) if all the inserts succeeded, false (0) otherwise
#  DESCRIPTION: Inserts one row into the 'episodes' table per enclosure held
#               in $uridata->{ENCLOSURES}. The bind values are produced by
#               slicing each enclosure hash with a fixed key list so they
#               line up with the column list in the SQL. Successful inserts
#               are counted, and the count is compared with
#               $uridata->{ENCLOSURE_COUNT} to produce the boolean result.
#       THROWS: No exceptions
#     COMMENTS: None
#     SEE ALSO: N/A
#===============================================================================
sub addEnclosures {
    my ( $dbh, $uridata ) = @_;

    my $sql = q{INSERT INTO episodes
        (urls_id, link, enclosure, title, author, category, source, ep_id,
        issued, modified, byte_length, mime_type) VALUES(?,?,?,?,?,?,?,?,?,?,?,?)};

    #
    # Hash keys in the same order as the columns above (urls_id is bound
    # separately as the first value)
    #
    my @keys = qw(
        link url title author category source
        id issued modified length type
    );

    my $successes = 0;

    for my $enc ( @{ $uridata->{ENCLOSURES} } ) {
        if ( execSQL( $dbh, $sql, $uridata->{URI_ID}, @{$enc}{@keys} ) ) {
            $successes++;
        }
        else {
            emit( $silent, "Failed to add episode $enc->{url}\n" );
        }
    }

    return ( $successes == $uridata->{ENCLOSURE_COUNT} );
}
|  | 
 | ||
#===  FUNCTION  ================================================================
#         NAME: updateURI
#      PURPOSE: Compare the data in a hash with that in the database
#   PARAMETERS: $dbh            database handle
#               $uridata        hashref containing data for the current URI
#                               including an arrayref of hashrefs of episode
#                               data
#               $keymap         hashref containing a map of key names to
#                               database field names
#      RETURNS: The number of changes made
#  DESCRIPTION: Fetches the 'urls' row matching the URI, compares each field
#               of interest with the freshly collected value in %$uridata
#               (using 'equal' so undefined values compare safely) and
#               collects any values that differ. If there are differences an
#               UPDATE statement keyed on the row id is built with
#               SQL::Abstract and executed.
#       THROWS: No exceptions
#     COMMENTS: None
#     SEE ALSO: N/A
#===============================================================================
sub updateURI {
    my ( $dbh, $uridata, $keymap ) = @_;

    my ( $sql1, $sth1, $h1 );
    my ( %fieldvals, %where );
    my ( $diffs, $updates ) = ( 0, 0 );

    #
    # Hash keys of interest; mapped to column names through %$keymap
    #
    my @keys = (
        'URI',               'DNS',
        'HOSTUP',            'HTTP_STATUS',
        'HTTP_CONTENT_TYPE', 'TYPE',
        'FORMAT',            'TITLE',
        'DESCRIPTION',       'AUTHOR',
        'MODIFIED',          'LINK',
        'IMAGE',             'COPYRIGHT',
        'GENERATOR',         'LANGUAGE',
    );

    #
    # Get the row from the urls table
    #
    $sql1 = q{SELECT * FROM urls WHERE url = ?};
    $sth1 = $dbh->prepare($sql1);
    $sth1->execute( $uridata->{URI} );
    if ( $dbh->err ) {
        warn $dbh->errstr;
    }
    $h1 = $sth1->fetchrow_hashref;

    #
    # Guard against a missing row: without it every comparison below would
    # run against undef and the UPDATE would have an undefined id
    #
    unless ( defined($h1) ) {
        emit( $silent, "No database row found for $uridata->{URI}\n" );
        return 0;
    }

    for my $key (@keys) {
        unless ( equal( $uridata->{$key}, $h1->{ $keymap->{$key} } ) ) {
            $diffs++;

            #
            # Store under the *database column name* (via %$keymap), since
            # these become the SET clause of the UPDATE. Previously the raw
            # hash key was used, generating wrong column names.
            #
            $fieldvals{ $keymap->{$key} } = $uridata->{$key};

            #
            # Temporary report
            #
            print "Difference: ($key)\n";
            print "  Feed:     ", coalesce($uridata->{$key},''), "\n";
            print "  Database: ", coalesce($h1->{ $keymap->{$key} },''), "\n";
        }
    }

    if ($diffs > 0) {
        #
        # Prepare to use SQL::Abstract
        #
        my $sql = SQL::Abstract->new();

        #
        # Build the update statement, keyed on the row id
        #
        $where{id} = $h1->{id};
        my ( $stmt, @bindvals )
            = $sql->update( 'urls', \%fieldvals, \%where );

        # Temporary
        print "Statement: $stmt\n";
        print "Bind values: ", join(",",@bindvals),"\n";

        #
        # Perform the updates
        #
        $sth1 = $dbh->prepare($stmt);
        $sth1->execute(@bindvals);
        if ( $dbh->err ) {
            warn "Processing $h1->{url}\n", $dbh->errstr;
        }
        else {
            emit($silent, "Updated $h1->{url}\n");
            $updates++;
        }

    }

    return $updates;
}
|  | 
 | ||
#===  FUNCTION  ================================================================
#         NAME: updateEnclosures
#      PURPOSE: Update the enclosures stored with a feed URL
#   PARAMETERS: $dbh            database handle
#               $uridata        hashref containing data for the current URI
#                               including an arrayref of hashrefs of episode
#                               data
#      RETURNS: The number of changes made (currently always 0; see below)
#  DESCRIPTION: Looks up the episodes stored in the database for this feed's
#               URL. The routine is incomplete: comparing the stored episodes
#               with the freshly collected data in $uridata->{ENCLOSURES} and
#               applying any updates has not been implemented yet, so no
#               changes are made.
#       THROWS: No exceptions
#     COMMENTS: TODO implement the comparison and update logic
#     SEE ALSO: updateURI
#===============================================================================
sub updateEnclosures {
    my ( $dbh, $uridata ) = @_;

    my ( $sql1, $sth1, $h1 );
    my ( %fieldvals, %where );
    my ( $diffs, $updates ) = ( 0, 0 );

    #
    # Get the enclosures associated with this feed. The bind value is the
    # URI, so the sub-select must match on the url column (it previously
    # compared the URI against the title column, which can never match).
    #
    $sql1 = q{
        SELECT * FROM episodes
        WHERE urls_id = (SELECT id FROM urls WHERE url = ?)
    };
    $sth1 = $dbh->prepare($sql1);
    $sth1->execute( $uridata->{URI} );
    if ( $dbh->err ) {
        warn $dbh->errstr;
    }
    $h1 = $sth1->fetchrow_hashref;

    # TODO: compare $h1 (and the remaining rows) with the entries in
    # $uridata->{ENCLOSURES}, apply any differences and count them

    return $updates;
}
|  | 
 | ||
#===  FUNCTION  ================================================================
#         NAME: equal
#      PURPOSE: Compare two strings even if undefined
#   PARAMETERS: $s1             The first string
#               $s2             The second string
#      RETURNS: True if both strings are undefined, false if only one is
#               defined, otherwise the result of comparing them with 'eq'.
#  DESCRIPTION: Treats two undefined strings as equal and a defined string
#               as never equal to an undefined one; when both are defined
#               the ordinary string comparison decides.
#       THROWS: No exceptions
#     COMMENTS: None
#     SEE ALSO:
#===============================================================================
sub equal {
    my ( $s1, $s2 ) = @_;

    my $def1 = defined($s1);
    my $def2 = defined($s2);

    return 1 if ( !$def1 && !$def2 );    # both undefined: equal
    return 0 unless ( $def1 && $def2 );  # exactly one defined: not equal

    return ( $s1 eq $s2 );
}
|  | 
 | ||
#===  FUNCTION  ================================================================
#         NAME: clean_string
#      PURPOSE: Clean a string of non-printables, newlines, multiple spaces
#   PARAMETERS: $str            The string to process
#      RETURNS: The processed string (undef if given undef)
#  DESCRIPTION: Converts CR/LF characters to spaces, removes all other
#               non-printable characters, squeezes runs of spaces down to
#               one, and finally trims leading and trailing whitespace.
#               The CR/LF conversion must happen before the non-printable
#               removal, otherwise line breaks would be deleted and the
#               words around them joined together.
#       THROWS: No exceptions
#     COMMENTS: The original used "tr/[[:graph:]]//c", but tr/// does not
#               support POSIX character classes and, with an empty
#               replacement and no /d flag, does not modify the string at
#               all — so non-printables were never actually removed.
#     SEE ALSO:
#===============================================================================
sub clean_string {
    my ($str) = @_;

    # Pass undef through rather than warning on the substitutions below
    return $str unless defined($str);

    $str =~ tr/\x0A\x0D/ /;        # CR/LF -> space
    $str =~ s/[^[:print:]]//g;     # drop non-printable characters
    $str =~ tr/ / /s;              # squeeze multiple spaces to one
    $str =~ s/(^\s+|\s+$)//g;      # trim leading/trailing whitespace

    return $str;
}
|  | 
 | ||
#===  FUNCTION  ================================================================
#         NAME: normaliseDT
#      PURPOSE: Normalise an ISO8601 date for comparison, etc.
#   PARAMETERS: $dt             a DateTime object (or ISO8601 string)
#      RETURNS: The date formatted as an SQLite-style datetime string, or
#               undef if the argument is undefined
#  DESCRIPTION: Parses the argument with DateTime::Format::ISO8601 and
#               re-formats the result with DateTime::Format::SQLite, giving
#               a canonical form suitable for storing and comparing.
#       THROWS: No exceptions
#     COMMENTS: None
#     SEE ALSO: N/A
#===============================================================================
sub normaliseDT {
    my ($dt) = @_;

    # Undefined input yields undefined output
    return undef unless defined($dt);

    my $formatter = DateTime::Format::SQLite->new();
    my $parsed    = DateTime::Format::ISO8601->parse_datetime($dt);

    return $formatter->format_datetime($parsed);
}
|  | 
 | ||
#===  FUNCTION  ================================================================
#         NAME: flattenArray
#      PURPOSE: Turns an arrayref into a simple list in a string
#   PARAMETERS: $item - the item that may be an arrayref
#      RETURNS: The plain item if it's not an arrayref, otherwise the
#               elements joined with ", "
#  DESCRIPTION: If the argument is an array reference its elements are
#               joined into a single comma-separated string; anything else
#               is returned unchanged.
#       THROWS: No exceptions
#     COMMENTS: The original built the flattened string in $result but then
#               returned $item, so arrayrefs were never actually flattened.
#     SEE ALSO: N/A
#===============================================================================
sub flattenArray {
    my ($item) = @_;

    my $result;
    if ( ref( $item ) eq 'ARRAY' ) {
        $result = join(", ",@{$item});
    }
    else {
        $result = $item;
    }

    # Return the flattened form (was 'return $item', discarding $result)
    return $result;
}
|  | 
 | ||
#===  FUNCTION  ================================================================
#         NAME: coalesce
#      PURPOSE: To find the defined argument and return it
#   PARAMETERS: Arbitrary number of arguments
#      RETURNS: The first defined argument, or undef/empty list when none is
#  DESCRIPTION: Scans the argument list in order and returns the first value
#               that is defined, mirroring SQL's COALESCE function. With no
#               defined argument a bare return gives undef in scalar context
#               and an empty list in list context.
#       THROWS: No exceptions
#     COMMENTS: None
#     SEE ALSO: N/A
#===============================================================================
sub coalesce {
    for my $candidate (@_) {
        return $candidate if defined($candidate);
    }
    return; # undef
}
|  | 
 | ||
#===  FUNCTION  ================================================================
#         NAME: emit
#      PURPOSE: Print text on STDERR unless silent mode has been selected
#   PARAMETERS: - Boolean indicating whether to be silent or not
#               - list of arguments to 'print'
#      RETURNS: Nothing
#  DESCRIPTION: A thin wrapper around 'print' so that diagnostic messages go
#               to STDERR only when the silent flag is false. This lets the
#               script be quietened with the -silent option without guarding
#               every print statement individually.
#       THROWS: No exceptions
#     COMMENTS: None
#     SEE ALSO: N/A
#===============================================================================
sub emit {
    my $quiet = shift;
    return if $quiet;
    print STDERR @_;
}
|  | 
 | ||
#===  FUNCTION  ================================================================
#         NAME: Options
#      PURPOSE: Processes command-line options
#   PARAMETERS: $optref     Hash reference to hold the options
#      RETURNS: Undef
#  DESCRIPTION: Declares the Getopt::Long option specifications and parses
#               @ARGV into the hash referenced by $optref. On a parsing
#               failure the usage message (with the script version) is shown
#               via pod2usage and the script exits with status 1.
#       THROWS: No exceptions
#     COMMENTS: None
#     SEE ALSO: N/A
#===============================================================================
sub Options {
    my ($optref) = @_;

    my @options = (
        "help",   "manpage",  "debug=i", "silent!",
        "load=s", "delete=s", "scan!",   "report!",
        "check!", "json:s",   "opml:s",  "config=s",
        "out=s",  "template:s",
    );

    GetOptions( $optref, @options )
        or pod2usage( -msg => "Version $VERSION\n", -exitval => 1 );

    return;
}
|  | 
 | ||
|  | __END__ | ||
|  | 
 | ||
|  | #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% | ||
|  | #  Application Documentation | ||
|  | #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% | ||
|  | #{{{ | ||
|  | 
 | ||
|  | 
 | ||
|  | =head1 NAME | ||
|  | 
 | ||
|  | feedWatcher - watch a collection of podcast feeds | ||
|  | 
 | ||
|  | =head1 VERSION | ||
|  | 
 | ||
|  | This documentation refers to I<feedWatcher> version 0.0.14 | ||
|  | 
 | ||
|  | 
 | ||
|  | =head1 USAGE | ||
|  | 
 | ||
|  |  feedWatcher [-help] [-load=FILE] [-delete=FILE] [-[no]scan] [-[no]report] | ||
    [-[no]check] [-out=FILE] [-json[=FILE]] [-opml[=FILE]] [-template[=FILE]]
|  |     [-[no]silent] [-config=FILE] [-debug=N] [URL ...] | ||
|  | 
 | ||
|  | =head1 ARGUMENTS | ||
|  | 
 | ||
|  | Arguments are optional and may consist of an arbitrarily long list of URLs to | ||
|  | be processed and possibly added to the database by the script. | ||
|  | 
 | ||
|  | These URLs are prepended to any which may be provided through the | ||
|  | B<-load=FILE> option described below. The script makes sure the URL collection | ||
|  | contains no duplicates. | ||
|  | 
 | ||
|  | =head1 OPTIONS | ||
|  | 
 | ||
|  | =over 8 | ||
|  | 
 | ||
|  | =item B<-help> | ||
|  | 
 | ||
|  | Prints a brief help message describing the usage of the program, and then exits. | ||
|  | 
 | ||
|  | The full documentation can be displayed with the command: | ||
|  | 
 | ||
|  |     perldoc -oman feedWatcher | ||
|  | 
 | ||
|  | Alternatively, use the option B<-manpage> | ||
|  | 
 | ||
|  | =item B<-manpage> | ||
|  | 
 | ||
|  | Prints the entire documentation for the script. Note that this mode uses a | ||
|  | simpler type of formatter than I<perldoc>. | ||
|  | 
 | ||
|  | =item B<-load=FILE> | ||
|  | 
 | ||
|  | Defines a file from which new URLs are to be added to the database. These URLs | ||
|  | are checked in various ways before adding to the database. If arguments are | ||
|  | provided when the script is invoked these URLs are appended to the argument list. | ||
|  | 
 | ||
|  | =item B<-delete=FILE> | ||
|  | 
 | ||
|  | Defines a file from which a list of URLs is to be read which are to be | ||
|  | deleted from the database. | ||
|  | 
 | ||
|  | Note that it is possible (though inadvisable) to both add and delete an URL in | ||
|  | the same run of the script. The URL will first be added (from the load file or | ||
|  | argument list) then deleted. This is a pointless exercise which wastes | ||
|  | bandwidth, so don't do it! | ||
|  | 
 | ||
|  | =item B<-[no]scan> | ||
|  | 
 | ||
|  | This option (B<-scan>) causes the URLs stored in the database to be scanned | ||
|  | and updated. The negated form, which is also the default behaviour of the | ||
|  | script, (B<-noscan>) omits the scan. | ||
|  | 
 | ||
|  | NOTE: This function is not implemented yet. | ||
|  | 
 | ||
|  | =item B<-out=FILE> | ||
|  | 
 | ||
|  | This option defines an output file to receive any output. If the option is | ||
|  | omitted the data is written to STDOUT, allowing it to be redirected if | ||
|  | required. | ||
|  | 
 | ||
|  | =item B<-[no]check> | ||
|  | 
 | ||
This option (B<-check>) causes each feed which is being added to be checked
with the script user to confirm that it's OK to add it. The script reports the
I<copyright> field and requests a I<y> or I<n> response.
|  | 
 | ||
|  | =item B<-[no]report> | ||
|  | 
 | ||
|  | This option (B<-report>) causes a report of the contents of the database to be | ||
|  | generated.  The negated form, which is also the default behaviour of the | ||
|  | script, (B<-noreport>) omits the report. | ||
|  | 
 | ||
|  | NOTE: The report is currently very simple. | ||
|  | 
 | ||
|  | =item B<-json[=FILE]> | ||
|  | 
 | ||
|  | This option, which may be omitted, defines the location where the feed and | ||
|  | episode details are to be written. If omitted no JSON data is written. | ||
|  | 
 | ||
|  | If the option is given as B<-json=FILE> the data is written to the nominated | ||
|  | file. | ||
|  | 
 | ||
|  | If the B<=FILE> portion is omitted a default name of 'feedWatcher.json' is | ||
|  | used. | ||
|  | 
 | ||
|  | =item B<-opml[=FILE]> | ||
|  | 
 | ||
|  | This option, which may be omitted, defines the location where the feed details | ||
|  | are to be written. If omitted no OPML data is written. | ||
|  | 
 | ||
|  | If the option is given as B<-opml=FILE> the data is written to the nominated | ||
|  | file. | ||
|  | 
 | ||
|  | If the B<=FILE> portion is omitted a default name of 'feedWatcher.opml' is | ||
|  | used. | ||
|  | 
 | ||
=item B<-template[=FILE]>
|  | 
 | ||
|  | This option defines the template used to generate a form of the feed data. The | ||
|  | template is written using the B<Template> toolkit language. | ||
|  | 
 | ||
|  | If the file name is omitted then the script uses the file B<feedWatcher.tpl> | ||
|  | in the same directory as the script. If this file does not exist then the | ||
|  | script will exit with an error message. | ||
|  | 
 | ||
|  | For convenience B<feedWatcher.tpl> can be made a soft link which points to the | ||
|  | file which is the current default. This allows the development of versions | ||
|  | without changing the usual way this script is run. | ||
|  | 
 | ||
|  | =item B<-config=FILE> | ||
|  | 
 | ||
|  | This option allows an alternative configuration file to be used. This file | ||
|  | defines the location of the database, its name and the username and | ||
|  | password to be used to access it. This feature permits a test database to be | ||
used, or for two or more sets of feeds to be processed.
|  | 
 | ||
|  | See the CONFIGURATION AND ENVIRONMENT section below for the file format. | ||
|  | 
 | ||
|  | If the option is omitted the default file is used: B<feedWatcher.cfg> | ||
|  | 
 | ||
|  | =item B<-debug=N> | ||
|  | 
 | ||
|  | This option selects the debug level, resulting in a lot of output. | ||
|  | 
 | ||
|  |     0   (the default) No debug output | ||
|  |     1   Dumps the list of feeds found in an HTML download. | ||
|  |         Also shows the SQL query which will result in the insertion of a new | ||
|  |         row into the database table I<urls>. | ||
|  |     2   Dumps the collected data which is destined to be written to the database | ||
|  |     3   Dumps the contents of feeds during analysis | ||
|  | 
 | ||
|  | =back | ||
|  | 
 | ||
|  | =head1 DESCRIPTION | ||
|  | 
 | ||
|  | A full description of the application and its features. | ||
|  | May include numerous subsections (i.e. =head2, =head3, etc.) | ||
|  | 
 | ||
|  | 
 | ||
|  | =head1 DIAGNOSTICS | ||
|  | 
 | ||
|  | A list of every error and warning message that the application can generate | ||
|  | (even the ones that will "never happen"), with a full explanation of each | ||
|  | problem, one or more likely causes, and any suggested remedies. If the | ||
|  | application generates exit status codes (e.g. under Unix) then list the exit | ||
|  | status associated with each error. | ||
|  | 
 | ||
|  | 
 | ||
|  | =head1 CONFIGURATION AND ENVIRONMENT | ||
|  | 
 | ||
|  | A full explanation of any configuration system(s) used by the application, | ||
|  | including the names and locations of any configuration files, and the | ||
|  | meaning of any environment variables or properties that can be set. These | ||
|  | descriptions must also include details of any configuration language used | ||
|  | 
 | ||
|  | 
 | ||
|  | =head1 DEPENDENCIES | ||
|  | 
 | ||
|  |     Config::General | ||
|  |     DBI | ||
|  |     Data::Dumper | ||
|  |     DateTime::Format::ISO8601 | ||
|  |     DateTime::Format::SQLite | ||
|  |     Feed::Find | ||
|  |     Getopt::Long | ||
|  |     HTML::Entities | ||
|  |     IO::Socket | ||
|  |     JSON | ||
|  |     LWP::UserAgent | ||
|  |     List::MoreUtils | ||
|  |     Log::Handler | ||
|  |     Net::DNS | ||
|  |     Pod::Usage | ||
|  |     Template | ||
|  |     Template::Filters | ||
|  |     URI | ||
|  |     WWW::RobotRules | ||
|  |     XML::Feed | ||
|  |     XML::RSS::Parser | ||
|  | 
 | ||
|  | =head1 INCOMPATIBILITIES | ||
|  | 
 | ||
|  | A list of any modules that this module cannot be used in conjunction with. | ||
|  | This may be due to name conflicts in the interface, or competition for | ||
|  | system or program resources, or due to internal limitations of Perl | ||
|  | (for example, many modules that use source code filters are mutually | ||
|  | incompatible). | ||
|  | 
 | ||
|  | 
 | ||
|  | =head1 BUGS AND LIMITATIONS | ||
|  | 
 | ||
|  | There are no known bugs in this module. | ||
|  | Please report problems to Dave Morriss (Dave.Morriss@gmail.com) | ||
|  | Patches are welcome. | ||
|  | 
 | ||
|  | =head1 AUTHOR | ||
|  | 
 | ||
|  | Dave Morriss (Dave.Morriss@gmail.com) 2020 | ||
|  | 
 | ||
|  | =head1 LICENCE AND COPYRIGHT | ||
|  | 
 | ||
|  | Copyright (c) Dave Morriss (Dave.Morriss@gmail.com). All rights reserved. | ||
|  | 
 | ||
|  | This program is free software. You can redistribute it and/or modify it under | ||
|  | the same terms as perl itself. | ||
|  | 
 | ||
|  | =cut | ||
|  | 
 | ||
|  | #}}} | ||
|  | 
 | ||
|  | # [zo to open fold, zc to close] | ||
|  | 
 | ||
|  | # vim: syntax=perl:ts=8:sw=4:et:ai:tw=78:fo=tcrqn21:fdm=marker |