| 
									
										
										
										
											2024-06-04 16:35:44 +01:00
										 |  |  | #!/usr/bin/env perl | 
					
						
							|  |  |  | #=============================================================================== | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | #         FILE: upload_manager | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | #        USAGE: ./upload_manager [-help] [-[no]dry-run] [-[no]verbose] | 
					
						
							|  |  |  | #               [-debug=N] -mode=MODE [-episode=N1 [-episode=N2 ...]] | 
					
						
							|  |  |  | #               [-range=START [-range=END]] [-json=FILE] [-scandir=FILE] | 
					
						
							|  |  |  | #               [-dbconfig=FILE] | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | #  DESCRIPTION: Collect details about HPR shows and the copies on Archive.org | 
					
						
							|  |  |  | #               and keep them in a separate SQLite database (for | 
					
						
							|  |  |  | #               the moment). This database is used to populate the | 
					
						
							|  |  |  | #               developing PostgreSQL Database (which is not currently | 
					
						
							|  |  |  | #               progressing very much). | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | #      OPTIONS: --- | 
					
						
							|  |  |  | # REQUIREMENTS: --- | 
					
						
							|  |  |  | #         BUGS: --- | 
					
						
							|  |  |  | #        NOTES: 2022-02-15 Had to revert to DBD::MySQL temporarily. | 
					
						
							|  |  |  | #               2023-06-12 This code is no longer valid. It expects the main | 
					
						
							|  |  |  | #               HTML to point to files on the server (if there are "assets") | 
					
						
							|  |  |  | #               and recurses through extra HTML it finds there. It currently | 
					
						
							|  |  |  | #               looks for file details in any sub-directory "/eps/hpr1234/". | 
					
						
							|  |  |  | #               Now there are no files, so the details of all assets | 
					
						
							|  |  |  | #               (including the audio) needs to be in the 'assets' table. Not | 
					
						
							|  |  |  | #               sure we are there yet. | 
					
						
							|  |  |  | #               2024-03-08 Smartmatch is deprecated; moved to 'match::smart'. | 
					
						
							|  |  |  | #       AUTHOR: Dave Morriss (djm), Dave.Morriss@gmail.com | 
					
						
							|  |  |  | #      VERSION: 0.2.19 | 
					
						
							|  |  |  | #      CREATED: 2017-06-14 10:50:28 | 
					
						
							|  |  |  | #     REVISION: 2024-03-08 20:49:51 | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | #=============================================================================== | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-08-22 13:13:38 +01:00
										 |  |  | use v5.36; | 
					
						
							| 
									
										
										
										
											2024-06-04 16:35:44 +01:00
										 |  |  | use strict; | 
					
						
							|  |  |  | use warnings; | 
					
						
							|  |  |  | use utf8; | 
					
						
							|  |  |  | #use experimental 'smartmatch'; | 
					
						
							| 
									
										
										
										
											2024-08-22 13:13:38 +01:00
										 |  |  | # TODO: use experimental::try; | 
					
						
							| 
									
										
										
										
											2024-06-04 16:35:44 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | use match::smart; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | use Carp; | 
					
						
							|  |  |  | use Getopt::Long; | 
					
						
							|  |  |  | use Pod::Usage; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | use Config::General; | 
					
						
							|  |  |  | use File::Slurper qw{ read_text read_lines }; | 
					
						
							|  |  |  | use JSON; | 
					
						
							|  |  |  | use Net::OpenSSH; | 
					
						
							|  |  |  | #use TryCatch; # Broke in late May 2020 due to a problem with Devel::Declare | 
					
						
							|  |  |  | use Try::Tiny; | 
					
						
							|  |  |  | use Log::Handler; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | use HTML::TreeBuilder 5 -weak; | 
					
						
							|  |  |  | use HTML::Entities; | 
					
						
							|  |  |  | use List::Util qw{ min max }; | 
					
						
							|  |  |  | use List::MoreUtils qw{ any }; | 
					
						
							|  |  |  | use LWP::Simple; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | use DBI; | 
					
						
							|  |  |  | use SQL::Abstract; | 
					
						
							|  |  |  | use SQL::Abstract::Plugin::InsertMulti; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | use Data::Dumper; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | # Version number (manually incremented) | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | our $VERSION = '0.2.19'; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | # Script and directory names | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | ( my $PROG = $0 ) =~ s|.*/||mx; | 
					
						
							|  |  |  | #( my $DIR  = $0 ) =~ s|/?[^/]*$||mx; | 
					
						
							|  |  |  | #$DIR = '.' unless $DIR; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #------------------------------------------------------------------------------- | 
					
						
							|  |  |  | # Declarations | 
					
						
							|  |  |  | #------------------------------------------------------------------------------- | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | # Constants and other declarations | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | my $basedir       = "$ENV{HOME}/HPR/IA"; | 
					
						
							|  |  |  | my $logdir        = "$basedir/logs"; | 
					
						
							|  |  |  | my $logfile       = "$logdir/${PROG}.log"; | 
					
						
							|  |  |  | my $configfile    = "$basedir/.$PROG.cfg"; | 
					
						
							|  |  |  | my $db1configfile = "$basedir/.hpr_db.cfg"; | 
					
						
							|  |  |  | my $database2     = "$basedir/ia.db"; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | my ( $dbh1, $dbh2, $sth1, $h1 ); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | # NOTE: Program config not currently used | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | die "Configuration file $configfile not found\n" unless ( -e $configfile ); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | my $conf = Config::General->new( | 
					
						
							|  |  |  |     -ConfigFile      => $configfile, | 
					
						
							|  |  |  |     -InterPolateVars => 1, | 
					
						
							|  |  |  |     -ExtendedAccess  => 1, | 
					
						
							|  |  |  | ); | 
					
						
							|  |  |  | my %config = $conf->getall(); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | # Enable Unicode mode | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | binmode STDOUT, ":encoding(UTF-8)"; | 
					
						
							|  |  |  | binmode STDERR, ":encoding(UTF-8)"; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #------------------------------------------------------------------------------- | 
					
						
							|  |  |  | # Options | 
					
						
							|  |  |  | #------------------------------------------------------------------------------- | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | # Option defaults | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | my $DEFDEBUG = 0; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | # Options and arguments | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | my %options; | 
					
						
							|  |  |  | Options( \%options ); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | # Default help shows minimal information | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | pod2usage( -msg => "$PROG version $VERSION\n", -exitval => 1, -verbose => 0 ) | 
					
						
							|  |  |  |     if ( $options{'help'} ); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | # The -documentation or -man option shows the full POD documentation through | 
					
						
							|  |  |  | # a pager for convenience | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | pod2usage( -msg => "$PROG version $VERSION\n", -exitval => 1, -verbose => 2 ) | 
					
						
							|  |  |  |     if ( $options{'documentation'} ); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | my $DEBUG = ( defined( $options{debug} ) ? $options{debug} : $DEFDEBUG ); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | my $db1cfgfile | 
					
						
							|  |  |  |     = ( defined( $options{dbconfig} ) ? $options{dbconfig} : $db1configfile ); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | my $dry_run = ( defined( $options{'dry-run'} ) ? $options{'dry-run'} : 0 ); | 
					
						
							|  |  |  | my $verbose = ( defined( $options{verbose} ) ? $options{verbose} : 0 ); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | my $mode = $options{mode}; | 
					
						
							|  |  |  | die "Mode is required\n" unless defined($mode); | 
					
						
							|  |  |  | $mode = lc($mode); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | my @episodes; | 
					
						
							|  |  |  | @episodes = @{ $options{episode} } if ( defined( $options{episode} ) ); | 
					
						
							|  |  |  | if ($mode ne 'update' and @episodes) { | 
					
						
							|  |  |  |     die "The -episode=N options are only for use in update mode\n"; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | my @range; | 
					
						
							|  |  |  | @range = @{ $options{range} } if ( defined( $options{range} ) ); | 
					
						
							|  |  |  | if ($mode ne 'update' and @range) { | 
					
						
							|  |  |  |     die "The -range=N options are only for use in update mode\n"; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | _debug( $DEBUG >= 3, '@episodes: ' . Dumper( \@episodes ) ); | 
					
						
							|  |  |  | _debug( $DEBUG >= 3, '@range: ' . Dumper( \@range ) ); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | # Check -episode=N versus -range=N. We can't have both | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | die "Choose only -episode=N or -range=N, not both\n" | 
					
						
							|  |  |  |     if ( defined( $options{episode} ) && defined( $options{range} ) ); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | # Only two range values | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | die "-range=N should not be used more than twice\n" if ($#range gt 2); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | # Turn the range into a list of episodes | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | if (defined($options{range})) { | 
					
						
							|  |  |  |     @episodes = ( min(@range) .. max(@range) ); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | @episodes = map { sprintf('%04d',$_)} @episodes; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | _debug( $DEBUG >= 3, '@episodes: ' . Dumper(\@episodes) ); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | die "Too many episodes requested\n" if (scalar(@episodes) > 20); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | my $json = $options{json}; | 
					
						
							|  |  |  | if ( $mode eq 'json' ) { | 
					
						
							|  |  |  |     die "JSON file is required\n" unless defined($json); | 
					
						
							|  |  |  |     die "Unable to find file '$json'\n" unless -e $json; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | my $dirfile = $options{scandir}; | 
					
						
							|  |  |  | if ( $mode eq 'scandir' ) { | 
					
						
							|  |  |  |     die "Directory listing file is required\n" unless defined($dirfile); | 
					
						
							|  |  |  |     die "Unable to find file '$dirfile'\n" unless -e $dirfile; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | die "Configuration file $db1cfgfile not found\n" unless ( -e $db1cfgfile ); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #------------------------------------------------------------------------------- | 
					
						
							|  |  |  | # Set up logging keeping the default log layout except for the date. The format | 
					
						
							|  |  |  | # is "%T [%L] %m" where '%T' is the timestamp, '%L' is the log level and '%m is | 
					
						
							|  |  |  | # the message. | 
					
						
							|  |  |  | #------------------------------------------------------------------------------- | 
					
						
							|  |  |  | my $log = Log::Handler->new(); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | $log->add( | 
					
						
							|  |  |  |     file => { | 
					
						
							|  |  |  |         timeformat => "%Y-%m-%d %H:%M:%S", | 
					
						
							|  |  |  |         filename   => $logfile, | 
					
						
							|  |  |  |         minlevel   => 0, | 
					
						
							|  |  |  |         maxlevel   => 7, | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | ); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #------------------------------------------------------------------------------- | 
					
						
							|  |  |  | # Load MySQL configuration data | 
					
						
							|  |  |  | #------------------------------------------------------------------------------- | 
					
						
							|  |  |  | my $db1conf = Config::General->new( | 
					
						
							|  |  |  |     -ConfigFile      => $db1cfgfile, | 
					
						
							|  |  |  |     -InterPolateVars => 1, | 
					
						
							|  |  |  |     -ExtendedAccess  => 1, | 
					
						
							|  |  |  | ); | 
					
						
							|  |  |  | my %db1cfg = $db1conf->getall(); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | if ($verbose) { | 
					
						
							|  |  |  |     print "MySQL database configuration chosen: $db1cfgfile\n"; | 
					
						
							|  |  |  |     print "Dry run\n" if $dry_run; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #------------------------------------------------------------------------------- | 
					
						
							|  |  |  | # Connect to the MariaDB database [reverted to mysql temporarily] | 
					
						
							|  |  |  | #------------------------------------------------------------------------------- | 
					
						
							|  |  |  | my $dbhost = $db1cfg{database}->{host} // '127.0.0.1'; | 
					
						
							|  |  |  | my $dbport = $db1cfg{database}->{port} // 3306; | 
					
						
							|  |  |  | my $dbname = $db1cfg{database}->{name}; | 
					
						
							|  |  |  | my $dbuser = $db1cfg{database}->{user}; | 
					
						
							|  |  |  | my $dbpwd  = $db1cfg{database}->{password}; | 
					
						
							|  |  |  | #$dbh1 = DBI->connect( "DBI:MariaDB:database=$dbname;host=$dbhost;port=$dbport", | 
					
						
							|  |  |  | #    $dbuser, $dbpwd, { AutoCommit => 1 } ) | 
					
						
							|  |  |  | #    or croak $DBI::errstr; | 
					
						
							|  |  |  | $dbh1 = DBI->connect( "DBI:mysql:database=$dbname;host=$dbhost;port=$dbport", | 
					
						
							|  |  |  |     $dbuser, $dbpwd, { AutoCommit => 1 } ) | 
					
						
							|  |  |  |     or croak $DBI::errstr; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | # Enable client-side UTF8 [temporary] | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | $dbh1->{mysql_enable_utf8} = 1; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #------------------------------------------------------------------------------- | 
					
						
							|  |  |  | # Connect to the SQLite database | 
					
						
							|  |  |  | #------------------------------------------------------------------------------- | 
					
						
							|  |  |  | $dbh2 = DBI->connect( "dbi:SQLite:dbname=$database2", "", "" ); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | # Log the starting conditions | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | $log->info("Starting $PROG version $VERSION"); | 
					
						
							|  |  |  | if (@episodes) { | 
					
						
							|  |  |  |     $log->info('Episodes:', join( ", ", @episodes ) ); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | else { | 
					
						
							|  |  |  |     $log->info('Looking for new episodes'); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | $log->info("Mode: $mode"); | 
					
						
							|  |  |  | if ($dry_run) { | 
					
						
							|  |  |  |     $log->info('Dry run: Yes'); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #------------------------------------------------------------------------------- | 
					
						
							|  |  |  | # Set up for connecting to the server with SSH | 
					
						
							|  |  |  | #------------------------------------------------------------------------------- | 
					
						
							|  |  |  | my $remhost = 'hackerpublicradio.org'; | 
					
						
							|  |  |  | #my $remport = 22074; | 
					
						
							|  |  |  | my $remport = 22; | 
					
						
							|  |  |  | my $remuser = 'hpr'; | 
					
						
							|  |  |  | my $rempath = '/home/hpr/www/eps/'; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | my $ssh = Net::OpenSSH->new( $remhost, user => $remuser, port => $remport ); | 
					
						
							|  |  |  | $ssh->error and die "Can't ssh to $remhost: " . $ssh->error; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | $log->info("Connected to ${remuser}\@${remhost}, port $remport"); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #------------------------------------------------------------------------------- | 
					
						
							|  |  |  | # Perform the action requested by -mode=X | 
					
						
							|  |  |  | #------------------------------------------------------------------------------- | 
					
						
							|  |  |  | if ( $mode eq 'initialise' ) { | 
					
						
							|  |  |  |     print "Initialise mode\n"; | 
					
						
							|  |  |  |     initialise( $dbh1, $dbh2 ); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | elsif ( $mode eq 'update' ) { | 
					
						
							|  |  |  |     print "Update mode\n"; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     # Range updating is achieved by running the single-shot mode repeatedly | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     if ( @episodes ) { | 
					
						
							|  |  |  |         foreach my $episode (@episodes) { | 
					
						
							|  |  |  |             update_episode( $dbh1, $dbh2, $episode, $dry_run, $verbose ); | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     else { | 
					
						
							|  |  |  |         update_db( $dbh1, $dbh2, $dry_run, $verbose ); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | elsif ( $mode eq 'json' ) { | 
					
						
							|  |  |  |     print "JSON mode\n"; | 
					
						
							|  |  |  |     load_metadata( $dbh2, $json, $dry_run, $verbose ); | 
					
						
							|  |  |  |     #load_upload_dates( $dbh2, $json ); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | elsif ( $mode eq 'scandir' ) { | 
					
						
							|  |  |  |     print "Scan directory mode\n"; | 
					
						
							|  |  |  |     scan_dir( $dbh2, $dirfile ); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | else { | 
					
						
							|  |  |  |     die "Invalid mode: $mode\n"; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | exit; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #===  FUNCTION  ================================================================ | 
					
						
							|  |  |  | #         NAME: initialise | 
					
						
							|  |  |  | #      PURPOSE: Initialise an empty database | 
					
						
							|  |  |  | #   PARAMETERS: $dbh1           Handle for the MySQL database | 
					
						
							|  |  |  | #               $dbh2           Handle for the SQLite database | 
					
						
							|  |  |  | #      RETURNS: Nothing | 
					
						
							|  |  |  | #  DESCRIPTION: Runs the various initialisation functions, but only if the | 
					
						
							|  |  |  | #               database tables 'episodes' and/or 'assets' are empty | 
					
						
							|  |  |  | #       THROWS: No exceptions | 
					
						
							|  |  |  | #     COMMENTS: None | 
					
						
							|  |  |  | #     SEE ALSO: N/A | 
					
						
							|  |  |  | #=============================================================================== | 
					
						
							|  |  |  | sub initialise { | 
					
						
							|  |  |  |     my ( $dbh1, $dbh2 ) = @_; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     my ($count); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     # Initialise the 'episodes' table if it's empty | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     $count = count_rows( $dbh2, 'episodes' ); | 
					
						
							|  |  |  |     if ( $count == 0 ) { | 
					
						
							|  |  |  |         print "Initialising the 'episodes' table\n"; | 
					
						
							|  |  |  |         initialise_episodes( $dbh1, $dbh2 ); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     else { | 
					
						
							|  |  |  |         print "Did not initialise the 'episodes' table ($count rows)\n"; | 
					
						
							|  |  |  |         $log->info("Did not initialise the 'episodes' table ($count rows)"); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     # Initialise the 'assets' table if it's empty | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     $count = count_rows( $dbh2, 'assets' ); | 
					
						
							|  |  |  |     if ( $count == 0 ) { | 
					
						
							|  |  |  |         print "Initialising the 'assets' table\n"; | 
					
						
							|  |  |  |         initialise_assets( $dbh1, $dbh2 ); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     else { | 
					
						
							|  |  |  |         print "Did not initialise the 'assets' table ($count rows)\n"; | 
					
						
							|  |  |  |         $log->info("Did not initialise the 'assets' table ($count rows)"); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #===  FUNCTION  ================================================================ | 
					
						
							|  |  |  | #         NAME: initialise_episodes | 
					
						
							|  |  |  | #      PURPOSE: Initialise an empty 'episodes' table | 
					
						
							|  |  |  | #   PARAMETERS: $dbh1           Handle for the MySQL database | 
					
						
							|  |  |  | #               $dbh2           Handle for the SQLite database | 
					
						
							|  |  |  | #      RETURNS: Nothing | 
					
						
							|  |  |  | #  DESCRIPTION: Assuming the episodes table is empty loads all episodes from | 
					
						
							|  |  |  | #               the MariaDB database | 
					
						
							|  |  |  | #       THROWS: No exceptions | 
					
						
							|  |  |  | #     COMMENTS: None | 
					
						
							|  |  |  | #     SEE ALSO: N/A | 
					
						
							|  |  |  | #=============================================================================== | 
					
						
							|  |  |  | sub initialise_episodes { | 
					
						
							|  |  |  |     my ( $dbh1, $dbh2 ) = @_; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     my ( $sql1, $sth1, $h1, $count ); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     $sql1 = q{ | 
					
						
							|  |  |  |         SELECT | 
					
						
							|  |  |  |             id, | 
					
						
							|  |  |  |             date, | 
					
						
							|  |  |  |             title, | 
					
						
							|  |  |  |             summary | 
					
						
							|  |  |  |         FROM eps | 
					
						
							|  |  |  |     }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     $sth1 = $dbh1->prepare($sql1); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     $sth1->execute(); | 
					
						
							|  |  |  |     if ( $dbh1->err ) { | 
					
						
							|  |  |  |         croak $dbh1->errstr; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     $count = 0; | 
					
						
							|  |  |  |     while ( $h1 = $sth1->fetchrow_hashref ) { | 
					
						
							|  |  |  |         # | 
					
						
							|  |  |  |         # Fields are: | 
					
						
							|  |  |  |         # id                    integer, | 
					
						
							|  |  |  |         # rdate                 date NOT NULL, | 
					
						
							|  |  |  |         # title                 varchar ( 100 ) NOT NULL, | 
					
						
							|  |  |  |         # summary               varchar ( 100 ) NOT NULL, | 
					
						
							|  |  |  |         # uploaded              integer DEFAULT 0, | 
					
						
							|  |  |  |         # has_files             INTEGER DEFAULT 0, | 
					
						
							|  |  |  |         # with_files            INTEGER DEFAULT 0, | 
					
						
							|  |  |  |         # with_derived          INTEGER DEFAULT 0, | 
					
						
							|  |  |  |         # with_source           INTEGER DEFAULT 0, | 
					
						
							|  |  |  |         # archive_date          date, | 
					
						
							|  |  |  |         # item_last_updated     integer | 
					
						
							|  |  |  |         # IA_URL                text, | 
					
						
							|  |  |  |         # notes                 text, | 
					
						
							|  |  |  |         # | 
					
						
							|  |  |  |         #>>> | 
					
						
							|  |  |  |         $dbh2->do( | 
					
						
							|  |  |  |             q{INSERT INTO episodes (id, rdate, title, summary) VALUES(?,?,?,?)}, | 
					
						
							|  |  |  |             undef, | 
					
						
							|  |  |  |             $h1->{id}, | 
					
						
							|  |  |  |             $h1->{date}, | 
					
						
							|  |  |  |             $h1->{title}, | 
					
						
							|  |  |  |             $h1->{summary}, | 
					
						
							|  |  |  |         ) or die $dbh2->errstr; | 
					
						
							|  |  |  |         #<<< | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         $count++; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     print "Records added: $count\n"; | 
					
						
							|  |  |  |     $log->info("Records added to 'episodes' table: $count"); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #===  FUNCTION  ================================================================ | 
					
						
							|  |  |  | #         NAME: initialise_assets | 
					
						
							|  |  |  | #      PURPOSE: Initialise an empty 'assets' table (during database | 
					
						
							|  |  |  | #               initialisation) | 
					
						
							|  |  |  | #   PARAMETERS: $dbh1           Handle for the MariaDB database | 
					
						
							|  |  |  | #               $dbh2           Handle for the SQLite database | 
					
						
							|  |  |  | #      RETURNS: Nothing | 
					
						
							|  |  |  | #  DESCRIPTION: Assuming the assets table is empty loads all assets (links to | 
					
						
							|  |  |  | #               files on the HPR server) into the table | 
					
						
							|  |  |  | #       THROWS: No exceptions | 
					
						
							|  |  |  | #     COMMENTS: None | 
					
						
							|  |  |  | #     SEE ALSO: N/A | 
					
						
							|  |  |  | #=============================================================================== | 
					
						
							|  |  |  | sub initialise_assets { | 
					
						
							|  |  |  |     my ( $dbh1, $dbh2 ) = @_; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     my ( $sql1, $sth1, $h1, $count, $links, $id, $episode, $notes ); | 
					
						
							|  |  |  |     my %assets; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     # Collect the notes for shows that have links to other HPR files | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     $sql1 = q{ | 
					
						
							|  |  |  |         SELECT | 
					
						
							|  |  |  |             id, | 
					
						
							|  |  |  |             notes | 
					
						
							|  |  |  |         FROM eps | 
					
						
							|  |  |  |         WHERE notes REGEXP 'https?://(www.)?(hacker|hobby)publicradio.org/eps/' | 
					
						
							|  |  |  |     }; | 
					
						
							|  |  |  | #        LIMIT 20 OFFSET 30 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     $sth1 = $dbh1->prepare($sql1); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     $sth1->execute(); | 
					
						
							|  |  |  |     if ( $dbh1->err ) { | 
					
						
							|  |  |  |         croak $dbh1->errstr; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     # Process notes one show at a time, recursing into HTML where necessary | 
					
						
							|  |  |  |     # and building the %assets hash | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     $count = 0; | 
					
						
							|  |  |  |     while ( $h1 = $sth1->fetchrow_hashref ) { | 
					
						
							|  |  |  |         $links = 0; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         $id = $h1->{id}; | 
					
						
							|  |  |  |         $episode = sprintf("%04d", $id); | 
					
						
							|  |  |  |         $notes = $h1->{notes}; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         print "Episode: $id\n"; | 
					
						
							|  |  |  |         #$links = find_links($id,$notes,\%assets); | 
					
						
							|  |  |  |         $links = find_assets($ssh,$episode,$rempath,\%assets); | 
					
						
							|  |  |  |         print "Links found: $links\n"; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     #print Dumper(\%assets),"\n"; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     # Use the %assets hash to populate the 'assets' table | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     foreach my $key (sort(keys(%assets))) { | 
					
						
							|  |  |  |         foreach my $obj (@{$assets{$key}}) { | 
					
						
							|  |  |  |             #>>> | 
					
						
							|  |  |  |             $dbh2->do( | 
					
						
							|  |  |  |                 q{INSERT INTO assets (episode_id, URL, filename) VALUES(?,?,?)}, | 
					
						
							|  |  |  |                 undef, | 
					
						
							|  |  |  |                 $key, | 
					
						
							|  |  |  |                 $obj->{URL}, | 
					
						
							|  |  |  |                 $obj->{filename}, | 
					
						
							|  |  |  |             ) or die $dbh2->errstr; | 
					
						
							|  |  |  |             #<<< | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             $count++; | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     print "Records added: $count\n"; | 
					
						
							|  |  |  |     $log->info("Records added to 'assets' table: $count"); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #===  FUNCTION  ================================================================ | 
					
						
							|  |  |  | #         NAME: update_db | 
					
						
							|  |  |  | #      PURPOSE: Loads any additional rows from the MySQL database to the | 
					
						
							|  |  |  | #               SQLite one. | 
					
						
							|  |  |  | #   PARAMETERS: $dbh1           Handle for the MySQL database | 
					
						
							|  |  |  | #               $dbh2           Handle for the SQLite database | 
					
						
							|  |  |  | #               $dry_run        Boolean dry run setting | 
					
						
							|  |  |  | #               $verbose        Boolean verbose setting | 
					
						
							|  |  |  | #      RETURNS: Nothing | 
					
						
							|  |  |  | #  DESCRIPTION: Finds the first gap in the 'id' field of the 'episodes' table | 
					
						
							|  |  |  | #               in the SQLite database. This is the starting show number it | 
					
						
							|  |  |  | #               uses when querying the MySQL database. The function loops over | 
					
						
							|  |  |  | #               the 'eps' table in the MySQL database returning details for | 
					
						
							|  |  |  | #               storage in the SQLite database. Before storing it double | 
					
						
							|  |  |  | #               checks that the row in question does not exist. | 
					
						
							|  |  |  | #               TODO | 
					
						
							|  |  |  | #       THROWS: No exceptions | 
					
						
							|  |  |  | #     COMMENTS: None | 
					
						
							|  |  |  | #     SEE ALSO: N/A | 
					
						
							|  |  |  | #=============================================================================== | 
					
						
							|  |  |  | sub update_db { | 
					
						
							|  |  |  |     my ( $dbh1, $dbh2, $dry_run, $verbose ) = @_; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     my ( $sql1, $sql2, $sth1, $h1, $sth2, $h2 ); | 
					
						
							|  |  |  |     my ( $ep_count, $ffree); | 
					
						
							|  |  |  |     my ( %assets, $asset_count, $assets_added, $assets_deleted, $assets_updated ); | 
					
						
							|  |  |  |     my ( $id, $episode, $notes, $links ); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     print "Updating SQLite database from MySQL\n" if $verbose; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     # Find the first episode id gap in the SQLite database | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     $sql1 = q{ | 
					
						
							|  |  |  |         SELECT MIN (id) + 1 AS first_free | 
					
						
							|  |  |  |         FROM episodes t1 | 
					
						
							|  |  |  |         WHERE NOT EXISTS ( | 
					
						
							|  |  |  |             SELECT * | 
					
						
							|  |  |  |             FROM episodes t2 | 
					
						
							|  |  |  |             WHERE t1.id + 1 = t2.id) | 
					
						
							|  |  |  |     }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     $sth1 = $dbh2->prepare($sql1); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     $sth1->execute(); | 
					
						
							|  |  |  |     if ( $dbh1->err ) { | 
					
						
							|  |  |  |         croak $dbh1->errstr; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     $h1    = $sth1->fetchrow_hashref; | 
					
						
							|  |  |  |     $ffree = $h1->{first_free}; | 
					
						
							|  |  |  |     print "First free id: $ffree\n" if $verbose; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     # SQL to find new rows in the MySQL database | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     $sql1 = q{ | 
					
						
							|  |  |  |         SELECT | 
					
						
							|  |  |  |             id, | 
					
						
							|  |  |  |             date, | 
					
						
							|  |  |  |             title, | 
					
						
							|  |  |  |             summary, | 
					
						
							|  |  |  |             notes | 
					
						
							|  |  |  |         FROM eps | 
					
						
							|  |  |  |         WHERE id >= ? | 
					
						
							|  |  |  |     }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     # SQL to look for pre-existing episodes in the SQLite database | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     $sql2 = q{ | 
					
						
							|  |  |  |         SELECT * FROM episodes WHERE id = ? | 
					
						
							|  |  |  |     }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     # Prepare the SQL statements | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     $sth1 = $dbh1->prepare($sql1); | 
					
						
							|  |  |  |     $sth2 = $dbh2->prepare($sql2); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     # Query MySQL, from $ffree onwards | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     $sth1->execute($ffree); | 
					
						
							|  |  |  |     if ( $dbh1->err ) { | 
					
						
							|  |  |  |         croak $dbh1->errstr; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     # Loop through the MySQL rows | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     $ep_count = $asset_count = 0; | 
					
						
							|  |  |  |     while ( $h1 = $sth1->fetchrow_hashref ) { | 
					
						
							|  |  |  |         # | 
					
						
							|  |  |  |         # Query the SQLite database for the episode number we just got | 
					
						
							|  |  |  |         # | 
					
						
							|  |  |  |         $sth2->execute( $h1->{id} ); | 
					
						
							|  |  |  |         if ( $dbh2->err ) { | 
					
						
							|  |  |  |             croak $dbh2->errstr; | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |         $h2 = $sth2->fetchrow_hashref; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         # | 
					
						
							|  |  |  |         # If the row isn't already there create it | 
					
						
							|  |  |  |         # | 
					
						
							|  |  |  |         unless ($h2) { | 
					
						
							|  |  |  |             unless ($dry_run) { | 
					
						
							|  |  |  |                 #>>> | 
					
						
							|  |  |  |                 $dbh2->do( | 
					
						
							|  |  |  |                     q{INSERT INTO episodes (id, rdate, title, summary) VALUES(?,?,?,?)}, | 
					
						
							|  |  |  |                     undef, | 
					
						
							|  |  |  |                     $h1->{id}, | 
					
						
							|  |  |  |                     $h1->{date}, | 
					
						
							|  |  |  |                     $h1->{title}, | 
					
						
							|  |  |  |                     $h1->{summary}, | 
					
						
							|  |  |  |                 ) or die $dbh2->errstr; | 
					
						
							|  |  |  |                 #<<< | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |                 printf "Added episode: %d\n", $h1->{id} if $verbose; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |                 $ep_count++; | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  |             else { | 
					
						
							|  |  |  |                 printf | 
					
						
							|  |  |  |                     "Would have inserted a row into the 'episodes' table (%s)\n", | 
					
						
							|  |  |  |                     $h1->{id}; | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         # | 
					
						
							|  |  |  |         # Check this show for "assets" (whether we have the episode already or | 
					
						
							|  |  |  |         # not). Even in dry-run mode this gets executed | 
					
						
							|  |  |  |         # | 
					
						
							|  |  |  |         $links = 0; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         $id = $h1->{id}; | 
					
						
							|  |  |  |         $episode = sprintf("%04d", $id); | 
					
						
							|  |  |  |         # $notes = $h1->{notes}; | 
					
						
							|  |  |  |         # $links = find_links($id,$notes,\%assets); | 
					
						
							|  |  |  |         $links = find_assets($ssh,$episode,$rempath,\%assets); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         $asset_count += $links; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     # In debug mode show what assets were collected as we iterated through the | 
					
						
							|  |  |  |     # new shows. | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     _debug( $DEBUG >= 2, '%assets: ' . Dumper(\%assets) ); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     # Add any assets collected from the episodes just processed | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     $assets_added = 0; | 
					
						
							|  |  |  |     if ($asset_count > 0) { | 
					
						
							|  |  |  |         ( $assets_added, $assets_deleted, $assets_updated ) = | 
					
						
							|  |  |  |             process_assets( $dbh2, \%assets, 1, $dry_run, $verbose ); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     printf "%-16s %d\n", "Episodes added:", $ep_count; | 
					
						
							|  |  |  |     printf "%-16s %d\n", "Assets found:",   $asset_count; | 
					
						
							|  |  |  |     printf "%-16s %d\n", "Assets added:",   coalesce( $assets_added,   0 ); | 
					
						
							|  |  |  |     printf "%-16s %d\n", "Assets deleted:", coalesce( $assets_deleted, 0 ); | 
					
						
							|  |  |  |     printf "%-16s %d\n", "Assets updated:", coalesce( $assets_updated, 0 ); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     $log->info("Episodes added: $ep_count"); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #===  FUNCTION  ================================================================ | 
					
						
							|  |  |  | #         NAME: process_assets | 
					
						
							|  |  |  | #      PURPOSE: Given a collection of asset details obtained by searching | 
					
						
							|  |  |  | #               notes for a given range of episodes determine whether they | 
					
						
							|  |  |  | #               need to be added to the SQLite database. | 
					
						
							|  |  |  | #   PARAMETERS: $dbh            SQLite database handle | 
					
						
							|  |  |  | #               $assets         hashref pointing to the collected assets which | 
					
						
							|  |  |  | #                               have been found by parsing the notes of all | 
					
						
							|  |  |  | #                               relevant shows prior to calling this routine. | 
					
						
							|  |  |  | #               $new            Boolean flag to indicate that we're dealing with a new | 
					
						
							|  |  |  | #                               show or updating an old one | 
					
						
							|  |  |  | #               $dry_run        Boolean dry run setting | 
					
						
							|  |  |  | #               $verbose        Boolean verbose setting | 
					
						
							|  |  |  | #      RETURNS: Number of assets added | 
					
						
							|  |  |  | #  DESCRIPTION: The %assets hash has been accumulated by scanning all the | 
					
						
							|  |  |  | #               notes belonging to new episodes found in the MySQL database. | 
					
						
							|  |  |  | #               The hash is keyed by the episode number and it is scanned in | 
					
						
							|  |  |  | #               sorted order of the key. The SQLite database is queried for | 
					
						
							|  |  |  | #               assets with this key and the result held in a hash keyed by | 
					
						
							|  |  |  | #               the filename. The value of each key in the %assets hash is an | 
					
						
							|  |  |  | #               array of hashes holding the asset details, so we loop through | 
					
						
							|  |  |  | #               these. If the filename of a particular asset hash is not found | 
					
						
							|  |  |  | #               in the hash from the database then we create a new row in the | 
					
						
							|  |  |  | #               'assets' table (assuming that we're not in dry run mode where | 
					
						
							|  |  |  | #               we simply report what we would have done). If any assets were | 
					
						
							|  |  |  | #               added for an episode then we set the flag 'has_files' (unless | 
					
						
							|  |  |  | #               in dry run mode). The total number of assets added to the | 
					
						
							|  |  |  | #               database is returned by the function. | 
					
						
							|  |  |  | #       THROWS: No exceptions | 
					
						
							|  |  |  | #     COMMENTS: None | 
					
						
							|  |  |  | #     SEE ALSO: N/A | 
					
						
							|  |  |  | #=============================================================================== | 
					
						
							|  |  |  | sub process_assets { | 
					
						
							|  |  |  |     my ( $dbh, $assets, $new, $dry_run, $verbose ) = @_; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     my ( $sql, $sth, $db_assets, $key_field, $db_count ); | 
					
						
							|  |  |  |     my ( $asset_count, $assets_added, $assets_deleted, $assets_updated ); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     $key_field = 'id'; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     # We should only be called if there are assets, but just in case... | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     return ( 0, 0, 0 ) unless ($assets); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     # SQL to look for pre-existing assets in the SQLite database | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     $sql = q{ | 
					
						
							|  |  |  |         SELECT * FROM assets WHERE episode_id = ? | 
					
						
							|  |  |  |     }; | 
					
						
							|  |  |  |     $sth = $dbh->prepare($sql); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     say 'Processing assets for ', ( $new ? 'new' : 'existing' ), ' show' | 
					
						
							|  |  |  |         if $verbose; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     # Use the %assets hash to populate the 'assets' table. The hash is keyed | 
					
						
							|  |  |  |     # by episode numbers, and each entry consists of an array of hashes each | 
					
						
							|  |  |  |     # containing the filename and the URL of the asset. | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     $assets_added = $assets_deleted = $assets_updated = 0; | 
					
						
							|  |  |  |     foreach my $key ( sort( keys(%$assets) ) ) { | 
					
						
							|  |  |  |         print "Checking episode $key\n" if $verbose; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         # | 
					
						
							|  |  |  |         # Count the assets for the current show | 
					
						
							|  |  |  |         # | 
					
						
							|  |  |  |         $asset_count = scalar( @{ $assets->{$key} } ); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         _debug( $DEBUG >= 2, "Episode: $key, Asset_count: $asset_count" ); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |        # | 
					
						
							|  |  |  |        # Query the SQLite database for the episode number we just got, looking | 
					
						
							|  |  |  |        # for entries in the 'assets' table | 
					
						
							|  |  |  |        # | 
					
						
							|  |  |  |         $sth->execute($key); | 
					
						
							|  |  |  |         if ( $dbh->err ) { | 
					
						
							|  |  |  |             croak $dbh->errstr; | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         # | 
					
						
							|  |  |  |         # Grab everything as a hash keyed on the 'filename' field so we can | 
					
						
							|  |  |  |         # check it easily. Count what we got (i.e. count the number of assets | 
					
						
							|  |  |  |         # in SQLite for this episode). | 
					
						
							|  |  |  |         # NOTE: trying the idea of indexing $db_assets by the 'id' field (in | 
					
						
							|  |  |  |         # $key_field) | 
					
						
							|  |  |  |         # | 
					
						
							|  |  |  |         #        $db_assets = $sth->fetchall_hashref('filename'); | 
					
						
							|  |  |  |         $db_assets = $sth->fetchall_hashref($key_field); | 
					
						
							|  |  |  |         $db_count  = scalar( keys(%$db_assets) ); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         _debug( $DEBUG >= 2, "Assets in ia.db:\n" . Dumper($db_assets) ); | 
					
						
							|  |  |  |         _debug( $DEBUG >= 2, "DB count: $db_count" ); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         # | 
					
						
							|  |  |  |         # Compare the number of "real" assets with what SQLite holds. | 
					
						
							|  |  |  |         # | 
					
						
							|  |  |  |         if ( $asset_count > $db_count ) { | 
					
						
							|  |  |  |             # | 
					
						
							|  |  |  |             # We need to add assets. If it's a newly added show which has | 
					
						
							|  |  |  |             # assets this will always be necessary. If it's an existing show | 
					
						
							|  |  |  |             # then we're repairing an anomaly and want to say so. | 
					
						
							|  |  |  |             # | 
					
						
							|  |  |  |             if ($new) { | 
					
						
							|  |  |  |                 say "Adding assets" if $verbose; | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  |             else { | 
					
						
							|  |  |  |                 say "** Found more real assets than in DB -> adding"; | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  |             $assets_added += insert_assets( $dbh, $key, $assets, $db_assets, | 
					
						
							|  |  |  |                 $dry_run, $verbose ); | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |         elsif ( $asset_count < $db_count ) { | 
					
						
							|  |  |  |             # | 
					
						
							|  |  |  |             # We need to delete assets. This can't happen for an existing | 
					
						
							|  |  |  |             # show, so it's always a noteworthy event. | 
					
						
							|  |  |  |             # | 
					
						
							|  |  |  |             say "** Found fewer real assets than in DB -> deleting"; | 
					
						
							|  |  |  |             $assets_deleted += delete_assets( $dbh, $key, $assets, $db_assets, | 
					
						
							|  |  |  |                 $dry_run, $verbose ); | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         # | 
					
						
							|  |  |  |         # Look for field differences in what we have after the inserts and | 
					
						
							|  |  |  |         # deletes | 
					
						
							|  |  |  |         # | 
					
						
							|  |  |  |         if ( $assets_added > 0 || $assets_deleted > 0 ) { | 
					
						
							|  |  |  |             $sth->execute($key); | 
					
						
							|  |  |  |             if ( $dbh->err ) { | 
					
						
							|  |  |  |                 croak $dbh->errstr; | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  |             $db_assets = $sth->fetchall_hashref($key_field); | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |         my @results | 
					
						
							|  |  |  |             = synchronise_found_assets( $dbh, $key, $assets, $db_assets, | 
					
						
							|  |  |  |             $dry_run, $verbose ); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         $assets_added   += $results[0]; | 
					
						
							|  |  |  |         $assets_deleted += $results[1]; | 
					
						
							|  |  |  |         $assets_updated += $results[2]; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         _debug( $DEBUG >= 4, '@results ' . Dumper( \@results ) ); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         # | 
					
						
							|  |  |  |         # If there are assets then we need to ensure the 'has_files' setting | 
					
						
							|  |  |  |         # reflects it (unless in dry run mode) | 
					
						
							|  |  |  |         # | 
					
						
							|  |  |  |         if ( $asset_count > 0 ) { | 
					
						
							|  |  |  |             unless ($dry_run) { | 
					
						
							|  |  |  |                 $dbh->do( | 
					
						
							|  |  |  |                     q{ | 
					
						
							|  |  |  |                         UPDATE episodes | 
					
						
							|  |  |  |                         SET has_files = 1 | 
					
						
							|  |  |  |                         WHERE id = ? | 
					
						
							|  |  |  |                         AND has_files = 0 | 
					
						
							|  |  |  |                     }, | 
					
						
							|  |  |  |                     undef, | 
					
						
							|  |  |  |                     $key, | 
					
						
							|  |  |  |                 ) or die $dbh->errstr; | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  |             else { | 
					
						
							|  |  |  |                 say "Would have updated the 'episodes.has_files' ", | 
					
						
							|  |  |  |                     "field for $key (if needed)"; | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         #        $assets_added += $asset_count; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return ( $assets_added, $assets_deleted, $assets_updated ); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #===  FUNCTION  ================================================================ | 
					
						
							|  |  |  | #         NAME: synchronise_found_assets | 
					
						
							|  |  |  | #      PURPOSE: Compares the assets found by searching the shownotes (and | 
					
						
							|  |  |  | #               other files) with those in the SQLite database and updates | 
					
						
							|  |  |  | #               what differs. | 
					
						
							|  |  |  | #   PARAMETERS: $dbh            SQLite database handle | 
					
						
							|  |  |  | #               $episode        Numeric episode | 
					
						
							|  |  |  | #               $assets         Hashref pointing to the collected assets which | 
					
						
							|  |  |  | #                               have been found by parsing the show's notes; | 
					
						
							|  |  |  | #                               keyed by episode number. Each keyed object is | 
					
						
							|  |  |  | #                               an arrayref of hashes containing the URL and | 
					
						
							|  |  |  | #                               filename of the asset. | 
					
						
							|  |  |  | #               $db_assets      Hash of assets from SQLite, keyed by filename. | 
					
						
							|  |  |  | #                               Each keyed object is a hash of columns from | 
					
						
							|  |  |  | #                               the database: id, episode_id, URL, filename, | 
					
						
							|  |  |  | #                               uploaded. | 
					
						
							|  |  |  | #               $dry_run        Boolean dry run setting | 
					
						
							|  |  |  | #               $verbose        Boolean verbose setting | 
					
						
							|  |  |  | #      RETURNS: Number of additions, deletions and updates | 
					
						
							|  |  |  | #  DESCRIPTION: The assets for this episode have been found by searching the | 
					
						
							|  |  |  | #               HTML of the show notes and any linked HTML files for files on | 
					
						
							|  |  |  | #               the HPR server.  The equivalent assets have also been | 
					
						
							|  |  |  | #               collected from the SQLite database. A comparison is made | 
					
						
							|  |  |  | #               between these items and if they differ the definitive details | 
					
						
							|  |  |  | #               collected from the MySQL database are used. There are three | 
					
						
							|  |  |  | #               actions possible: (1) if the URL or filename fields differ | 
					
						
							|  |  |  | #               then we can work out the update and apply it, (2) if the | 
					
						
							|  |  |  | #               incoming assets don't match any in those from the database | 
					
						
							|  |  |  | #               then we can delete the database ones, and (3) we can add the | 
					
						
							|  |  |  | #               incoming assets we also couldn't match because they'll be the | 
					
						
							|  |  |  | #               replecements!  For all actions changes are made to the | 
					
						
							|  |  |  | #               versions in the SQLite database using SQL::Abstract to | 
					
						
							|  |  |  | #               construct the queries to do it. | 
					
						
							|  |  |  | #       THROWS: No exceptions | 
					
						
							|  |  |  | #     COMMENTS: TODO: There may be multiple asset changes for the episode. | 
					
						
							|  |  |  | #               Does the SQL::Abstract call perform the multiple changes? | 
					
						
							|  |  |  | #               Needs testing. | 
					
						
							|  |  |  | #     SEE ALSO: N/A | 
					
						
							|  |  |  | #=============================================================================== | 
					
						
							|  |  |  | sub synchronise_found_assets { | 
					
						
							|  |  |  |     my ( $dbh, $episode, $assets, $db_assets, $dry_run, $verbose ) = @_; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     my ( $sth, $additions, $deletions, $updates, $update_count, $index); | 
					
						
							|  |  |  |     my ( @fieldvals, @where, @db_rows, @asset_rows, %matches, %changes ); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     $additions = $deletions = $updates = 0; | 
					
						
							|  |  |  |     $update_count = 0; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     # Temporary? | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     if ( $DEBUG >= 4 ) { | 
					
						
							|  |  |  |         _debug( 1, 'In subroutine synchronise_found_assets' ); | 
					
						
							|  |  |  |         _debug( 1, "\$episode: $episode" ); | 
					
						
							|  |  |  |         _debug( 1, '%$assets: ' . Dumper($assets) ); | 
					
						
							|  |  |  |         _debug( 1, '%$db_assets: ' . Dumper($db_assets) ); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     _debug( $DEBUG >= 4, 'Looking for assets that match - to exclude them' ); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     @db_rows = ( keys(%$db_assets) ); | 
					
						
							|  |  |  |     @asset_rows = ( 0 .. scalar( @{ $assets->{$episode} } ) - 1 ); | 
					
						
							|  |  |  |     _debug( $DEBUG >= 4, 'Initial @db_rows ' . Dumper( \@db_rows ) ); | 
					
						
							|  |  |  |     _debug( $DEBUG >= 4, 'Initial @asset_rows ' . Dumper( \@asset_rows ) ); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     # Find any asset pair matches | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     $index = 0; | 
					
						
							|  |  |  |     foreach my $db_key ( keys(%$db_assets) ) { | 
					
						
							|  |  |  |         foreach my $a_obj ( @{ $assets->{$episode} } ) { | 
					
						
							|  |  |  |             if ( assets_match( $a_obj, $db_assets->{$db_key} ) == 2 ) { | 
					
						
							|  |  |  |                 $matches{$db_key} = [ $a_obj, $db_assets->{$db_key} ]; | 
					
						
							|  |  |  | #                splice( @asset_rows, $index, 1 ); | 
					
						
							|  |  |  |                 @asset_rows = grep { $_ != $index } @asset_rows; | 
					
						
							|  |  |  |                 $index++; | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     _debug( $DEBUG >= 4, 'Assets to ignore: %matches ' . Dumper( \%matches ) ); | 
					
						
							|  |  |  |     _debug( $DEBUG >= 4, 'Current @asset_rows ' . Dumper( \@asset_rows ) ); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     # If everything matched then there's nothing to do, so return zero and | 
					
						
							|  |  |  |     # leave. | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  | #    if (scalar(@db_rows) == scalar( keys(%matches) )) { | 
					
						
							|  |  |  |     if ( scalar(@asset_rows) == 0 ) { | 
					
						
							|  |  |  |         _debug( $DEBUG >= 4, 'All rows match, so nothing to do' ); | 
					
						
							|  |  |  |         return ( 0, 0, 0 ); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     _debug( $DEBUG >= 4, 'Some rows do not match' ); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     # Remove the row ids for any matches | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     @db_rows = grep { !exists( $matches{$_} ) } @db_rows; | 
					
						
							|  |  |  |     _debug( $DEBUG >= 4, '@db_rows after matches ' . Dumper( \@db_rows ) ); | 
					
						
							|  |  |  |     _debug( $DEBUG >= 4, '@asset_rows after matches ' . Dumper( \@asset_rows ) ); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     _debug( $DEBUG >= 4, 'Looking for assets that match in only one attribute' ); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     # Find any cases where only one element of the asset object matches. This | 
					
						
							|  |  |  |     # means we have a simple change. | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     $index = 0; | 
					
						
							|  |  |  |     foreach my $db_key (@db_rows) { | 
					
						
							|  |  |  |         foreach my $a_obj ( @{ $assets->{$episode} } ) { | 
					
						
							|  |  |  |             if ( assets_match( $a_obj, $db_assets->{$db_key} ) == 1 ) { | 
					
						
							|  |  |  |                 $changes{$db_key} = [ $a_obj, $db_assets->{$db_key} ]; | 
					
						
							|  |  |  |                 @asset_rows = grep { $_ != $index } @asset_rows; | 
					
						
							|  |  |  |                 $index++; | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     _debug( $DEBUG >= 4, 'Assets to change: %changes ' . Dumper( \%changes ) ); | 
					
						
							|  |  |  |     _debug( $DEBUG >= 4, 'Current @asset_rows ' . Dumper( \@asset_rows ) ); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     # Remove the row ids for any changes | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     @db_rows = grep { !exists( $changes{$_} ) } @db_rows; | 
					
						
							|  |  |  |     _debug( $DEBUG >= 4, '@db_rows after changes ' . Dumper( \@db_rows ) ); | 
					
						
							|  |  |  |     _debug( $DEBUG >= 4, '@asset_rows after changes ' . Dumper( \@asset_rows ) ); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     # If there are changes, do them now | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     if (%changes) { | 
					
						
							|  |  |  |         _debug( $DEBUG >= 4, 'Collecting changes' ); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         # | 
					
						
							|  |  |  |         # We may have multiple database rows to process | 
					
						
							|  |  |  |         # | 
					
						
							|  |  |  |         $index        = 0; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         foreach my $key ( keys(%changes) ) { | 
					
						
							|  |  |  |             $fieldvals[$index] = {}; | 
					
						
							|  |  |  |             $where[$index]     = { episode_id => $episode }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             # | 
					
						
							|  |  |  |             # Compare the fields we're interested in, accumulating differences | 
					
						
							|  |  |  |             # (expecting only one per key). | 
					
						
							|  |  |  |             # | 
					
						
							|  |  |  |             foreach my $fld ( 'filename', 'URL' ) { | 
					
						
							|  |  |  |                 if (!equal( | 
					
						
							|  |  |  |                         $changes{$key}->[0]->{$fld}, | 
					
						
							|  |  |  |                         $changes{$key}->[1]->{$fld} | 
					
						
							|  |  |  |                     ) | 
					
						
							|  |  |  |                     ) | 
					
						
							|  |  |  |                 { | 
					
						
							|  |  |  |                     $update_count++; | 
					
						
							|  |  |  |                     $fieldvals[$index]->{$fld} = $changes{$key}->[0]->{$fld}; | 
					
						
							|  |  |  |                 } | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             $where[$index]->{id} = $key; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             $index++; | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     _debug( $DEBUG >= 2, '@fieldvals: ' . Dumper( \@fieldvals ) ); | 
					
						
							|  |  |  |     _debug( $DEBUG >= 2, '@where ' . Dumper( \@where ) ); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if ( $update_count > 0 ) { | 
					
						
							|  |  |  |         _debug( $DEBUG >= 4, 'Performing changes' ); | 
					
						
							|  |  |  |         # | 
					
						
							|  |  |  |         # Prepare to use SQL::Abstract | 
					
						
							|  |  |  |         # | 
					
						
							|  |  |  |         my $sql = SQL::Abstract->new(); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         foreach my $fv (@fieldvals) { | 
					
						
							|  |  |  |             my ( $stmt, @bindvals ) | 
					
						
							|  |  |  |                 = $sql->update( 'assets', $fv, shift(@where) ); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             # | 
					
						
							|  |  |  |             # Perform the updates (unless in dry-run mode) | 
					
						
							|  |  |  |             # | 
					
						
							|  |  |  |             unless ($dry_run) { | 
					
						
							|  |  |  |                 $sth = $dbh->prepare($stmt); | 
					
						
							|  |  |  |                 $sth->execute(@bindvals); | 
					
						
							|  |  |  |                 if ( $dbh->err ) { | 
					
						
							|  |  |  |                     warn "Processing $episode\n", $dbh->errstr; | 
					
						
							|  |  |  |                 } | 
					
						
							|  |  |  |                 else { | 
					
						
							|  |  |  |                     print "Updated ", join( ',', keys(%$fv) ), "\n" | 
					
						
							|  |  |  |                         if $verbose; | 
					
						
							|  |  |  |                     $updates++; | 
					
						
							|  |  |  |                 } | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  |             else { | 
					
						
							|  |  |  |                 print "Dry run mode:\n"; | 
					
						
							|  |  |  |                 print "Statement: $stmt\n"; | 
					
						
							|  |  |  |                 print "Bindvals: ", join( ", ", @bindvals ), "\n"; | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     # If anything is left in @db_rows then they will be replacements | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     if ( scalar(@db_rows) > 0 ) { | 
					
						
							|  |  |  |         say "Replacements found: ", scalar(@db_rows); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         # | 
					
						
							|  |  |  |         # Prepare to use SQL::Abstract | 
					
						
							|  |  |  |         # | 
					
						
							|  |  |  |         my $sql = SQL::Abstract->new(); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         # | 
					
						
							|  |  |  |         # Every row which came from the database is to be deleted | 
					
						
							|  |  |  |         # | 
					
						
							|  |  |  |         $index = 0; | 
					
						
							|  |  |  |         foreach my $db_key (@db_rows) { | 
					
						
							|  |  |  |             $where[$index] = { episode_id => $episode }; | 
					
						
							|  |  |  |             $where[$index]->{id} = $db_key; | 
					
						
							|  |  |  |             $index++; | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |         _debug( $DEBUG >= 2, '@where ' . Dumper( \@where ) ); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         # | 
					
						
							|  |  |  |         # Do deletions | 
					
						
							|  |  |  |         # | 
					
						
							|  |  |  |         foreach my $db_key (@db_rows) { | 
					
						
							|  |  |  |             my ( $stmt, @bindvals ) | 
					
						
							|  |  |  |                 = $sql->delete( 'assets', shift(@where) ); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             # | 
					
						
							|  |  |  |             # Perform the deletions (unless in dry-run mode) | 
					
						
							|  |  |  |             # | 
					
						
							|  |  |  |             unless ($dry_run) { | 
					
						
							|  |  |  |                 $sth = $dbh->prepare($stmt); | 
					
						
							|  |  |  |                 $sth->execute(@bindvals); | 
					
						
							|  |  |  |                 if ( $dbh->err ) { | 
					
						
							|  |  |  |                     warn "Processing $episode\n", $dbh->errstr; | 
					
						
							|  |  |  |                 } | 
					
						
							|  |  |  |                 else { | 
					
						
							|  |  |  |                     print "Deleted ", join( ',', @db_rows ), "\n" | 
					
						
							|  |  |  |                         if $verbose; | 
					
						
							|  |  |  |                     $deletions++; | 
					
						
							|  |  |  |                 } | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  |             else { | 
					
						
							|  |  |  |                 print "Dry run mode:\n"; | 
					
						
							|  |  |  |                 print "Statement: $stmt\n"; | 
					
						
							|  |  |  |                 print "Bindvals: ", join( ", ", @bindvals ), "\n"; | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         # | 
					
						
							|  |  |  |         # Every object in the found assets array for this episode is to be | 
					
						
							|  |  |  |         # added. | 
					
						
							|  |  |  |         # | 
					
						
							|  |  |  |         foreach my $i (@asset_rows) { | 
					
						
							|  |  |  |             my $obj = $assets->{$episode}->[$i]; | 
					
						
							|  |  |  |             $obj->{episode_id} = $episode; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             my ( $stmt, @bindvals ) | 
					
						
							|  |  |  |                 = $sql->insert( 'assets', $obj ); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             # | 
					
						
							|  |  |  |             # Perform the additions (unless in dry-run mode) | 
					
						
							|  |  |  |             # | 
					
						
							|  |  |  |             unless ($dry_run) { | 
					
						
							|  |  |  |                 $sth = $dbh->prepare($stmt); | 
					
						
							|  |  |  |                 $sth->execute(@bindvals); | 
					
						
							|  |  |  |                 if ( $dbh->err ) { | 
					
						
							|  |  |  |                     warn "Processing $episode\n", $dbh->errstr; | 
					
						
							|  |  |  |                 } | 
					
						
							|  |  |  |                 else { | 
					
						
							|  |  |  |                     print "Inserted array elements ", join( ',', @asset_rows ), "\n" | 
					
						
							|  |  |  |                         if $verbose; | 
					
						
							|  |  |  |                     $additions++; | 
					
						
							|  |  |  |                 } | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  |             else { | 
					
						
							|  |  |  |                 print "Dry run mode:\n"; | 
					
						
							|  |  |  |                 print "Statement: $stmt\n"; | 
					
						
							|  |  |  |                 print "Bindvals: ", join( ", ", @bindvals ), "\n"; | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return ( $additions, $deletions, $updates ); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | ################################################################################ | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     # Look for database assets to be ignored. The outer loop iterates through | 
					
						
							|  |  |  |     # the primary key values of $db_assets where the rows all match the | 
					
						
							|  |  |  |     # $episode value. For each of these assets the next loop iterates through | 
					
						
							|  |  |  |     # the array of assets in $assets->{$episode} comparing the fields we want | 
					
						
							|  |  |  |     # to check in the next inner loop. If the checked fields are all equal | 
					
						
							|  |  |  |     # then there will be nothing to do. | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  | #    foreach my $db_key ( keys(%$db_assets) ) { | 
					
						
							|  |  |  | #        foreach my $a_obj ( @{ $assets->{$episode} } ) { | 
					
						
							|  |  |  | #            $ignore = $delete = $update = 0; | 
					
						
							|  |  |  | #            foreach my $fld ( 'filename', 'URL' ) { | 
					
						
							|  |  |  | #                $ignore | 
					
						
							|  |  |  | #                    += equal( $db_assets->{$db_key}->{$fld}, $a_obj->{$fld} ); | 
					
						
							|  |  |  | #                $delete | 
					
						
							|  |  |  | #                    += !equal( $db_assets->{$db_key}->{$fld}, $a_obj->{$fld} ); | 
					
						
							|  |  |  | #            } | 
					
						
							|  |  |  | #            push( @{ $row_actions{'ignore'} }, $db_assets->{$db_key}->{id} ) | 
					
						
							|  |  |  | #                if $ignore == 2; | 
					
						
							|  |  |  | #            push( @{ $row_actions{'delete'} }, $db_assets->{$db_key}->{id} ) | 
					
						
							|  |  |  | #                if $delete == 2; | 
					
						
							|  |  |  | #        } | 
					
						
							|  |  |  | #    } | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | #    _debug( $DEBUG >= 4, '%row_actions ' . Dumper( \%row_actions ) ); | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     # Scan the array of hashes obtained from the shownotes for $episode and | 
					
						
							|  |  |  |     # record things that need changing. | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  | #    $index = 0; | 
					
						
							|  |  |  | #    $updates = 0; | 
					
						
							|  |  |  | #    foreach my $obj ( @{ $assets->{$episode} } ) { | 
					
						
							|  |  |  | #        $fieldvals[$index] = {}; | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | #        # | 
					
						
							|  |  |  | #        # Compare the fields we're interested in | 
					
						
							|  |  |  | #        # | 
					
						
							|  |  |  | #        foreach my $fld ( 'filename', 'URL' ) { | 
					
						
							|  |  |  | #            if ( !equal( $obj->{$fld}, $db_assets->{$obj->{filename}}->{$fld} ) ) { | 
					
						
							|  |  |  | #                $updates++; | 
					
						
							|  |  |  | #                $fieldvals[$index]->{$fld} = $obj->{$fld}; | 
					
						
							|  |  |  | #            } | 
					
						
							|  |  |  | #        } | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | #        # TODO | 
					
						
							|  |  |  | #        $fieldvals[$index]->{id} = $db_assets->{$obj->{filename}}->{id}; | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | #        $index++; | 
					
						
							|  |  |  | #    } | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | #    _debug( $DEBUG >= 2, '@fieldvals: ' . Dumper( \@fieldvals ) ); | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | #    if ($updates > 0 ) { | 
					
						
							|  |  |  | #        # | 
					
						
							|  |  |  | #        # Prepare to use SQL::Abstract | 
					
						
							|  |  |  | #        # | 
					
						
							|  |  |  | #        my $sql = SQL::Abstract->new(); | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | #        $where{episode_id} = $episode; | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | #        foreach my $fv (@fieldvals) { | 
					
						
							|  |  |  | #            $where{id} = $fv->{id}; | 
					
						
							|  |  |  | #            delete($fv->{id}); | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | #            my ( $stmt, @bindvals ) | 
					
						
							|  |  |  | #                = $sql->update( 'assets', $fv, \%where ); | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | #            # | 
					
						
							|  |  |  | #            # Perform the updates (unless in dry-run mode) | 
					
						
							|  |  |  | #            # | 
					
						
							|  |  |  | #            unless ($dry_run) { | 
					
						
							|  |  |  | #                $sth = $dbh->prepare($stmt); | 
					
						
							|  |  |  | #                $sth->execute(@bindvals); | 
					
						
							|  |  |  | #                if ( $dbh->err ) { | 
					
						
							|  |  |  | #                    warn "Processing $episode\n", $dbh->errstr; | 
					
						
							|  |  |  | #                } | 
					
						
							|  |  |  | #                else { | 
					
						
							|  |  |  | #                    print "Updated ", join( ',', keys(%$fv) ), "\n" | 
					
						
							|  |  |  | #                        if $verbose; | 
					
						
							|  |  |  | #                } | 
					
						
							|  |  |  | #            } | 
					
						
							|  |  |  | #            else { | 
					
						
							|  |  |  | #                print "Dry run mode:\n"; | 
					
						
							|  |  |  | #                print "Statement: $stmt\n"; | 
					
						
							|  |  |  | #                print "Bindvals: ", join( ", ", @bindvals ), "\n"; | 
					
						
							|  |  |  | #            } | 
					
						
							|  |  |  | #        } | 
					
						
							|  |  |  | #    } | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | ################################################################################ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #===  FUNCTION  ================================================================ | 
					
						
							|  |  |  | #         NAME: insert_assets | 
					
						
							|  |  |  | #      PURPOSE: Find missing assets and add them to SQLite | 
					
						
							|  |  |  | #   PARAMETERS: $dbh            SQLite database handle | 
					
						
							|  |  |  | #               $episode        Numeric episode | 
					
						
							|  |  |  | #               $assets         Hashref pointing to the collected assets which | 
					
						
							|  |  |  | #                               have been found by parsing the show's notes; | 
					
						
							|  |  |  | #                               keyed by episode number. | 
					
						
							|  |  |  | #               $db_assets      Hash of assets from SQLite, keyed by filename | 
					
						
							|  |  |  | #               $dry_run        Boolean dry run setting | 
					
						
							|  |  |  | #               $verbose        Boolean verbose setting | 
					
						
							|  |  |  | #      RETURNS: Number of insertions | 
					
						
							|  |  |  | #  DESCRIPTION: The assets for this episode have been found by searching the | 
					
						
							|  |  |  | #               HTML of the show notes and any linked HTML files for files on | 
					
						
							|  |  |  | #               the HPR server.  The equivalent assets have also been | 
					
						
							|  |  |  | #               collected from the SQLite database. The MySQL assets are | 
					
						
							|  |  |  | #               reorganised by filename. For each file which does not already | 
					
						
							|  |  |  | #               exist in the SQLite list details are prepared and passed to | 
					
						
							|  |  |  | #               SQL::Abstract using the multi insert feature. The missing | 
					
						
							|  |  |  | #               details are inserted into the assets table unless dry-run mode | 
					
						
							|  |  |  | #               is on. | 
					
						
							|  |  |  | #       THROWS: No exceptions | 
					
						
							|  |  |  | #     COMMENTS: None | 
					
						
							|  |  |  | #     SEE ALSO: N/A | 
					
						
							|  |  |  | #=============================================================================== | 
					
						
							|  |  |  | sub insert_assets { | 
					
						
							|  |  |  |     my ( $dbh, $episode, $assets, $db_assets, $dry_run, $verbose ) = @_; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     my ($sth, %db_by_file, %assets_by_file, $insertions, @multi_insert, %where); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     # Reorder the %$db_assets hash in filename order | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     foreach my $key ( keys( %{$db_assets} ) ) { | 
					
						
							|  |  |  |         $db_by_file{$db_assets->{$key}->{filename}} = $db_assets->{$key}; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     # Reorder the %assets hash in filename order | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     foreach my $obj ( @{ $assets->{$episode} } ) { | 
					
						
							|  |  |  |         $assets_by_file{$obj->{filename}} = $obj; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     _debug( $DEBUG >= 2, "%db_by_file: " . Dumper( \%db_by_file ) ); | 
					
						
							|  |  |  |     _debug( $DEBUG >= 2, "%assets_by_file: " . Dumper( \%assets_by_file ) ); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     # Find stuff for insertion. We need to build @multi_insert such that it | 
					
						
							|  |  |  |     # contains a list of hashrefs, each containing the fields we want to | 
					
						
							|  |  |  |     # insert. | 
					
						
							|  |  |  |     # Note: We make a new %fv each time, fill it as a slice, and push it into | 
					
						
							|  |  |  |     # the array @multi_insert. This may be rather obscure and therefore | 
					
						
							|  |  |  |     # error-prone. | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     $insertions = 0; | 
					
						
							|  |  |  |     foreach my $key ( keys(%assets_by_file) ) { | 
					
						
							|  |  |  | #        if ( !exists( $db_assets->{$key} ) ) { | 
					
						
							|  |  |  |         if ( !exists( $db_by_file{$key} ) ) { | 
					
						
							|  |  |  |             my %fv; | 
					
						
							|  |  |  |             $insertions++; | 
					
						
							|  |  |  |             @fv{ 'episode_id', 'URL', 'filename' } = ( | 
					
						
							|  |  |  |                 $episode, $assets_by_file{$key}->{URL}, | 
					
						
							|  |  |  |                 $assets_by_file{$key}->{filename} | 
					
						
							|  |  |  |             ); | 
					
						
							|  |  |  |             push( @multi_insert, \%fv ); | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     _debug( $DEBUG >= 2, '@multi_insert: ' . Dumper( \@multi_insert ) ); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if ($insertions > 0) { | 
					
						
							|  |  |  |         # | 
					
						
							|  |  |  |         # Prepare to use SQL::Abstract | 
					
						
							|  |  |  |         # | 
					
						
							|  |  |  |         my $sql = SQL::Abstract->new(); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         my ( $stmt, @bindvals ) | 
					
						
							|  |  |  |             = $sql->insert_multi( 'assets', \@multi_insert ); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         # | 
					
						
							|  |  |  |         # Perform the insertions (unless in dry-run mode) | 
					
						
							|  |  |  |         # | 
					
						
							|  |  |  |         unless ($dry_run) { | 
					
						
							|  |  |  |             $sth = $dbh->prepare($stmt); | 
					
						
							|  |  |  |             $sth->execute(@bindvals); | 
					
						
							|  |  |  |             if ( $dbh->err ) { | 
					
						
							|  |  |  |                 warn "Processing $episode\n", $dbh->errstr; | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  |             else { | 
					
						
							|  |  |  | #                print "Inserted ",join(',',keys(%where)),"\n" if $verbose; | 
					
						
							|  |  |  |                 print "Inserted $insertions assets\n" if $verbose; | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |         else { | 
					
						
							|  |  |  |             print "Dry run mode:\n"; | 
					
						
							|  |  |  |             print "Statement: $stmt\n"; | 
					
						
							|  |  |  |             print "Bindvals: ", join( ", ", @bindvals ), "\n"; | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return $insertions; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #===  FUNCTION  ================================================================ | 
					
						
							|  |  |  | #         NAME: delete_assets | 
					
						
							|  |  |  | #      PURPOSE: Find superfluous assets for an episode in the 'assets' table | 
					
						
							|  |  |  | #               and delete them. | 
					
						
							|  |  |  | #   PARAMETERS: $dbh            SQLite database handle | 
					
						
							|  |  |  | #               $episode        Numeric episode | 
					
						
							|  |  |  | #               $assets         Hashref pointing to the collected assets which | 
					
						
							|  |  |  | #                               have been found by parsing the show's notes; | 
					
						
							|  |  |  | #                               keyed by episode number. | 
					
						
							|  |  |  | #               $db_assets      Hash of assets from SQLite, keyed by filename | 
					
						
							|  |  |  | #               $dry_run        Boolean dry run setting | 
					
						
							|  |  |  | #               $verbose        Boolean verbose setting | 
					
						
							|  |  |  | #      RETURNS: Number of deletions | 
					
						
							|  |  |  | #  DESCRIPTION: The assets for this episode have been found by searching the | 
					
						
							|  |  |  | #               HTML of the show notes and any linked HTML files for files on | 
					
						
							|  |  |  | #               the HPR server.  The equivalent assets have also been | 
					
						
							|  |  |  | #               collected from the SQLite database. The MySQL assets are | 
					
						
							|  |  |  | #               reorganised by filename. For each file exists in the SQLite | 
					
						
							|  |  |  | #               list details are prepared and passed to SQL::Abstract. The | 
					
						
							|  |  |  | #               unwanted details are deleted from the assets table unless | 
					
						
							|  |  |  | #               dry-run mode is on. | 
					
						
							|  |  |  | #       THROWS: No exceptions | 
					
						
							|  |  |  | #     COMMENTS: None | 
					
						
							|  |  |  | #     SEE ALSO: N/A | 
					
						
							|  |  |  | #=============================================================================== | 
					
						
							|  |  |  | sub delete_assets { | 
					
						
							|  |  |  |     my ( $dbh, $episode, $assets, $db_assets, $dry_run, $verbose ) = @_; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     my ($sth, %db_by_file, %assets_by_file, $deletions, %where); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     # Reorder the %$db_assets hash in filename order | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     foreach my $key ( keys( %{$db_assets} ) ) { | 
					
						
							|  |  |  |         $db_by_file{$db_assets->{$key}->{filename}} = $db_assets->{$key}; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     # Reorder the %assets hash in filename order | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     foreach my $obj ( @{ $assets->{$episode} } ) { | 
					
						
							|  |  |  |         $assets_by_file{$obj->{filename}} = $obj; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     _debug( $DEBUG >= 2, "%db_by_file: " . Dumper( \%db_by_file ) ); | 
					
						
							|  |  |  |     _debug( $DEBUG >= 2, "%assets_by_file: " . Dumper( \%assets_by_file ) ); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     # Only delete assets with this value in the 'episode_id' field | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     $where{episode_id} = $episode; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     # Find stuff for deletion. We need to build %where such that it generates | 
					
						
							|  |  |  |     # 'WHERE filename = <value1> OR filename = <value2> ...', so we add these | 
					
						
							|  |  |  |     # values to an (anonymous) array. | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     $deletions = 0; | 
					
						
							|  |  |  |     foreach my $key ( keys(%$db_assets) ) { | 
					
						
							|  |  |  | #        if ( !exists( $assets_by_file{$key} ) ) { | 
					
						
							|  |  |  |         if ( !exists( $db_by_file{$key} ) ) { | 
					
						
							|  |  |  |             $deletions++; | 
					
						
							|  |  |  |             if ( !exists( $where{filename} ) ) { | 
					
						
							|  |  |  |                 $where{filename} = [$key]; | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  |             else { | 
					
						
							|  |  |  |                 push( @{ $where{filename} }, $key ); | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     _debug( $DEBUG >= 2, "%where: " . Dumper( \%where ) ); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if ($deletions > 0) { | 
					
						
							|  |  |  |         # | 
					
						
							|  |  |  |         # Prepare to use SQL::Abstract | 
					
						
							|  |  |  |         # | 
					
						
							|  |  |  |         my $sql = SQL::Abstract->new(); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         my ( $stmt, @bindvals ) | 
					
						
							|  |  |  |             = $sql->delete( 'assets', \%where ); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         # | 
					
						
							|  |  |  |         # Perform the deletions (unless in dry-run mode) | 
					
						
							|  |  |  |         # | 
					
						
							|  |  |  |         unless ($dry_run) { | 
					
						
							|  |  |  |             $sth = $dbh->prepare($stmt); | 
					
						
							|  |  |  |             $sth->execute(@bindvals); | 
					
						
							|  |  |  |             if ( $dbh->err ) { | 
					
						
							|  |  |  |                 warn "Processing $episode\n", $dbh->errstr; | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  |             else { | 
					
						
							|  |  |  |                 print "Deleted ",join(',',keys(%where)),"\n" if $verbose; | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |         else { | 
					
						
							|  |  |  |             print "Dry run mode:\n"; | 
					
						
							|  |  |  |             print "Statement: $stmt\n"; | 
					
						
							|  |  |  |             print "Bindvals: ", join( ", ", @bindvals ), "\n"; | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return $deletions; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #===  FUNCTION  ================================================================ | 
					
						
							|  |  |  | #         NAME: update_episode | 
					
						
							|  |  |  | #      PURPOSE: Perform a database update for a specific episode | 
					
						
							|  |  |  | #   PARAMETERS: $dbh1           Handle for the MariaDB database | 
					
						
							|  |  |  | #               $dbh2           Handle for the SQLite database | 
					
						
							|  |  |  | #               $episode        Target episode to update | 
					
						
							|  |  |  | #               $dry_run        Boolean dry run setting | 
					
						
							|  |  |  | #               $verbose        Boolean verbose setting | 
					
						
							|  |  |  | #      RETURNS: Nothing | 
					
						
							|  |  |  | #  DESCRIPTION: Performs an update for a single episode in the SQLite | 
					
						
							|  |  |  | #               database. The usual scenario is after the Community News show | 
					
						
							|  |  |  | #               has been posted, or a spelling mistake has been corrected for | 
					
						
							|  |  |  | #               an episode. The episode is selected from the MySQL database | 
					
						
							|  |  |  | #               and the SQLite one. The 'title' and 'summary' fields are | 
					
						
							|  |  |  | #               compared across the two copies, and if they differ the | 
					
						
							|  |  |  | #               necessary changes are set up to be executed or reported in | 
					
						
							|  |  |  | #               'dry run' mode. | 
					
						
							|  |  |  | #               NOTE: No longer done this way: | 
					
						
							|  |  |  | #               |A search for assets is made by reading the show notes and any | 
					
						
							|  |  |  | #               |notes that are linked from these, and if there are | 
					
						
							|  |  |  | #               |supplementary files they are accumulated and the database | 
					
						
							|  |  |  | #               |updated as necessary (under control of the 'dry run' option). | 
					
						
							|  |  |  | #               |This is currently the only way that the row in the SQLite | 
					
						
							|  |  |  | #               |database can be updated after it has been loaded from the | 
					
						
							|  |  |  | #               |MySQL database, other than when the metadata is collected and | 
					
						
							|  |  |  | #               |applied. The metadata update process does not fix errors in | 
					
						
							|  |  |  | #               |the 'title' or 'summary' fields. | 
					
						
							|  |  |  | #       THROWS: No exceptions | 
					
						
							|  |  |  | #     COMMENTS: None | 
					
						
							|  |  |  | #     SEE ALSO: N/A | 
					
						
							|  |  |  | #=============================================================================== | 
					
						
							|  |  |  | sub update_episode { | 
					
						
							|  |  |  |     my ( $dbh1, $dbh2, $episode, $dry_run, $verbose ) = @_; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     my ( $sql1, $sql2, $sth1, $h1, $sth2, $h2, $sth3 ); | 
					
						
							|  |  |  |     my ( %assets, $asset_count, $assets_added, $assets_deleted, $assets_updated ); | 
					
						
							|  |  |  |     my ( $id, $notes, $links ); | 
					
						
							|  |  |  |     my ( $updates, %fieldvals, %where ); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     # SQL to find new rows in the MySQL database | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     $sql1 = q{ | 
					
						
							|  |  |  |         SELECT | 
					
						
							|  |  |  |             id, | 
					
						
							|  |  |  |             date, | 
					
						
							|  |  |  |             title, | 
					
						
							|  |  |  |             summary, | 
					
						
							|  |  |  |             notes | 
					
						
							|  |  |  |         FROM eps | 
					
						
							|  |  |  |         WHERE id = ? | 
					
						
							|  |  |  |     }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     # SQL to look for pre-existing episodes in the SQLite database | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     $sql2 = q{ | 
					
						
							|  |  |  |         SELECT * FROM episodes WHERE id = ? | 
					
						
							|  |  |  |     }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     # Prepare the SQL statements | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     $sth1 = $dbh1->prepare($sql1); | 
					
						
							|  |  |  |     $sth2 = $dbh2->prepare($sql2); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     # Query MySQL, for the $episode | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     $sth1->execute($episode); | 
					
						
							|  |  |  |     if ( $dbh1->err ) { | 
					
						
							|  |  |  |         croak $dbh1->errstr; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     # Loop through the MySQL row (should only be one) | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     $asset_count = $updates = 0; | 
					
						
							|  |  |  |     while ( $h1 = $sth1->fetchrow_hashref ) { | 
					
						
							|  |  |  |         # | 
					
						
							|  |  |  |         # Query the SQLite database for the episode number we just got | 
					
						
							|  |  |  |         # | 
					
						
							|  |  |  |         $sth2->execute( $h1->{id} ); | 
					
						
							|  |  |  |         if ( $dbh2->err ) { | 
					
						
							|  |  |  |             croak $dbh2->errstr; | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |         $h2 = $sth2->fetchrow_hashref; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         # | 
					
						
							|  |  |  |         # If the row doesn't exist we have a problem. | 
					
						
							|  |  |  |         # TODO: Could we just add the episode at this point? | 
					
						
							|  |  |  |         # | 
					
						
							|  |  |  |         unless ($h2) { | 
					
						
							|  |  |  |             warn "Row $episode is not in the SQLite database\n"; | 
					
						
							|  |  |  |             return; | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         # | 
					
						
							|  |  |  |         # Look for title and summary changes and build a hash holding the | 
					
						
							|  |  |  |         # changes | 
					
						
							|  |  |  |         # | 
					
						
							|  |  |  |         foreach my $fld ( 'title', 'summary' ) { | 
					
						
							|  |  |  |             if ( !equal( $h1->{$fld}, $h2->{$fld} ) ) { | 
					
						
							|  |  |  |                 $updates++; | 
					
						
							|  |  |  |                 $fieldvals{$fld} = $h1->{$fld}; | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         if ($updates > 0 ) { | 
					
						
							|  |  |  |             # | 
					
						
							|  |  |  |             # Prepare to use SQL::Abstract | 
					
						
							|  |  |  |             # | 
					
						
							|  |  |  |             my $sql = SQL::Abstract->new(); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             $where{id} = $episode; | 
					
						
							|  |  |  |             my ( $stmt, @bindvals ) | 
					
						
							|  |  |  |                 = $sql->update( 'episodes', \%fieldvals, \%where ); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             # | 
					
						
							|  |  |  |             # Perform the updates (unless in dry-run mode) | 
					
						
							|  |  |  |             # | 
					
						
							|  |  |  |             unless ($dry_run) { | 
					
						
							|  |  |  |                 $sth3 = $dbh2->prepare($stmt); | 
					
						
							|  |  |  |                 $sth3->execute(@bindvals); | 
					
						
							|  |  |  |                 if ( $dbh2->err ) { | 
					
						
							|  |  |  |                     warn "Processing $episode\n", $dbh2->errstr; | 
					
						
							|  |  |  |                 } | 
					
						
							|  |  |  |                 else { | 
					
						
							|  |  |  |                     print "Updated ",join(',',keys(%fieldvals)),"\n" if $verbose; | 
					
						
							|  |  |  |                 } | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  |             else { | 
					
						
							|  |  |  |                 print "Dry run mode:\n"; | 
					
						
							|  |  |  |                 print "Statement: $stmt\n"; | 
					
						
							|  |  |  |                 print "Bindvals: ", join( ", ", @bindvals ), "\n"; | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         # | 
					
						
							|  |  |  |         # Check this show for "assets". Even in dry-run mode this gets | 
					
						
							|  |  |  |         # executed | 
					
						
							|  |  |  |         # | 
					
						
							|  |  |  |         $links = 0; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         $id = $h1->{id}; | 
					
						
							|  |  |  |         $notes = $h1->{notes}; | 
					
						
							|  |  |  |         # $links = find_links($id,$notes,\%assets); | 
					
						
							|  |  |  |         $links = find_assets($ssh,$episode,$rempath,\%assets); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         $asset_count += $links; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if ( $asset_count > 0 ) { | 
					
						
							|  |  |  |         _debug( $DEBUG >= 2, "Scanned assets:\n" . Dumper( \%assets ) ); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     else { | 
					
						
							|  |  |  |         _debug( $DEBUG >= 2, "No assets found" ); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     # Add any assets collected from the episode just processed | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     $assets_added = 0; | 
					
						
							|  |  |  |     if ($asset_count > 0) { | 
					
						
							|  |  |  |         ( $assets_added, $assets_deleted, $assets_updated ) = | 
					
						
							|  |  |  |             process_assets( $dbh2, \%assets, 0, $dry_run, $verbose ); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     printf "%-16s %d\n", "Episode checked:", $episode; | 
					
						
							|  |  |  |     printf "%-16s %d\n", "Episode updates:", $updates; | 
					
						
							|  |  |  |     printf "%-16s %d\n", "Assets found:",    $asset_count; | 
					
						
							|  |  |  |     printf "%-16s %d\n", "Assets added:",    coalesce( $assets_added,   0 ); | 
					
						
							|  |  |  |     printf "%-16s %d\n", "Assets deleted:",  coalesce( $assets_deleted, 0 ); | 
					
						
							|  |  |  |     printf "%-16s %d\n", "Assets updated:",  coalesce( $assets_updated, 0 ); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     $log->info("Episode number: $episode"); | 
					
						
							|  |  |  |     $log->info("Episode updates: $updates"); | 
					
						
							|  |  |  |     $log->info( 'Assets added,deleted,updated:', | 
					
						
							|  |  |  |         coalesce( $assets_added, 0 ), ',', | 
					
						
							|  |  |  |         coalesce( $assets_deleted, 0 ), ',', | 
					
						
							|  |  |  |         coalesce( $assets_updated, 0 ) | 
					
						
							|  |  |  |     ); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #===  FUNCTION  ================================================================ | 
					
						
							|  |  |  | #         NAME: load_metadata | 
					
						
							|  |  |  | #      PURPOSE: Loads values from a JSON file which has been generated by the | 
					
						
							|  |  |  | #               'ia metadata' command and stores them in the 'episodes' table. | 
					
						
							|  |  |  | #               Also uses the JSON data to determine if the derived files and | 
					
						
							|  |  |  | #               the source file have been uploaded and which assets have been | 
					
						
							|  |  |  | #               uploaded. | 
					
						
							|  |  |  | #   PARAMETERS: $dbh            Handle for the SQLite database | 
					
						
							|  |  |  | #               $file           Name of file of JSON data | 
					
						
							|  |  |  | #               $dry_run        Boolean dry run setting | 
					
						
							|  |  |  | #               $verbose        Boolean verbose setting | 
					
						
							|  |  |  | #      RETURNS: Nothing | 
					
						
							|  |  |  | #  DESCRIPTION: The input file is expected to consist of many JSON complex | 
					
						
							|  |  |  | #               objects, generated by the command 'ia metadata <identifier>' | 
					
						
							|  |  |  | #               for a list of HPR episodes (see script 'collect_show_data'). | 
					
						
							|  |  |  | #               A count is kept of successful updates, and this is reported | 
					
						
							|  |  |  | #               once finished. | 
					
						
							|  |  |  | #       THROWS: No exceptions | 
					
						
							|  |  |  | #     COMMENTS: None | 
					
						
							|  |  |  | #     SEE ALSO: N/A | 
					
						
							|  |  |  | #=============================================================================== | 
					
						
							|  |  |  | sub load_metadata { | 
					
						
							|  |  |  |     my ( $dbh, $file, $dry_run, $verbose ) = @_; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     my ( $filebuffer, @jsonbuffer, %cols, %fieldvals, %where ); | 
					
						
							|  |  |  |     my ( @deletions, $index ); | 
					
						
							|  |  |  |     my ( @stash, @assets, $orig_count, $source, $epid, $count, $updates ); | 
					
						
							|  |  |  |     my ( $sth1, $sth2, $sth3, $rv, $h ); | 
					
						
							|  |  |  |     my $template = "https://archive.org/details/hpr%04d"; | 
					
						
							|  |  |  |     my @flds = ( | 
					
						
							|  |  |  |         'uploaded',          'has_files', | 
					
						
							|  |  |  |         'with_files',        'with_derived', | 
					
						
							|  |  |  |         'with_source',       'archive_date', | 
					
						
							|  |  |  |         'item_last_updated', 'IA_URL', | 
					
						
							|  |  |  |     ); | 
					
						
							|  |  |  |     my $re = qr{^hpr\d{4}\.(flac|mp3|ogg|opus|spx|wav)$}; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     # Read the entire JSON file into a buffer | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     try { | 
					
						
							|  |  |  |         $filebuffer = read_text($file); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     catch { | 
					
						
							|  |  |  |         die "Failed to read JSON file $file\n"; | 
					
						
							|  |  |  |     }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     # Parse the JSON from the buffer | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     my $json = JSON->new; | 
					
						
							|  |  |  |     @jsonbuffer = $json->incr_parse($filebuffer); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     #print Dumper(\@jsonbuffer),"\n"; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     # Check that this is the type of JSON we need | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     unless (@jsonbuffer) { | 
					
						
							|  |  |  |         warn "Empty JSON?\n"; | 
					
						
							|  |  |  |         return; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     # Look at each top-level JSON object and check for missing components. | 
					
						
							|  |  |  |     # It's possible for there to be missing objects, but only in very rare | 
					
						
							|  |  |  |     # cases when a range has been requested yet there's a gap in that range | 
					
						
							|  |  |  |     # (reserved slot perhaps). We accumulate indices of invalid objects in | 
					
						
							|  |  |  |     # @jsonbuffer and then delete the invalid objects. | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     $index = 0; | 
					
						
							|  |  |  |     foreach my $obj (@jsonbuffer) { | 
					
						
							|  |  |  |         unless ( exists( $obj->{metadata} ) | 
					
						
							|  |  |  |             && exists( $obj->{metadata}->{identifier} ) | 
					
						
							|  |  |  |             && exists( $obj->{files} )) | 
					
						
							|  |  |  |         { | 
					
						
							|  |  |  |             warn "Invalid JSON? Skipped object #$index\n"; | 
					
						
							|  |  |  |             push( @deletions, $index ); | 
					
						
							|  |  |  |             next; | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |         $index++; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     # Delete any bad objects we found in the previous pass | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     for my $ind (@deletions) { | 
					
						
							|  |  |  |         splice( @jsonbuffer, $ind, 1 ); | 
					
						
							|  |  |  |         print STDERR "Deleted invalid item with index $ind\n" if $verbose; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     print "\n" if $verbose; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     # Prepare to get the database row relating to an episode | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     $sth1 = $dbh->prepare("SELECT * FROM episodes WHERE id = ?"); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     # Prepare to get the rows relating to assets for an episode | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     $sth2 = $dbh->prepare("SELECT * FROM assets WHERE episode_id = ?"); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     $count = 0; | 
					
						
							|  |  |  |     foreach my $obj (@jsonbuffer) { | 
					
						
							|  |  |  |         if ( $obj->{metadata}->{identifier} =~ /hpr(\d{4})/ ) { | 
					
						
							|  |  |  |             $epid = $1 + 0; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             # | 
					
						
							|  |  |  |             # Skip the episodes in the range 1-620 | 
					
						
							|  |  |  |             # | 
					
						
							|  |  |  |             #next if ( $epid <= 620 ); | 
					
						
							|  |  |  |             # | 
					
						
							|  |  |  |             # 2021-06-27: We don't skip any more. Shows 1-620 have been | 
					
						
							|  |  |  |             # uploaded in blocks, but we want them on the IA in the same | 
					
						
							|  |  |  |             # format as all the others. Today we hit 620, working backwards | 
					
						
							|  |  |  |             # from the earliest IA upload as a project to upload everything | 
					
						
							|  |  |  |             # with show notes and assets. The existing database entries were | 
					
						
							|  |  |  |             # changed to make the 'notes' field hold the text 'Block upload' | 
					
						
							|  |  |  |             # followed by the block archive date and the URL. Then the rows | 
					
						
							|  |  |  |             # between 1-620 are then to have the new date and URL and these | 
					
						
							|  |  |  |             # columns have been made NULL in preparation. See the Journal for | 
					
						
							|  |  |  |             # more information. | 
					
						
							|  |  |  |             # | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             if ($verbose) { | 
					
						
							|  |  |  |                 print "Processing ",  $obj->{metadata}->{identifier}, "\n"; | 
					
						
							|  |  |  |                 print "Publicdate: ", $obj->{metadata}->{publicdate}, "\n"; | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             # | 
					
						
							|  |  |  |             # Process the files in the metadata | 
					
						
							|  |  |  |             # | 
					
						
							|  |  |  |             $orig_count = 0; | 
					
						
							|  |  |  |             $source     = 0; | 
					
						
							|  |  |  |             undef(@stash); | 
					
						
							|  |  |  |             foreach my $f ( @{ $obj->{files} } ) { | 
					
						
							|  |  |  |                 # | 
					
						
							|  |  |  |                 # Skip anything with a 'Metatda' format or which has a source | 
					
						
							|  |  |  |                 # of 'derivative' or 'metadata' or is the newly added | 
					
						
							|  |  |  |                 # '__ia_thumb.jpg' | 
					
						
							|  |  |  |                 # | 
					
						
							|  |  |  |                 next | 
					
						
							|  |  |  |                     if ( $f->{format} eq 'Metadata' | 
					
						
							|  |  |  |                     || $f->{source} =~ /^(derivative|metadata)$/ | 
					
						
							|  |  |  |                     || $f->{name} eq '__ia_thumb.jpg' | 
					
						
							|  |  |  |                     || $f->{name} =~ /^history\/files\// ); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |                 # | 
					
						
							|  |  |  |                 # Count audio files which have been "derived" (by us) prior to upload, | 
					
						
							|  |  |  |                 # and those that look like 'source' files (e.g. | 
					
						
							|  |  |  |                 # hpr2472_source.flac). Anything else store in @stash because | 
					
						
							|  |  |  |                 # it's probably an asset file. | 
					
						
							|  |  |  |                 # | 
					
						
							|  |  |  |                 if ( $f->{name} =~ $re ) { | 
					
						
							|  |  |  |                     $orig_count++; | 
					
						
							|  |  |  |                 } | 
					
						
							|  |  |  |                 elsif ( $f->{name} =~ /^hpr\d{4}_source\..+$/ ) { | 
					
						
							|  |  |  |                     $source++; | 
					
						
							|  |  |  |                 } | 
					
						
							|  |  |  |                 else { | 
					
						
							|  |  |  |                     push(@stash,$f->{name}); | 
					
						
							|  |  |  |                 } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |                 printf "%s %s\n", $f->{name}, $f->{source} if $verbose; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             # | 
					
						
							|  |  |  |             # Check the assets before updating the episode | 
					
						
							|  |  |  |             # | 
					
						
							|  |  |  |             @assets = collect_assets($dbh, $sth2, $epid); | 
					
						
							|  |  |  |             _debug( $DEBUG >= 2, '@assets: ' . Dumper(\@assets)); | 
					
						
							|  |  |  |             _debug( $DEBUG >= 2, '@stash: ' . Dumper(\@stash)); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             if (@assets || @stash) { | 
					
						
							|  |  |  |                 @assets = sort(@assets); | 
					
						
							|  |  |  |                 @stash = sort(@stash); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |                 # | 
					
						
							|  |  |  |                 # Smart match the @assets and @stash arrays to look for | 
					
						
							|  |  |  |                 # difference. In @assets we have what the database holds, and | 
					
						
							|  |  |  |                 # in @stash we have stored what we found in the JSON. | 
					
						
							|  |  |  |                 # NOTE: Now that Smartmatch ('~~') is deprecated we use the | 
					
						
							|  |  |  |                 # rather clever 'match::smart' with its strange |M| operator. | 
					
						
							|  |  |  |                 # It needs arrays being matched to be offered as references | 
					
						
							|  |  |  |                 # however. | 
					
						
							|  |  |  |                 # | 
					
						
							|  |  |  |                 # unless ( @assets ~~ @stash ) { | 
					
						
							|  |  |  |                 unless ( \@assets |M| \@stash ) { | 
					
						
							|  |  |  |                     print "Difference between DB assets and IA\n"; | 
					
						
							|  |  |  |                     print "DB assets:\n    ",join("\n    ",@assets),"\n"; | 
					
						
							|  |  |  |                     print "IA assets:\n    ",join("\n    ",@stash),"\n"; | 
					
						
							|  |  |  |                 } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |                 # | 
					
						
							|  |  |  |                 # In live mode mark all assets belonging to this episode as | 
					
						
							|  |  |  |                 # 'uploaded'. | 
					
						
							|  |  |  |                 # TODO: Is this dangerous? | 
					
						
							|  |  |  |                 # | 
					
						
							|  |  |  |                 unless ($dry_run) { | 
					
						
							|  |  |  |                     mark_assets_uploaded($dbh,$epid); | 
					
						
							|  |  |  |                 } | 
					
						
							|  |  |  |                 else { | 
					
						
							|  |  |  |                     print "Update of 'assets' table skipped; dry run\n"; | 
					
						
							|  |  |  |                 } | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             # | 
					
						
							|  |  |  |             # Prepare to use SQL::Abstract | 
					
						
							|  |  |  |             # | 
					
						
							|  |  |  |             my $sql = SQL::Abstract->new(); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             # | 
					
						
							|  |  |  |             # Get the current row for this episode for comparison to find what | 
					
						
							|  |  |  |             # has been changed. | 
					
						
							|  |  |  |             # | 
					
						
							|  |  |  |             $rv = $sth1->execute($epid); | 
					
						
							|  |  |  |             if ( $dbh->err ) { | 
					
						
							|  |  |  |                 warn $dbh->errstr; | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             $h = $sth1->fetchrow_hashref; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             # | 
					
						
							|  |  |  |             # Store the changes | 
					
						
							|  |  |  |             # | 
					
						
							|  |  |  |             $cols{'uploaded'}          = 1; | 
					
						
							|  |  |  |             $cols{'has_files'}         = ( scalar(@stash) > 0 ? 1 : 0 ); | 
					
						
							|  |  |  |             $cols{'with_files'}        = ( scalar(@stash) > 0 ? 1 : 0 ); | 
					
						
							|  |  |  |             $cols{'with_derived'}      = ( $orig_count >= 2   ? 1 : 0 ); | 
					
						
							|  |  |  |             $cols{'with_source'}       = ( $source > 0        ? 1 : 0 ); | 
					
						
							|  |  |  |             $cols{'archive_date'}      = $obj->{metadata}->{publicdate}; | 
					
						
							|  |  |  |             $cols{'item_last_updated'} = coalesce($obj->{item_last_updated},0); | 
					
						
							|  |  |  |             $cols{'IA_URL'}            = sprintf( $template, $epid ); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             # | 
					
						
							|  |  |  |             # Look for differences, storing the updated values in %fieldvals | 
					
						
							|  |  |  |             # and counting the changes. | 
					
						
							|  |  |  |             # | 
					
						
							|  |  |  |             $updates = 0; | 
					
						
							|  |  |  |             undef(%fieldvals); | 
					
						
							|  |  |  |             foreach my $fld (@flds) { | 
					
						
							|  |  |  |                 if ( !equal( $h->{$fld}, $cols{$fld} ) ) { | 
					
						
							|  |  |  |                     $updates++; | 
					
						
							|  |  |  |                     $fieldvals{$fld} = $cols{$fld}; | 
					
						
							|  |  |  |                 } | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             if ( $updates > 0 ) { | 
					
						
							|  |  |  |                 # | 
					
						
							|  |  |  |                 # Build the update statement containing changes. It will be: | 
					
						
							|  |  |  |                 # "UPDATE episodes SET a = b, c = d, ... WHERE id = $epid" | 
					
						
							|  |  |  |                 # | 
					
						
							|  |  |  |                 $where{id} = $epid; | 
					
						
							|  |  |  |                 my ( $stmt, @bindvals ) | 
					
						
							|  |  |  |                     = $sql->update( 'episodes', \%fieldvals, \%where ); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |                 # | 
					
						
							|  |  |  |                 # Perform the updates (unless in dry-run mode) | 
					
						
							|  |  |  |                 # | 
					
						
							|  |  |  |                 unless ($dry_run) { | 
					
						
							|  |  |  |                     $sth3 = $dbh->prepare($stmt); | 
					
						
							|  |  |  |                     $sth3->execute(@bindvals); | 
					
						
							|  |  |  |                     if ( $dbh->err ) { | 
					
						
							|  |  |  |                         warn "Processing $epid\n", $dbh->errstr; | 
					
						
							|  |  |  |                     } | 
					
						
							|  |  |  |                     else { | 
					
						
							|  |  |  |                         print "Updated\n" if $verbose; | 
					
						
							|  |  |  |                         $count++; | 
					
						
							|  |  |  |                     } | 
					
						
							|  |  |  |                 } | 
					
						
							|  |  |  |                 else { | 
					
						
							|  |  |  |                     print "Dry run mode:\n"; | 
					
						
							|  |  |  |                     print "Statement: $stmt\n"; | 
					
						
							|  |  |  |                     print "Bindvals: ", join( ", ", @bindvals ), "\n"; | 
					
						
							|  |  |  |                 } | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |         else { | 
					
						
							|  |  |  |             printf "Unexpected identifier: %s\n", $obj->{metadata}->{identifier}; | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         print '-' x 80, "\n" if $verbose; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     print "Number of updates: $count\n"; | 
					
						
							|  |  |  |     $log->info("Number of updates from JSON: $count"); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #===  FUNCTION  ================================================================ | 
					
						
							|  |  |  | #         NAME: collect_assets | 
					
						
							|  |  |  | #      PURPOSE: Collects asset names for a given episode | 
					
						
							|  |  |  | #   PARAMETERS: $dbh            Handle for the SQLite database | 
					
						
							|  |  |  | #               $sth            Statement handle for the prepared query | 
					
						
							|  |  |  | #               $epid           Episode id | 
					
						
							|  |  |  | #      RETURNS: A list of the asset names | 
					
						
							|  |  |  | #  DESCRIPTION: Just prepares a list of asset filenames by querying the SQLite | 
					
						
							|  |  |  | #               'assets' table and returns it to the caller | 
					
						
							|  |  |  | #       THROWS: No exceptions | 
					
						
							|  |  |  | #     COMMENTS: None | 
					
						
							|  |  |  | #     SEE ALSO: N/A | 
					
						
							|  |  |  | #=============================================================================== | 
					
						
							|  |  |  | sub collect_assets { | 
					
						
							|  |  |  |     my ( $dbh, $sth, $epid ) = @_; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     my ( $rv, $h, @result ); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     # Query the database | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     $rv = $sth->execute($epid); | 
					
						
							|  |  |  |     if ( $dbh->err ) { | 
					
						
							|  |  |  |         warn $dbh->errstr; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     while ($h = $sth->fetchrow_hashref) { | 
					
						
							|  |  |  |         push(@result,$h->{filename}); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return @result; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #===  FUNCTION  ================================================================ | 
					
						
							|  |  |  | #         NAME: mark_assets_uploaded | 
					
						
							|  |  |  | #      PURPOSE: Marks the assets relating to an episode as 'uploaded' | 
					
						
							|  |  |  | #   PARAMETERS: $dbh            Handle for the SQLite database | 
					
						
							|  |  |  | #               $epid           Episode id | 
					
						
							|  |  |  | #      RETURNS: Number of updates | 
					
						
							|  |  |  | #  DESCRIPTION: Mark any assets for a given episode as uploaded, if they are | 
					
						
							|  |  |  | #               not so marked. Expected to be used during an update from JSON | 
					
						
							|  |  |  | #               data collected from the IA. | 
					
						
							|  |  |  | #       THROWS: No exceptions | 
					
						
							|  |  |  | #     COMMENTS: None | 
					
						
							|  |  |  | #     SEE ALSO: N/A | 
					
						
							|  |  |  | #=============================================================================== | 
					
						
							|  |  |  | sub mark_assets_uploaded { | 
					
						
							|  |  |  |     my ( $dbh, $epid ) = @_; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     my ($rows); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     $rows = $dbh->do( | 
					
						
							|  |  |  |         q{UPDATE assets SET uploaded = 1 | 
					
						
							|  |  |  |           WHERE episode_id = ? AND uploaded = 0}, | 
					
						
							|  |  |  |         undef, | 
					
						
							|  |  |  |         $epid, | 
					
						
							|  |  |  |     ) or die $dbh->errstr; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     $rows += 0; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return $rows; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #===  FUNCTION  ================================================================ | 
					
						
							|  |  |  | #         NAME: scan_dir | 
					
						
							|  |  |  | #      PURPOSE: Scans a file from the HPR server generated with a simple | 
					
						
							|  |  |  | #               'find' command which lists all the files and directories under | 
					
						
							|  |  |  | #               the ~/www/eps/ directory. | 
					
						
							|  |  |  | #   PARAMETERS: $dbh            Handle for the SQLite database | 
					
						
							|  |  |  | #               $file           Name of file of directory data | 
					
						
							|  |  |  | #      RETURNS: Nothing | 
					
						
							|  |  |  | #  DESCRIPTION: Under development | 
					
						
							|  |  |  | #       THROWS: No exceptions | 
					
						
							|  |  |  | #     COMMENTS: None | 
					
						
							|  |  |  | #     SEE ALSO: N/A | 
					
						
							|  |  |  | #=============================================================================== | 
					
						
							|  |  |  | sub scan_dir { | 
					
						
							|  |  |  |     my ( $dbh, $file ) = @_; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     my ( %dirtree, $ptr, %shows, $key, $id ); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     # Open the file of paths | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     open( my $fh, "<:encoding(UTF-8)", $file ) | 
					
						
							|  |  |  |         or die "Unable to open '$file'\n"; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     # Loop through all paths. If it's one of the audio files then skip it | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     while ( my $line = <$fh> ) { | 
					
						
							|  |  |  |         chomp($line); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         next if ( $line =~ /(flac|mp3|ogg|opus|spx|wav)$/ ); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         # | 
					
						
							|  |  |  |         # Point into the tree hash and add the necessary nodes from the split | 
					
						
							|  |  |  |         # path. The result is a hierarchical has structure where the 'leaf' | 
					
						
							|  |  |  |         # node has an empty anonymous hash as its value. | 
					
						
							|  |  |  |         # | 
					
						
							|  |  |  |         # %dirtree = { | 
					
						
							|  |  |  |         #     'www' => { | 
					
						
							|  |  |  |         #        'eps' => { | 
					
						
							|  |  |  |         #            'hpr2356' => { | 
					
						
							|  |  |  |         #                 'index.html' => {}, | 
					
						
							|  |  |  |         #                 'fix-ssh-on-pi.bash.txt' => {} | 
					
						
							|  |  |  |         #             }, | 
					
						
							|  |  |  |         #         }, | 
					
						
							|  |  |  |         #     }, | 
					
						
							|  |  |  |         # } | 
					
						
							|  |  |  |         # | 
					
						
							|  |  |  |         $ptr = \%dirtree; | 
					
						
							|  |  |  |         $ptr = $ptr->{$_} //= {} for split( /\//, $line ); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     close($fh); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     #print Dumper(\%dirtree),"\n"; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     # Build a hash indexed by IA identifier containing an array per value with | 
					
						
							|  |  |  |     # the files relating to the show therein | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     $ptr = \%dirtree; | 
					
						
							|  |  |  |     # TODO if there are multiple items at a level which one to test | 
					
						
							|  |  |  |     $key = ( keys(%$ptr) )[0]; | 
					
						
							|  |  |  |     #$key = first { defined($_) } keys(%$ptr); | 
					
						
							|  |  |  |     until ( $key =~ /^hpr[0-9]{4}/ ) { | 
					
						
							|  |  |  |         print "$key\n"; | 
					
						
							|  |  |  |         $ptr = $ptr->{$key}; | 
					
						
							|  |  |  |         $key = ( keys(%$ptr) )[0]; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     #print join(", ",keys(%$ptr)); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # TODO Doesn't deal with sub-directories | 
					
						
							|  |  |  |     foreach my $k ( keys(%$ptr) ) { | 
					
						
							|  |  |  |         if ( ($id) = ( $k =~ /^(hpr[0-9]{4})/ ) ) { | 
					
						
							|  |  |  |             unless ( defined( $shows{$id} ) ) { | 
					
						
							|  |  |  |                 $shows{$id} = []; | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  |             push( @{ $shows{$id} }, $k ); | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     print Dumper( \%shows ), "\n"; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #===  FUNCTION  ================================================================ | 
					
						
							|  |  |  | #         NAME: find_assets | 
					
						
							|  |  |  | #      PURPOSE: An alternative way to look for assets; connect to the server | 
					
						
							|  |  |  | #               with SSH and use a 'find' command for matching files and | 
					
						
							|  |  |  | #               directories. | 
					
						
							|  |  |  | #   PARAMETERS: $ssh            Net::OpenSSH object previously set up to | 
					
						
							|  |  |  | #                               connect to the server | 
					
						
							|  |  |  | #               $episode        episode number we're dealing with | 
					
						
							|  |  |  | #               $rempath        path to search on the server | 
					
						
							|  |  |  | #               $rfiles         hashref to receive the files (assets) found | 
					
						
							|  |  |  | #      RETURNS: Number of files found | 
					
						
							|  |  |  | #  DESCRIPTION:  | 
					
						
							|  |  |  | #       THROWS: No exceptions | 
					
						
							|  |  |  | #     COMMENTS: None | 
					
						
							|  |  |  | #     SEE ALSO: N/A | 
					
						
							|  |  |  | #=============================================================================== | 
					
						
							|  |  |  | sub find_assets { | 
					
						
							|  |  |  |     my ($ssh, $episode, $rempath, $rfiles) = @_; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     my $urlbase = 'https://hackerpublicradio.org/eps/'; | 
					
						
							|  |  |  |     my (@result, @row, $URL, $file, $assetcount); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     $rempath .= '/' unless ($rempath =~ /\/$/); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     @result = $ssh->capture({ timeout => 10 }, | 
					
						
							|  |  |  |         "find $rempath -regextype egrep -regex '.*/hpr${episode}.*' -printf '%y,%p,%s\n'"); | 
					
						
							|  |  |  |     $ssh->error and warn "remote 'find' command failed: " . $ssh->error; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #       d,/home/hpr/www/eps/hpr3775,85 | 
					
						
							|  |  |  | #       f,/home/hpr/www/eps/hpr3775/hpr3775_source.wav,75006546 | 
					
						
							|  |  |  | #       f,/home/hpr/www/eps/hpr3775/hpr3775.vtt,13127 | 
					
						
							|  |  |  | #       f,/home/hpr/www/eps/hpr3775/hpr3775.srt,13830 | 
					
						
							|  |  |  | #       f,/home/hpr/www/eps/hpr3775/hpr3775.txt,11119 | 
					
						
							|  |  |  | #       f,/home/hpr/www/eps/hpr3775.mp3,7449559 | 
					
						
							|  |  |  | #       f,/home/hpr/www/eps/hpr3775.ogg,10631998 | 
					
						
							|  |  |  | #       f,/home/hpr/www/eps/hpr3775.spx,3516736 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     $assetcount = 0; | 
					
						
							|  |  |  |     $rfiles->{$episode} = []; | 
					
						
							|  |  |  |     for my $file (@result) { | 
					
						
							|  |  |  |         @row = split( ',', $file ); | 
					
						
							|  |  |  |         if ( $row[0] eq 'f' ) { | 
					
						
							|  |  |  |             unless ( $row[1] =~ /$episode\.(mp3|ogg|spx)$/ ) { | 
					
						
							|  |  |  |                 ( $URL  = $row[1] ) =~ s/$rempath/$urlbase/; | 
					
						
							|  |  |  |                 ( $file = $row[1] ) =~ s/$rempath//; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |                 push( | 
					
						
							|  |  |  |                     @{ $rfiles->{$episode} }, | 
					
						
							|  |  |  |                         { filename => $file, URL => $URL } | 
					
						
							|  |  |  |                 ); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |                 $assetcount++; | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     _debug( $DEBUG >= 2, "Results from 'find':\n" . Dumper( \@result ) ); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     # Return the asset count | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     return $assetcount; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #===  FUNCTION  ================================================================ | 
					
						
							|  |  |  | #         NAME: find_links | 
					
						
							|  |  |  | #      PURPOSE: Parses the HTML in a string for links so that a list of assets | 
					
						
							|  |  |  | #               relating to this show can be made | 
					
						
							|  |  |  | #   PARAMETERS: $episode        episode number we're dealing with | 
					
						
							|  |  |  | #               $html           string containing HTML | 
					
						
							|  |  |  | #               $rlinks         hashref to receive the links found | 
					
						
							|  |  |  | #      RETURNS: Number of links found | 
					
						
							|  |  |  | #  DESCRIPTION: Given HTML from the main notes or a subsidiary file the | 
					
						
							|  |  |  | #               function parses this looking for links in 'a' or 'img' tags. | 
					
						
							|  |  |  | #               Links are standardised, making them absolute if relative and | 
					
						
							|  |  |  | #               removing any 'fragment'. The links need to be to files on the | 
					
						
							|  |  |  | #               HPR website to be of interest. If so the filename part is | 
					
						
							|  |  |  | #               extracted. If it follows the format 'hpr9999' then it's | 
					
						
							|  |  |  | #               checked to see if it's for the current show. If not it's | 
					
						
							|  |  |  | #               ignored. If the filename ends with a '/' then it's assumed | 
					
						
							|  |  |  | #               it's shorthand for 'index.html' so this name is appended. Then | 
					
						
							|  |  |  | #               the link and filename are stashed in the hash referenced by | 
					
						
							|  |  |  | #               $rlinks. If the filename ends with '.html' then we need to | 
					
						
							|  |  |  | #               parse it in turn, so we get the contents of the link and | 
					
						
							|  |  |  | #               recurse to parse it. We return the number of HPR links found | 
					
						
							|  |  |  | #               in the pass through the HTML. | 
					
						
							|  |  |  | #       THROWS: No exceptions | 
					
						
							|  |  |  | #     COMMENTS: None | 
					
						
							|  |  |  | #     SEE ALSO: N/A | 
					
						
							|  |  |  | #=============================================================================== | 
					
						
							|  |  |  | sub find_links { | 
					
						
							|  |  |  |     my ( $episode, $html, $rlinks ) = @_; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     my ($tree, $epstr, $linkre,    $re2, $filepath, | 
					
						
							|  |  |  |         $uri,  $slink, $linkcount, $content | 
					
						
							|  |  |  |     ); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     _debug( $DEBUG >= 3, "find_links enter\n" ); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     # Create a tree object | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     $tree = HTML::TreeBuilder->new; | 
					
						
							|  |  |  |     $tree->ignore_unknown(0); | 
					
						
							|  |  |  |     $tree->no_expand_entities(1); | 
					
						
							|  |  |  |     $tree->p_strict(1); | 
					
						
							|  |  |  |     $tree->store_comments(1); | 
					
						
							|  |  |  |     $tree->warn(1); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     $tree->parse_content($html) | 
					
						
							|  |  |  |         or die "HTML::TreeBuilder failed to parse notes: $!\n"; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     my $baseURL = "https://hackerpublicradio.org"; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     $epstr = sprintf( "hpr%04d", $episode ); | 
					
						
							|  |  |  |     $linkre = qr{ | 
					
						
							|  |  |  |         ^https?:// | 
					
						
							|  |  |  |         (?:www.)? | 
					
						
							|  |  |  |         (?:hacker|hobby)publicradio.org/eps/ | 
					
						
							|  |  |  |         (.+)$ | 
					
						
							|  |  |  |     }x; | 
					
						
							|  |  |  |     #(?:(hpr$epstr/.+)|(hpr$epstr.+)|(.+))$ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     # Counting new links found and stashed | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     $linkcount = 0; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     # Scan for links | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     for ( @{ $tree->extract_links( 'a', 'img' ) } ) { | 
					
						
							|  |  |  |         my ( $link, $element, $attr, $tag ) = @$_; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         # | 
					
						
							|  |  |  |         # Standardise the link (expands relative URLs, removes any fragment). | 
					
						
							|  |  |  |         # Set $URI::ABS_REMOTE_LEADING_DOTS to ensure leading dots in relative | 
					
						
							|  |  |  |         # URIs are removed. | 
					
						
							|  |  |  |         # | 
					
						
							|  |  |  |         local $URI::ABS_REMOTE_LEADING_DOTS = 1; | 
					
						
							|  |  |  |         $uri = URI->new_abs( $link, $baseURL ); | 
					
						
							|  |  |  |         $slink = sprintf( "%s:%s", $uri->scheme, $uri->opaque ); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         # | 
					
						
							|  |  |  |         # Is it an HPR link? | 
					
						
							|  |  |  |         # | 
					
						
							|  |  |  |         if ( $slink =~ $linkre ) { | 
					
						
							|  |  |  |             # | 
					
						
							|  |  |  |             # The URL we found might be a link into an HTML file with an | 
					
						
							|  |  |  |             # '#anchor' component ("fragment"). Save the last bracketed match, | 
					
						
							|  |  |  |             # without any 'fragment' if there is one to get a clean filename | 
					
						
							|  |  |  |             # or path. | 
					
						
							|  |  |  |             # | 
					
						
							|  |  |  |             ( $filepath = "$+" ) =~ s/#.*$//; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             _debug( $DEBUG >= 3, "Link:      $slink\n" ); | 
					
						
							|  |  |  |             _debug( $DEBUG >= 3, "File path: $filepath\n" ); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             # | 
					
						
							|  |  |  |             # Does this file path begin with an 'hpr' prefix? If so is it the | 
					
						
							|  |  |  |             # show id? If not we don't want to process it. | 
					
						
							|  |  |  |             # | 
					
						
							|  |  |  |             if ( $filepath =~ /^(hpr[0-9]{1,4})/ ) { | 
					
						
							|  |  |  |                 if ( $1 ne $epstr ) { | 
					
						
							|  |  |  |                     _debug( $DEBUG >= 3, "Ignored $slink\n" ); | 
					
						
							|  |  |  |                     next; | 
					
						
							|  |  |  |                 } | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             # | 
					
						
							|  |  |  |             # The path and URL might end with a slash which means the URL is | 
					
						
							|  |  |  |             # relying on the Web server to fill in the filename as | 
					
						
							|  |  |  |             # 'index.html'. We have to make this explicit. | 
					
						
							|  |  |  |             # | 
					
						
							|  |  |  |             if ( $slink =~ /\/$/ ) { | 
					
						
							|  |  |  |                 $slink    .= 'index.html'; | 
					
						
							|  |  |  |                 $filepath .= 'index.html'; | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             # | 
					
						
							|  |  |  |             # Initialise this hash element if needed | 
					
						
							|  |  |  |             # | 
					
						
							|  |  |  |             unless ( exists( $rlinks->{$episode} ) ) { | 
					
						
							|  |  |  |                 $rlinks->{$episode} = []; | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             # | 
					
						
							|  |  |  |             # Stash this filename if it's not already stashed, and if it's | 
					
						
							|  |  |  |             # HTML get the link and recurse | 
					
						
							|  |  |  |             # | 
					
						
							|  |  |  |             unless ( | 
					
						
							|  |  |  |                 any { $_->{filename} eq $filepath } @{ $rlinks->{$episode} } | 
					
						
							|  |  |  |                 ) | 
					
						
							|  |  |  |             { | 
					
						
							|  |  |  |                 _debug( $DEBUG >= 3, "Stashed $slink and $filepath\n" ); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |                 push( | 
					
						
							|  |  |  |                     @{ $rlinks->{$episode} }, | 
					
						
							|  |  |  |                         { filename => $filepath, URL => $slink } | 
					
						
							|  |  |  |                 ); | 
					
						
							|  |  |  |                 $linkcount++; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |                 # | 
					
						
							|  |  |  |                 # An HTML file has to be investigated | 
					
						
							|  |  |  |                 # | 
					
						
							|  |  |  |                 if ( $filepath =~ /\.html$/ ) { | 
					
						
							|  |  |  |                     $content = get($slink); | 
					
						
							|  |  |  |                     unless ( defined($content) ) { | 
					
						
							|  |  |  |                         carp "Link $slink returned nothing\n"; | 
					
						
							|  |  |  |                     } | 
					
						
							|  |  |  |                     else { | 
					
						
							|  |  |  |                         $linkcount | 
					
						
							|  |  |  |                             += find_links( $episode, $content, $rlinks ); | 
					
						
							|  |  |  |                     } | 
					
						
							|  |  |  |                 } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     _debug( $DEBUG >= 3, "find_links exiting with $linkcount links\n" ); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     # Return the link count | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     return $linkcount; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #===  FUNCTION  ================================================================ | 
					
						
							|  |  |  | #         NAME: count_rows | 
					
						
							|  |  |  | #      PURPOSE: Counts the rows in a given SQLite table | 
					
						
							|  |  |  | #   PARAMETERS: $dbh            Handle for the SQLite database | 
					
						
							|  |  |  | #               $table          Name of table | 
					
						
							|  |  |  | #      RETURNS: Number of rows | 
					
						
							|  |  |  | #  DESCRIPTION: Builds SQL from a template including the selected $table. | 
					
						
							|  |  |  | #               Returns the number of rows once the query has been run. | 
					
						
							|  |  |  | #       THROWS: No exceptions | 
					
						
							|  |  |  | #     COMMENTS: None | 
					
						
							|  |  |  | #     SEE ALSO: N/A | 
					
						
							|  |  |  | #=============================================================================== | 
					
						
							|  |  |  | sub count_rows { | 
					
						
							|  |  |  |     my ( $dbh, $table ) = @_; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     my ( $sql, $sth, $h, $count ); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     # Count rows in the selected SQLite table | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     $sql = "SELECT count(*) AS rowcount FROM $table"; | 
					
						
							|  |  |  |     $sth = $dbh->prepare($sql); | 
					
						
							|  |  |  |     $sth->execute(); | 
					
						
							|  |  |  |     if ( $dbh->err ) { | 
					
						
							|  |  |  |         croak $dbh->errstr; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     $count = 0; | 
					
						
							|  |  |  |     if ($h = $sth->fetchrow_hashref) { | 
					
						
							|  |  |  |         $count = $h->{rowcount}; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return $count; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #===  FUNCTION  ================================================================ | 
					
						
							|  |  |  | #         NAME: coalesce | 
					
						
							|  |  |  | #      PURPOSE: To find the first defined argument and return it | 
					
						
							|  |  |  | #   PARAMETERS: Arbitrary number of arguments | 
					
						
							|  |  |  | #      RETURNS: The first defined argument or undef if there are none | 
					
						
							|  |  |  | #  DESCRIPTION: Just a simple way of ensuring an 'undef' value is never | 
					
						
							|  |  |  | #               returned when doing so might be a problem. | 
					
						
							|  |  |  | #       THROWS: No exceptions | 
					
						
							|  |  |  | #     COMMENTS: None | 
					
						
							|  |  |  | #     SEE ALSO: N/A | 
					
						
							|  |  |  | #=============================================================================== | 
					
						
							|  |  |  | sub coalesce { | 
					
						
							|  |  |  |     foreach (@_) { | 
					
						
							|  |  |  |         return $_ if defined($_); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     return undef;    ## no critic | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #===  FUNCTION  ================================================================ | 
					
						
							|  |  |  | #         NAME: assets_match | 
					
						
							|  |  |  | #      PURPOSE: To compare found assets and those in the database | 
					
						
							|  |  |  | #   PARAMETERS: $found_obj      an asset object (hashref) found by parsing show | 
					
						
							|  |  |  | #                               notes | 
					
						
							|  |  |  | #               $db_obj         an asset object (hashref) from the database | 
					
						
							|  |  |  | #      RETURNS: Value 1 or 2 if they match, otherwise zero | 
					
						
							|  |  |  | #  DESCRIPTION:  | 
					
						
							|  |  |  | #       THROWS: No exceptions | 
					
						
							|  |  |  | #     COMMENTS: None | 
					
						
							|  |  |  | #     SEE ALSO: N/A | 
					
						
							|  |  |  | #=============================================================================== | 
					
						
							|  |  |  | sub assets_match { | 
					
						
							|  |  |  |     my ( $found_obj, $db_obj ) = @_; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return 0 if ( !defined($found_obj) || !defined($db_obj) ); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     my ( $t1, $t2 ) = ( | 
					
						
							|  |  |  |         ( $found_obj->{filename} eq $db_obj->{filename} ), | 
					
						
							|  |  |  |         ( $found_obj->{URL} eq $db_obj->{URL} ) | 
					
						
							|  |  |  |     ); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return 2 if ( $t1 && $t2 ); | 
					
						
							|  |  |  |     return 1 if ( $t1 || $t2 ); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #===  FUNCTION  ================================================================ | 
					
						
							|  |  |  | #         NAME: equal | 
					
						
							|  |  |  | #      PURPOSE: Compare two strings even if undefined | 
					
						
							|  |  |  | #   PARAMETERS: $s1             The first string | 
					
						
							|  |  |  | #               $s2             The second string | 
					
						
							|  |  |  | #      RETURNS: True if both strings are undefined, false if one isn't | 
					
						
							|  |  |  | #               defined, otherwise the result of comparing them. | 
					
						
							|  |  |  | #  DESCRIPTION: Works on the principle that two undefined strings are equal, | 
					
						
							|  |  |  | #               a defined and an undefined string are not, and otherwise they | 
					
						
							|  |  |  | #               are equal if they are equal! | 
					
						
							|  |  |  | #       THROWS: No exceptions | 
					
						
							|  |  |  | #     COMMENTS: None | 
					
						
							|  |  |  | #     SEE ALSO: N/A | 
					
						
							|  |  |  | #=============================================================================== | 
					
						
							|  |  |  | sub equal { | 
					
						
							|  |  |  |     my ( $s1, $s2 ) = @_; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return 1 if ( !defined($s1) && !defined($s2) ); | 
					
						
							|  |  |  |     return 0 if ( !defined($s1) || !defined($s2) ); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return ( $s1 eq $s2 ); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #===  FUNCTION  ================================================================ | 
					
						
							|  |  |  | #         NAME: _debug | 
					
						
							|  |  |  | #      PURPOSE: Prints debug reports | 
					
						
							|  |  |  | #   PARAMETERS: $active         Boolean: 1 for print, 0 for no print | 
					
						
							|  |  |  | #               $message        Message to print | 
					
						
							|  |  |  | #      RETURNS: Nothing | 
					
						
							|  |  |  | #  DESCRIPTION: Outputs a message if $active is true. It removes any trailing | 
					
						
							|  |  |  | #               newline and then adds one in the 'print' to the caller doesn't | 
					
						
							|  |  |  | #               have to bother. Prepends the message with 'D> ' to show it's | 
					
						
							|  |  |  | #               a debug message. | 
					
						
							|  |  |  | #       THROWS: No exceptions | 
					
						
							|  |  |  | #     COMMENTS: None | 
					
						
							|  |  |  | #     SEE ALSO: N/A | 
					
						
							|  |  |  | #=============================================================================== | 
					
						
							|  |  |  | sub _debug { | 
					
						
							|  |  |  |     my ( $active, $message ) = @_; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     chomp($message); | 
					
						
							|  |  |  |     print "D> $message\n" if $active; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #===  FUNCTION  ================================================================ | 
					
						
							|  |  |  | #         NAME: Usage | 
					
						
							|  |  |  | #      PURPOSE: Displays a usage message and exit | 
					
						
							|  |  |  | #   PARAMETERS: None | 
					
						
							|  |  |  | #      RETURNS: To command line level with exit value 1 | 
					
						
							|  |  |  | #  DESCRIPTION: Builds the usage message using global values | 
					
						
							|  |  |  | #       THROWS: no exceptions | 
					
						
							|  |  |  | #     COMMENTS: none | 
					
						
							|  |  |  | #     SEE ALSO: n/a | 
					
						
							|  |  |  | #=============================================================================== | 
					
						
							|  |  |  | #sub Usage { | 
					
						
							|  |  |  | #    print STDERR <<EOD; | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | #Usage: $PROG [options] directory | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | #$PROG v$VERSION | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | #    -help               Display this information | 
					
						
							|  |  |  | #    -[no]dry-run        Enable/disable dry run mode (default off) | 
					
						
							|  |  |  | #    -[no]verbose        Enable/disable verbose mode (default off) | 
					
						
							|  |  |  | #    -debug=N            Set the debug level | 
					
						
							|  |  |  | #    -mode=MODE          Selects the mode from the following list: | 
					
						
							|  |  |  | #                        - initialise: rebuild the SQLite database from MySQL | 
					
						
							|  |  |  | #                          (deletes any data already uploaded) | 
					
						
							|  |  |  | #                        - update: updates the SQLite database from updates | 
					
						
							|  |  |  | #                          found in the MySQL database | 
					
						
							|  |  |  | #                        - json: load the 'archive_date' field from a JSON | 
					
						
							|  |  |  | #                          file. Also sets the 'uploaded' flag and populates | 
					
						
							|  |  |  | #                          the 'IA_URL' field | 
					
						
							|  |  |  | #                          ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~  | 
					
						
							|  |  |  | #                        - scandir: scans a file captured from the HPR server | 
					
						
							|  |  |  | #                          by running a 'find' command on the 'eps' directory | 
					
						
							|  |  |  | #                          where all the show files reside. This file is | 
					
						
							|  |  |  | #                          scanned to determine which shows have files other | 
					
						
							|  |  |  | #                          than the audio so we can keep a tally of which shows | 
					
						
							|  |  |  | #                          have had these uploaded to the IA | 
					
						
							|  |  |  | #                          ** Not fully implemented ** | 
					
						
							|  |  |  | #                          ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~  | 
					
						
							|  |  |  | #    -episode=N1 [-episode=N2 ...] | 
					
						
							|  |  |  | #                        In -mode=update defines one or more single episodes to | 
					
						
							|  |  |  | #                        process otherwise, if this option is not given, all | 
					
						
							|  |  |  | #                        new episodes in the database are processed. The | 
					
						
							|  |  |  | #                        episodes are expected to have been already added if | 
					
						
							|  |  |  | #                        the option is used, and the 'title' and 'summary' | 
					
						
							|  |  |  | #                        fields and the assets (if any) are to be processed. | 
					
						
							|  |  |  | #    -range=N1 [-range=N2] | 
					
						
							|  |  |  | #                        An alternative way of defining existing shows to be | 
					
						
							|  |  |  | #                        processed in -mode=update. The option can only be | 
					
						
							|  |  |  | #                        repeated twice. The smaller number defines the start | 
					
						
							|  |  |  | #                        of the range, and the larger one the end of the range. | 
					
						
							|  |  |  | #                        So -range=834 -range=840 defines the range 836-840. If | 
					
						
							|  |  |  | #                        one number is given (or two the same) this means the | 
					
						
							|  |  |  | #                        same as defining one episode. The episodes are | 
					
						
							|  |  |  | #                        expected to have been already added and the 'title' | 
					
						
							|  |  |  | #                        and 'summary' fields and the assets (if any) are to be | 
					
						
							|  |  |  | #                        processed. | 
					
						
							|  |  |  | #    -json=FILE          Defines the file to use for -mode=json | 
					
						
							|  |  |  | #    -scandir=FILE       Defines a file to use for -mode=scandir | 
					
						
							|  |  |  | #    -dbconfig=FILE      Defines an alternative MySQL configuration file | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | #EOD | 
					
						
							|  |  |  | #    exit(1); | 
					
						
							|  |  |  | #} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #===  FUNCTION  ================================================================ | 
					
						
							|  |  |  | #         NAME: Options | 
					
						
							|  |  |  | #      PURPOSE: Processes command-line options | 
					
						
							|  |  |  | #   PARAMETERS: $optref     Hash reference to hold the options | 
					
						
							|  |  |  | #      RETURNS: Undef | 
					
						
							|  |  |  | #  DESCRIPTION: Process the options we want to offer. See the documentation | 
					
						
							|  |  |  | #               for details | 
					
						
							|  |  |  | #       THROWS: no exceptions | 
					
						
							|  |  |  | #     COMMENTS: none | 
					
						
							|  |  |  | #     SEE ALSO: n/a | 
					
						
							|  |  |  | #=============================================================================== | 
					
						
							|  |  |  | sub Options { | 
					
						
							|  |  |  |     my ($optref) = @_; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     my @options = ( | 
					
						
							|  |  |  |         "help",       "documentation|man", | 
					
						
							|  |  |  |         "debug=i",    "mode=s", | 
					
						
							|  |  |  |         "json=s",     "scandir=s", | 
					
						
							|  |  |  |         "episode=i@", "range=i@", | 
					
						
							|  |  |  |         "dry-run!",   "verbose!", | 
					
						
							|  |  |  |         "dbconfig=s", | 
					
						
							|  |  |  |     ); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if ( !GetOptions( $optref, @options ) ) { | 
					
						
							|  |  |  |         pod2usage( | 
					
						
							|  |  |  |             -msg     => "$PROG version $VERSION\n", | 
					
						
							|  |  |  |             -exitval => 1, | 
					
						
							|  |  |  |             -verbose => 0 | 
					
						
							|  |  |  |         ); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | __END__ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% | 
					
						
							|  |  |  | #  Application Documentation | 
					
						
							|  |  |  | #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% | 
					
						
							|  |  |  | #{{{ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | =head1 NAME | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | upload_manager - manages a SQLite database of details of shows on archive.org | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | =head1 VERSION | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | This documentation refers to upload_manager version 0.2.19 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | =head1 USAGE | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     ./upload_manager [-help] [-documentation] [-[no]dry-run] [-[no]verbose] | 
					
						
							|  |  |  |         [-debug=N] -mode=MODE [-episode=N1 [-episode=N2 ...]] [-range=START | 
					
						
							|  |  |  |         [-range=END]] [-json=FILE] [-scandir=FILE] [-dbconfig=FILE] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | =head1 OPTIONS | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | =over 8 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | =item B<-help> | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | Display a brief summary of how to use this script. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | =item B<-documentation> or B<-man> | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | Display the entirety of the internal documentation in the form of a manual | 
					
						
							|  |  |  | page. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | =item B<-[no]dry-run> | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | Enable/disable dry run mode (default off) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | =item B<-[no]verbose> | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | Enable/disable verbose mode (default off) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | =item B<-debug=N> | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | Sets the debug level: | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | =over 4 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | =item B<0> | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | This is the default value, and causes no debug output to be generated. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | =item B<1> | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | TBA | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | =item B<2> | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | Produces quite a lot of asset-related output. All of the output from the lower debug levels is | 
					
						
							|  |  |  | displayed as well as: | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | =over 4 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | =item . | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | Contents of the %assets hash. This is generated when scanning a show in the | 
					
						
							|  |  |  | MySQL database in order to find any associated assets. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | =item . | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | The episode number being checked and the number of assets found while | 
					
						
							|  |  |  | processing assets. A dump of the assets from the SQLite database is shown as | 
					
						
							|  |  |  | well as the count of assets found therre. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | =item . | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | If there are assets related to a show and there are already some in the SQLite | 
					
						
							|  |  |  | database, a comparison is made to see if any updates are needed. These | 
					
						
							|  |  |  | differences are listed. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | =item . | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | If new assets are being added, these are reported from the hash | 
					
						
							|  |  |  | %assets_by_file. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | =item . | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | Assets are added using a multiple INSERT statement, and details of what is to | 
					
						
							|  |  |  | be inserted is shown in the array @multiple_insert. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | =back | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | =item B<3> | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | Produces the most output. All of the output from the lower debug levels is | 
					
						
							|  |  |  | displayed as well as: | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | =over 4 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | =item . | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | Contents of the @episodes array.  This is generated by using the B<-episode=N> | 
					
						
							|  |  |  | option once or many times. It is shown before and after the removal of | 
					
						
							|  |  |  | duplicates. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | =item . | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | Contents of the @range array.  This is generated by using the B<-range=N> | 
					
						
							|  |  |  | option once or twice. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | =back | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | =back | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | =item B<-mode=MODE> | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | Selects the mode of operation of the script from the following list: | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | =over 4 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | =item B<initialise> | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | Rebuilds the SQLite database from the MySQL data. Use with caution, though | 
					
						
							|  |  |  | the script will not initialise tables which contain data. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | =item B<update> | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | Updates the SQLite database from updates found in the MySQL database. Exactly | 
					
						
							|  |  |  | how this is done is controlled by other options. See below for details. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | =item B<json> | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | Load the 'archive_date' field from a JSON file. Also sets the 'uploaded' flag | 
					
						
							|  |  |  | and populates the 'IA_URL' and 'item_last_updated' fields. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | =item B<scandir> | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | Scans a file captured from the HPR server by running a 'find' command on the | 
					
						
							|  |  |  | 'eps' directory where all the show files reside. This file is scanned to | 
					
						
							|  |  |  | determine which shows have files other than the audio so we can keep a tally | 
					
						
							|  |  |  | of which shows have had these uploaded to the IA | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | ** Not fully implemented; may be phased out ** | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | =back | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | =item B<-episode=N1 [-episode=N2 ...]> | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | In B<-mode=update> defines one or more single episodes to process otherwise, if | 
					
						
							|  |  |  | this option is not given, all new episodes in the database are processed. The | 
					
						
							|  |  |  | episodes are expected to have been already added if this option is used, and | 
					
						
							|  |  |  | the 'title' and 'summary' fields and the assets (if any) are to be processed. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | =item B<-range=N1 [-range=N2]> | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | An alternative way of defining existing shows to be processed in | 
					
						
							|  |  |  | B<-mode=update>.  The option can only be repeated twice. The smaller number | 
					
						
							|  |  |  | defines the start of the range, and the larger one the end of the range.  So | 
					
						
							|  |  |  | B<-range=834 -range=840> defines the range 836-840. If one number is given (or | 
					
						
							|  |  |  | two the same) this means the same as defining one episode. The episodes are | 
					
						
							|  |  |  | expected to have been already added and the 'title' and 'summary' fields and | 
					
						
							|  |  |  | the assets (if any) are to be processed. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | =item B<-json=FILE> | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | Defines the file to use for B<-mode=json>. The contents of the file are | 
					
						
							|  |  |  | expected to be those produced by running B<collect_show_data>. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | =item B<-scandir=FILE> | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | Defines a file to use for B<-mode=scandir>. This feature is not fully | 
					
						
							|  |  |  | implemented at the moment. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | =item B<-dbconfig=FILE> | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | Defines an alternative MySQL configuration file | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | =back | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | =head1 DESCRIPTION | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | A full description of the application and its features. | 
					
						
							|  |  |  | May include numerous subsections (i.e. =head2, =head3, etc.) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | =head1 DIAGNOSTICS | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | A list of every error and warning message that the application can generate | 
					
						
							|  |  |  | (even the ones that will "never happen"), with a full explanation of each | 
					
						
							|  |  |  | problem, one or more likely causes, and any suggested remedies. If the | 
					
						
							|  |  |  | application generates exit status codes (e.g. under Unix) then list the exit | 
					
						
							|  |  |  | status associated with each error. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | =head1 CONFIGURATION AND ENVIRONMENT | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | A full explanation of any configuration system(s) used by the application, | 
					
						
							|  |  |  | including the names and locations of any configuration files, and the | 
					
						
							|  |  |  | meaning of any environment variables or properties that can be set. These | 
					
						
							|  |  |  | descriptions must also include details of any configuration language used | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | =head1 DEPENDENCIES | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | A list of all the other modules that this module relies upon, including any | 
					
						
							|  |  |  | restrictions on versions, and an indication whether these required modules are | 
					
						
							|  |  |  | part of the standard Perl distribution, part of the module's distribution, | 
					
						
							|  |  |  | or must be installed separately. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | =head1 INCOMPATIBILITIES | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | A list of any modules that this module cannot be used in conjunction with. | 
					
						
							|  |  |  | This may be due to name conflicts in the interface, or competition for | 
					
						
							|  |  |  | system or program resources, or due to internal limitations of Perl | 
					
						
							|  |  |  | (for example, many modules that use source code filters are mutually | 
					
						
							|  |  |  | incompatible). | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | =head1 BUGS AND LIMITATIONS | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | A list of known problems with the module, together with some indication | 
					
						
							|  |  |  | whether they are likely to be fixed in an upcoming release. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | Also a list of restrictions on the features the module does provide: | 
					
						
							|  |  |  | data types that cannot be handled, performance issues and the circumstances | 
					
						
							|  |  |  | in which they may arise, practical limitations on the size of data sets, | 
					
						
							|  |  |  | special cases that are not (yet) handled, etc. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | The initial template usually just has: | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | There are no known bugs in this module. | 
					
						
							|  |  |  | Please report problems to <Maintainer name(s)>  (<contact address>) | 
					
						
							|  |  |  | Patches are welcome. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | =head1 AUTHOR | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | <Author name(s)>  (<contact address>) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | =head1 LICENCE AND COPYRIGHT | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | Copyright (c) <year> <copyright holder> (<contact address>). All rights reserved. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | Followed by whatever licence you wish to release it under. | 
					
						
							|  |  |  | For Perl code that is often just: | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | This module is free software; you can redistribute it and/or | 
					
						
							|  |  |  | modify it under the same terms as Perl itself. See perldoc perlartistic. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | This program is distributed in the hope that it will be useful, | 
					
						
							|  |  |  | but WITHOUT ANY WARRANTY; without even the implied warranty of | 
					
						
							|  |  |  | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | =cut | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #}}} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # [zo to open fold, zc to close] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # vim: syntax=perl:ts=8:sw=4:et:ai:tw=78:fo=tcrqn21:fdm=marker | 
					
						
							|  |  |  | 
 |