forked from HPR/hpr-tools
		
	
		
			
	
	
		
			2804 lines
		
	
	
		
			92 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
		
		
			
		
	
	
			2804 lines
		
	
	
		
			92 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
|   | #!/usr/bin/env perl | ||
|  | #=============================================================================== | ||
|  | # | ||
|  | #         FILE: upload_manager | ||
|  | # | ||
|  | #        USAGE: ./upload_manager [-help] [-[no]dry-run] [-[no]verbose] | ||
|  | #               [-debug=N] -mode=MODE [-episode=N1 [-episode=N2 ...]] | ||
|  | #               [-range=START [-range=END]] [-json=FILE] [-scandir=FILE] | ||
|  | #               [-dbconfig=FILE] | ||
|  | # | ||
|  | #  DESCRIPTION: Collect details about HPR shows and the copies on Archive.org | ||
|  | #               and keep them in a separate SQLite database (for | ||
|  | #               the moment). This database is used to populate the | ||
|  | #               developing PostgreSQL Database (which is not currently | ||
|  | #               progressing very much). | ||
|  | # | ||
|  | #      OPTIONS: --- | ||
|  | # REQUIREMENTS: --- | ||
|  | #         BUGS: --- | ||
|  | #        NOTES: 2022-02-15 Had to revert to DBD::MySQL temporarily. | ||
|  | #               2023-06-12 This code is no longer valid. It expects the main | ||
|  | #               HTML to point to files on the server (if there are "assets") | ||
|  | #               and recurses through extra HTML it finds there. It currently | ||
|  | #               looks for file details in any sub-directory "/eps/hpr1234/". | ||
|  | #               Now there are no files, so the details of all assets | ||
|  | #               (including the audio) needs to be in the 'assets' table. Not | ||
|  | #               sure we are there yet. | ||
|  | #               2024-03-08 Smartmatch is deprecated; moved to 'match::smart'. | ||
|  | #       AUTHOR: Dave Morriss (djm), Dave.Morriss@gmail.com | ||
|  | #      VERSION: 0.2.19 | ||
|  | #      CREATED: 2017-06-14 10:50:28 | ||
|  | #     REVISION: 2024-03-08 20:49:51 | ||
|  | # | ||
|  | #=============================================================================== | ||
|  | 
 | ||
|  | use 5.010; | ||
|  | use strict; | ||
|  | use warnings; | ||
|  | use utf8; | ||
|  | #use experimental 'smartmatch'; | ||
|  | 
 | ||
|  | use match::smart; | ||
|  | 
 | ||
|  | use Carp; | ||
|  | use Getopt::Long; | ||
|  | use Pod::Usage; | ||
|  | 
 | ||
|  | use Config::General; | ||
|  | use File::Slurper qw{ read_text read_lines }; | ||
|  | use JSON; | ||
|  | use Net::OpenSSH; | ||
|  | #use TryCatch; # Broke in late May 2020 due to a problem with Devel::Declare | ||
|  | use Try::Tiny; | ||
|  | use Log::Handler; | ||
|  | 
 | ||
|  | use HTML::TreeBuilder 5 -weak; | ||
|  | use HTML::Entities; | ||
|  | use List::Util qw{ min max }; | ||
|  | use List::MoreUtils qw{ any }; | ||
|  | use LWP::Simple; | ||
|  | 
 | ||
|  | use DBI; | ||
|  | use SQL::Abstract; | ||
|  | use SQL::Abstract::Plugin::InsertMulti; | ||
|  | 
 | ||
|  | use Data::Dumper; | ||
|  | 
 | ||
|  | # | ||
|  | # Version number (manually incremented) | ||
|  | # | ||
|  | our $VERSION = '0.2.19'; | ||
|  | 
 | ||
|  | # | ||
|  | # Script and directory names | ||
|  | # | ||
|  | ( my $PROG = $0 ) =~ s|.*/||mx; | ||
|  | #( my $DIR  = $0 ) =~ s|/?[^/]*$||mx; | ||
|  | #$DIR = '.' unless $DIR; | ||
|  | 
 | ||
|  | #------------------------------------------------------------------------------- | ||
|  | # Declarations | ||
|  | #------------------------------------------------------------------------------- | ||
|  | # | ||
|  | # Constants and other declarations | ||
|  | # | ||
|  | my $basedir       = "$ENV{HOME}/HPR/IA"; | ||
|  | my $logdir        = "$basedir/logs"; | ||
|  | my $logfile       = "$logdir/${PROG}.log"; | ||
|  | my $configfile    = "$basedir/.$PROG.cfg"; | ||
|  | my $db1configfile = "$basedir/.hpr_db.cfg"; | ||
|  | my $database2     = "$basedir/ia.db"; | ||
|  | 
 | ||
|  | my ( $dbh1, $dbh2, $sth1, $h1 ); | ||
|  | 
 | ||
|  | # | ||
|  | # NOTE: Program config not currently used | ||
|  | # | ||
|  | die "Configuration file $configfile not found\n" unless ( -e $configfile ); | ||
|  | 
 | ||
|  | my $conf = Config::General->new( | ||
|  |     -ConfigFile      => $configfile, | ||
|  |     -InterPolateVars => 1, | ||
|  |     -ExtendedAccess  => 1, | ||
|  | ); | ||
|  | my %config = $conf->getall(); | ||
|  | 
 | ||
|  | # | ||
|  | # Enable Unicode mode | ||
|  | # | ||
|  | binmode STDOUT, ":encoding(UTF-8)"; | ||
|  | binmode STDERR, ":encoding(UTF-8)"; | ||
|  | 
 | ||
|  | #------------------------------------------------------------------------------- | ||
|  | # Options | ||
|  | #------------------------------------------------------------------------------- | ||
|  | # | ||
|  | # Option defaults | ||
|  | # | ||
|  | my $DEFDEBUG = 0; | ||
|  | 
 | ||
|  | # | ||
|  | # Options and arguments | ||
|  | # | ||
|  | my %options; | ||
|  | Options( \%options ); | ||
|  | 
 | ||
|  | # | ||
|  | # Default help shows minimal information | ||
|  | # | ||
|  | pod2usage( -msg => "$PROG version $VERSION\n", -exitval => 1, -verbose => 0 ) | ||
|  |     if ( $options{'help'} ); | ||
|  | 
 | ||
|  | # | ||
|  | # The -documentation or -man option shows the full POD documentation through | ||
|  | # a pager for convenience | ||
|  | # | ||
|  | pod2usage( -msg => "$PROG version $VERSION\n", -exitval => 1, -verbose => 2 ) | ||
|  |     if ( $options{'documentation'} ); | ||
|  | 
 | ||
|  | my $DEBUG = ( defined( $options{debug} ) ? $options{debug} : $DEFDEBUG ); | ||
|  | 
 | ||
|  | my $db1cfgfile | ||
|  |     = ( defined( $options{dbconfig} ) ? $options{dbconfig} : $db1configfile ); | ||
|  | 
 | ||
|  | my $dry_run = ( defined( $options{'dry-run'} ) ? $options{'dry-run'} : 0 ); | ||
|  | my $verbose = ( defined( $options{verbose} ) ? $options{verbose} : 0 ); | ||
|  | 
 | ||
|  | my $mode = $options{mode}; | ||
|  | die "Mode is required\n" unless defined($mode); | ||
|  | $mode = lc($mode); | ||
|  | 
 | ||
|  | my @episodes; | ||
|  | @episodes = @{ $options{episode} } if ( defined( $options{episode} ) ); | ||
|  | if ($mode ne 'update' and @episodes) { | ||
|  |     die "The -episode=N options are only for use in update mode\n"; | ||
|  | } | ||
|  | 
 | ||
|  | my @range; | ||
|  | @range = @{ $options{range} } if ( defined( $options{range} ) ); | ||
|  | if ($mode ne 'update' and @range) { | ||
|  |     die "The -range=N options are only for use in update mode\n"; | ||
|  | } | ||
|  | 
 | ||
|  | _debug( $DEBUG >= 3, '@episodes: ' . Dumper( \@episodes ) ); | ||
|  | _debug( $DEBUG >= 3, '@range: ' . Dumper( \@range ) ); | ||
|  | 
 | ||
|  | # | ||
|  | # Check -episode=N versus -range=N. We can't have both | ||
|  | # | ||
|  | die "Choose only -episode=N or -range=N, not both\n" | ||
|  |     if ( defined( $options{episode} ) && defined( $options{range} ) ); | ||
|  | 
 | ||
|  | # | ||
|  | # Only two range values | ||
|  | # | ||
|  | die "-range=N should not be used more than twice\n" if ($#range gt 2); | ||
|  | 
 | ||
|  | # | ||
|  | # Turn the range into a list of episodes | ||
|  | # | ||
|  | if (defined($options{range})) { | ||
|  |     @episodes = ( min(@range) .. max(@range) ); | ||
|  | } | ||
|  | 
 | ||
|  | @episodes = map { sprintf('%04d',$_)} @episodes; | ||
|  | 
 | ||
|  | _debug( $DEBUG >= 3, '@episodes: ' . Dumper(\@episodes) ); | ||
|  | 
 | ||
|  | die "Too many episodes requested\n" if (scalar(@episodes) > 20); | ||
|  | 
 | ||
|  | my $json = $options{json}; | ||
|  | if ( $mode eq 'json' ) { | ||
|  |     die "JSON file is required\n" unless defined($json); | ||
|  |     die "Unable to find file '$json'\n" unless -e $json; | ||
|  | } | ||
|  | 
 | ||
|  | my $dirfile = $options{scandir}; | ||
|  | if ( $mode eq 'scandir' ) { | ||
|  |     die "Directory listing file is required\n" unless defined($dirfile); | ||
|  |     die "Unable to find file '$dirfile'\n" unless -e $dirfile; | ||
|  | } | ||
|  | 
 | ||
|  | die "Configuration file $db1cfgfile not found\n" unless ( -e $db1cfgfile ); | ||
|  | 
 | ||
|  | #------------------------------------------------------------------------------- | ||
|  | # Set up logging keeping the default log layout except for the date. The format | ||
|  | # is "%T [%L] %m" where '%T' is the timestamp, '%L' is the log level and '%m is | ||
|  | # the message. | ||
|  | #------------------------------------------------------------------------------- | ||
|  | my $log = Log::Handler->new(); | ||
|  | 
 | ||
|  | $log->add( | ||
|  |     file => { | ||
|  |         timeformat => "%Y-%m-%d %H:%M:%S", | ||
|  |         filename   => $logfile, | ||
|  |         minlevel   => 0, | ||
|  |         maxlevel   => 7, | ||
|  |     } | ||
|  | ); | ||
|  | 
 | ||
|  | #------------------------------------------------------------------------------- | ||
|  | # Load MySQL configuration data | ||
|  | #------------------------------------------------------------------------------- | ||
|  | my $db1conf = Config::General->new( | ||
|  |     -ConfigFile      => $db1cfgfile, | ||
|  |     -InterPolateVars => 1, | ||
|  |     -ExtendedAccess  => 1, | ||
|  | ); | ||
|  | my %db1cfg = $db1conf->getall(); | ||
|  | 
 | ||
|  | if ($verbose) { | ||
|  |     print "MySQL database configuration chosen: $db1cfgfile\n"; | ||
|  |     print "Dry run\n" if $dry_run; | ||
|  | } | ||
|  | 
 | ||
|  | #------------------------------------------------------------------------------- | ||
|  | # Connect to the MariaDB database [reverted to mysql temporarily] | ||
|  | #------------------------------------------------------------------------------- | ||
|  | my $dbhost = $db1cfg{database}->{host} // '127.0.0.1'; | ||
|  | my $dbport = $db1cfg{database}->{port} // 3306; | ||
|  | my $dbname = $db1cfg{database}->{name}; | ||
|  | my $dbuser = $db1cfg{database}->{user}; | ||
|  | my $dbpwd  = $db1cfg{database}->{password}; | ||
|  | #$dbh1 = DBI->connect( "DBI:MariaDB:database=$dbname;host=$dbhost;port=$dbport", | ||
|  | #    $dbuser, $dbpwd, { AutoCommit => 1 } ) | ||
|  | #    or croak $DBI::errstr; | ||
|  | $dbh1 = DBI->connect( "DBI:mysql:database=$dbname;host=$dbhost;port=$dbport", | ||
|  |     $dbuser, $dbpwd, { AutoCommit => 1 } ) | ||
|  |     or croak $DBI::errstr; | ||
|  | 
 | ||
|  | # | ||
|  | # Enable client-side UTF8 [temporary] | ||
|  | # | ||
|  | $dbh1->{mysql_enable_utf8} = 1; | ||
|  | 
 | ||
|  | #------------------------------------------------------------------------------- | ||
|  | # Connect to the SQLite database | ||
|  | #------------------------------------------------------------------------------- | ||
|  | $dbh2 = DBI->connect( "dbi:SQLite:dbname=$database2", "", "" ); | ||
|  | 
 | ||
|  | # | ||
|  | # Log the starting conditions | ||
|  | # | ||
|  | $log->info("Starting $PROG version $VERSION"); | ||
|  | if (@episodes) { | ||
|  |     $log->info('Episodes:', join( ", ", @episodes ) ); | ||
|  | } | ||
|  | else { | ||
|  |     $log->info('Looking for new episodes'); | ||
|  | } | ||
|  | $log->info("Mode: $mode"); | ||
|  | if ($dry_run) { | ||
|  |     $log->info('Dry run: Yes'); | ||
|  | } | ||
|  | 
 | ||
|  | 
 | ||
|  | #------------------------------------------------------------------------------- | ||
|  | # Set up for connecting to the server with SSH | ||
|  | #------------------------------------------------------------------------------- | ||
|  | my $remhost = 'hackerpublicradio.org'; | ||
|  | #my $remport = 22074; | ||
|  | my $remport = 22; | ||
|  | my $remuser = 'hpr'; | ||
|  | my $rempath = '/home/hpr/www/eps/'; | ||
|  | 
 | ||
|  | my $ssh = Net::OpenSSH->new( $remhost, user => $remuser, port => $remport ); | ||
|  | $ssh->error and die "Can't ssh to $remhost: " . $ssh->error; | ||
|  | 
 | ||
|  | $log->info("Connected to ${remuser}\@${remhost}, port $remport"); | ||
|  | 
 | ||
|  | #------------------------------------------------------------------------------- | ||
|  | # Perform the action requested by -mode=X | ||
|  | #------------------------------------------------------------------------------- | ||
|  | if ( $mode eq 'initialise' ) { | ||
|  |     print "Initialise mode\n"; | ||
|  |     initialise( $dbh1, $dbh2 ); | ||
|  | } | ||
|  | elsif ( $mode eq 'update' ) { | ||
|  |     print "Update mode\n"; | ||
|  | 
 | ||
|  |     # | ||
|  |     # Range updating is achieved by running the single-shot mode repeatedly | ||
|  |     # | ||
|  |     if ( @episodes ) { | ||
|  |         foreach my $episode (@episodes) { | ||
|  |             update_episode( $dbh1, $dbh2, $episode, $dry_run, $verbose ); | ||
|  |         } | ||
|  |     } | ||
|  |     else { | ||
|  |         update_db( $dbh1, $dbh2, $dry_run, $verbose ); | ||
|  |     } | ||
|  | } | ||
|  | elsif ( $mode eq 'json' ) { | ||
|  |     print "JSON mode\n"; | ||
|  |     load_metadata( $dbh2, $json, $dry_run, $verbose ); | ||
|  |     #load_upload_dates( $dbh2, $json ); | ||
|  | } | ||
|  | elsif ( $mode eq 'scandir' ) { | ||
|  |     print "Scan directory mode\n"; | ||
|  |     scan_dir( $dbh2, $dirfile ); | ||
|  | } | ||
|  | else { | ||
|  |     die "Invalid mode: $mode\n"; | ||
|  | } | ||
|  | 
 | ||
|  | exit; | ||
|  | 
 | ||
|  | #===  FUNCTION  ================================================================ | ||
|  | #         NAME: initialise | ||
|  | #      PURPOSE: Initialise an empty database | ||
|  | #   PARAMETERS: $dbh1           Handle for the MySQL database | ||
|  | #               $dbh2           Handle for the SQLite database | ||
|  | #      RETURNS: Nothing | ||
|  | #  DESCRIPTION: Runs the various initialisation functions, but only if the | ||
|  | #               database tables 'episodes' and/or 'assets' are empty | ||
|  | #       THROWS: No exceptions | ||
|  | #     COMMENTS: None | ||
|  | #     SEE ALSO: N/A | ||
|  | #=============================================================================== | ||
|  | sub initialise { | ||
|  |     my ( $dbh1, $dbh2 ) = @_; | ||
|  | 
 | ||
|  |     my ($count); | ||
|  | 
 | ||
|  |     # | ||
|  |     # Initialise the 'episodes' table if it's empty | ||
|  |     # | ||
|  |     $count = count_rows( $dbh2, 'episodes' ); | ||
|  |     if ( $count == 0 ) { | ||
|  |         print "Initialising the 'episodes' table\n"; | ||
|  |         initialise_episodes( $dbh1, $dbh2 ); | ||
|  |     } | ||
|  |     else { | ||
|  |         print "Did not initialise the 'episodes' table ($count rows)\n"; | ||
|  |         $log->info("Did not initialise the 'episodes' table ($count rows)"); | ||
|  |     } | ||
|  | 
 | ||
|  |     # | ||
|  |     # Initialise the 'assets' table if it's empty | ||
|  |     # | ||
|  |     $count = count_rows( $dbh2, 'assets' ); | ||
|  |     if ( $count == 0 ) { | ||
|  |         print "Initialising the 'assets' table\n"; | ||
|  |         initialise_assets( $dbh1, $dbh2 ); | ||
|  |     } | ||
|  |     else { | ||
|  |         print "Did not initialise the 'assets' table ($count rows)\n"; | ||
|  |         $log->info("Did not initialise the 'assets' table ($count rows)"); | ||
|  |     } | ||
|  | 
 | ||
|  | } | ||
|  | 
 | ||
|  | #===  FUNCTION  ================================================================ | ||
|  | #         NAME: initialise_episodes | ||
|  | #      PURPOSE: Initialise an empty 'episodes' table | ||
|  | #   PARAMETERS: $dbh1           Handle for the MySQL database | ||
|  | #               $dbh2           Handle for the SQLite database | ||
|  | #      RETURNS: Nothing | ||
|  | #  DESCRIPTION: Assuming the episodes table is empty loads all episodes from | ||
|  | #               the MariaDB database | ||
|  | #       THROWS: No exceptions | ||
|  | #     COMMENTS: None | ||
|  | #     SEE ALSO: N/A | ||
|  | #=============================================================================== | ||
|  | sub initialise_episodes { | ||
|  |     my ( $dbh1, $dbh2 ) = @_; | ||
|  | 
 | ||
|  |     my ( $sql1, $sth1, $h1, $count ); | ||
|  | 
 | ||
|  |     $sql1 = q{ | ||
|  |         SELECT | ||
|  |             id, | ||
|  |             date, | ||
|  |             title, | ||
|  |             summary | ||
|  |         FROM eps | ||
|  |     }; | ||
|  | 
 | ||
|  |     $sth1 = $dbh1->prepare($sql1); | ||
|  | 
 | ||
|  |     $sth1->execute(); | ||
|  |     if ( $dbh1->err ) { | ||
|  |         croak $dbh1->errstr; | ||
|  |     } | ||
|  | 
 | ||
|  |     $count = 0; | ||
|  |     while ( $h1 = $sth1->fetchrow_hashref ) { | ||
|  |         # | ||
|  |         # Fields are: | ||
|  |         # id                    integer, | ||
|  |         # rdate                 date NOT NULL, | ||
|  |         # title                 varchar ( 100 ) NOT NULL, | ||
|  |         # summary               varchar ( 100 ) NOT NULL, | ||
|  |         # uploaded              integer DEFAULT 0, | ||
|  |         # has_files             INTEGER DEFAULT 0, | ||
|  |         # with_files            INTEGER DEFAULT 0, | ||
|  |         # with_derived          INTEGER DEFAULT 0, | ||
|  |         # with_source           INTEGER DEFAULT 0, | ||
|  |         # archive_date          date, | ||
|  |         # item_last_updated     integer | ||
|  |         # IA_URL                text, | ||
|  |         # notes                 text, | ||
|  |         # | ||
|  |         #>>> | ||
|  |         $dbh2->do( | ||
|  |             q{INSERT INTO episodes (id, rdate, title, summary) VALUES(?,?,?,?)}, | ||
|  |             undef, | ||
|  |             $h1->{id}, | ||
|  |             $h1->{date}, | ||
|  |             $h1->{title}, | ||
|  |             $h1->{summary}, | ||
|  |         ) or die $dbh2->errstr; | ||
|  |         #<<< | ||
|  | 
 | ||
|  |         $count++; | ||
|  |     } | ||
|  | 
 | ||
|  |     print "Records added: $count\n"; | ||
|  |     $log->info("Records added to 'episodes' table: $count"); | ||
|  | 
 | ||
|  | } | ||
|  | 
 | ||
|  | #===  FUNCTION  ================================================================ | ||
|  | #         NAME: initialise_assets | ||
|  | #      PURPOSE: Initialise an empty 'assets' table (during database | ||
|  | #               initialisation) | ||
|  | #   PARAMETERS: $dbh1           Handle for the MariaDB database | ||
|  | #               $dbh2           Handle for the SQLite database | ||
|  | #      RETURNS: Nothing | ||
|  | #  DESCRIPTION: Assuming the assets table is empty loads all assets (links to | ||
|  | #               files on the HPR server) into the table | ||
|  | #       THROWS: No exceptions | ||
|  | #     COMMENTS: None | ||
|  | #     SEE ALSO: N/A | ||
|  | #=============================================================================== | ||
|  | sub initialise_assets { | ||
|  |     my ( $dbh1, $dbh2 ) = @_; | ||
|  | 
 | ||
|  |     my ( $sql1, $sth1, $h1, $count, $links, $id, $episode, $notes ); | ||
|  |     my %assets; | ||
|  | 
 | ||
|  |     # | ||
|  |     # Collect the notes for shows that have links to other HPR files | ||
|  |     # | ||
|  |     $sql1 = q{ | ||
|  |         SELECT | ||
|  |             id, | ||
|  |             notes | ||
|  |         FROM eps | ||
|  |         WHERE notes REGEXP 'https?://(www.)?(hacker|hobby)publicradio.org/eps/' | ||
|  |     }; | ||
|  | #        LIMIT 20 OFFSET 30 | ||
|  | 
 | ||
|  |     $sth1 = $dbh1->prepare($sql1); | ||
|  | 
 | ||
|  |     $sth1->execute(); | ||
|  |     if ( $dbh1->err ) { | ||
|  |         croak $dbh1->errstr; | ||
|  |     } | ||
|  | 
 | ||
|  |     # | ||
|  |     # Process notes one show at a time, recursing into HTML where necessary | ||
|  |     # and building the %assets hash | ||
|  |     # | ||
|  |     $count = 0; | ||
|  |     while ( $h1 = $sth1->fetchrow_hashref ) { | ||
|  |         $links = 0; | ||
|  | 
 | ||
|  |         $id = $h1->{id}; | ||
|  |         $episode = sprintf("%04d", $id); | ||
|  |         $notes = $h1->{notes}; | ||
|  | 
 | ||
|  |         print "Episode: $id\n"; | ||
|  |         #$links = find_links($id,$notes,\%assets); | ||
|  |         $links = find_assets($ssh,$episode,$rempath,\%assets); | ||
|  |         print "Links found: $links\n"; | ||
|  |     } | ||
|  | 
 | ||
|  |     #print Dumper(\%assets),"\n"; | ||
|  | 
 | ||
|  |     # | ||
|  |     # Use the %assets hash to populate the 'assets' table | ||
|  |     # | ||
|  |     foreach my $key (sort(keys(%assets))) { | ||
|  |         foreach my $obj (@{$assets{$key}}) { | ||
|  |             #>>> | ||
|  |             $dbh2->do( | ||
|  |                 q{INSERT INTO assets (episode_id, URL, filename) VALUES(?,?,?)}, | ||
|  |                 undef, | ||
|  |                 $key, | ||
|  |                 $obj->{URL}, | ||
|  |                 $obj->{filename}, | ||
|  |             ) or die $dbh2->errstr; | ||
|  |             #<<< | ||
|  | 
 | ||
|  |             $count++; | ||
|  |         } | ||
|  |     } | ||
|  | 
 | ||
|  |     print "Records added: $count\n"; | ||
|  |     $log->info("Records added to 'assets' table: $count"); | ||
|  | 
 | ||
|  | } | ||
|  | 
 | ||
|  | #===  FUNCTION  ================================================================ | ||
|  | #         NAME: update_db | ||
|  | #      PURPOSE: Loads any additional rows from the MySQL database to the | ||
|  | #               SQLite one. | ||
|  | #   PARAMETERS: $dbh1           Handle for the MySQL database | ||
|  | #               $dbh2           Handle for the SQLite database | ||
|  | #               $dry_run        Boolean dry run setting | ||
|  | #               $verbose        Boolean verbose setting | ||
|  | #      RETURNS: Nothing | ||
|  | #  DESCRIPTION: Finds the first gap in the 'id' field of the 'episodes' table | ||
|  | #               in the SQLite database. This is the starting show number it | ||
|  | #               uses when querying the MySQL database. The function loops over | ||
|  | #               the 'eps' table in the MySQL database returning details for | ||
|  | #               storage in the SQLite database. Before storing it double | ||
|  | #               checks that the row in question does not exist. | ||
|  | #               TODO | ||
|  | #       THROWS: No exceptions | ||
|  | #     COMMENTS: None | ||
|  | #     SEE ALSO: N/A | ||
|  | #=============================================================================== | ||
|  | sub update_db { | ||
|  |     my ( $dbh1, $dbh2, $dry_run, $verbose ) = @_; | ||
|  | 
 | ||
|  |     my ( $sql1, $sql2, $sth1, $h1, $sth2, $h2 ); | ||
|  |     my ( $ep_count, $ffree); | ||
|  |     my ( %assets, $asset_count, $assets_added, $assets_deleted, $assets_updated ); | ||
|  |     my ( $id, $episode, $notes, $links ); | ||
|  | 
 | ||
|  |     print "Updating SQLite database from MySQL\n" if $verbose; | ||
|  | 
 | ||
|  |     # | ||
|  |     # Find the first episode id gap in the SQLite database | ||
|  |     # | ||
|  |     $sql1 = q{ | ||
|  |         SELECT MIN (id) + 1 AS first_free | ||
|  |         FROM episodes t1 | ||
|  |         WHERE NOT EXISTS ( | ||
|  |             SELECT * | ||
|  |             FROM episodes t2 | ||
|  |             WHERE t1.id + 1 = t2.id) | ||
|  |     }; | ||
|  | 
 | ||
|  |     $sth1 = $dbh2->prepare($sql1); | ||
|  | 
 | ||
|  |     $sth1->execute(); | ||
|  |     if ( $dbh1->err ) { | ||
|  |         croak $dbh1->errstr; | ||
|  |     } | ||
|  | 
 | ||
|  |     $h1    = $sth1->fetchrow_hashref; | ||
|  |     $ffree = $h1->{first_free}; | ||
|  |     print "First free id: $ffree\n" if $verbose; | ||
|  | 
 | ||
|  |     # | ||
|  |     # SQL to find new rows in the MySQL database | ||
|  |     # | ||
|  |     $sql1 = q{ | ||
|  |         SELECT | ||
|  |             id, | ||
|  |             date, | ||
|  |             title, | ||
|  |             summary, | ||
|  |             notes | ||
|  |         FROM eps | ||
|  |         WHERE id >= ? | ||
|  |     }; | ||
|  | 
 | ||
|  |     # | ||
|  |     # SQL to look for pre-existing episodes in the SQLite database | ||
|  |     # | ||
|  |     $sql2 = q{ | ||
|  |         SELECT * FROM episodes WHERE id = ? | ||
|  |     }; | ||
|  | 
 | ||
|  |     # | ||
|  |     # Prepare the SQL statements | ||
|  |     # | ||
|  |     $sth1 = $dbh1->prepare($sql1); | ||
|  |     $sth2 = $dbh2->prepare($sql2); | ||
|  | 
 | ||
|  |     # | ||
|  |     # Query MySQL, from $ffree onwards | ||
|  |     # | ||
|  |     $sth1->execute($ffree); | ||
|  |     if ( $dbh1->err ) { | ||
|  |         croak $dbh1->errstr; | ||
|  |     } | ||
|  | 
 | ||
|  |     # | ||
|  |     # Loop through the MySQL rows | ||
|  |     # | ||
|  |     $ep_count = $asset_count = 0; | ||
|  |     while ( $h1 = $sth1->fetchrow_hashref ) { | ||
|  |         # | ||
|  |         # Query the SQLite database for the episode number we just got | ||
|  |         # | ||
|  |         $sth2->execute( $h1->{id} ); | ||
|  |         if ( $dbh2->err ) { | ||
|  |             croak $dbh2->errstr; | ||
|  |         } | ||
|  |         $h2 = $sth2->fetchrow_hashref; | ||
|  | 
 | ||
|  |         # | ||
|  |         # If the row isn't already there create it | ||
|  |         # | ||
|  |         unless ($h2) { | ||
|  |             unless ($dry_run) { | ||
|  |                 #>>> | ||
|  |                 $dbh2->do( | ||
|  |                     q{INSERT INTO episodes (id, rdate, title, summary) VALUES(?,?,?,?)}, | ||
|  |                     undef, | ||
|  |                     $h1->{id}, | ||
|  |                     $h1->{date}, | ||
|  |                     $h1->{title}, | ||
|  |                     $h1->{summary}, | ||
|  |                 ) or die $dbh2->errstr; | ||
|  |                 #<<< | ||
|  | 
 | ||
|  |                 printf "Added episode: %d\n", $h1->{id} if $verbose; | ||
|  | 
 | ||
|  |                 $ep_count++; | ||
|  |             } | ||
|  |             else { | ||
|  |                 printf | ||
|  |                     "Would have inserted a row into the 'episodes' table (%s)\n", | ||
|  |                     $h1->{id}; | ||
|  |             } | ||
|  |         } | ||
|  | 
 | ||
|  |         # | ||
|  |         # Check this show for "assets" (whether we have the episode already or | ||
|  |         # not). Even in dry-run mode this gets executed | ||
|  |         # | ||
|  |         $links = 0; | ||
|  | 
 | ||
|  |         $id = $h1->{id}; | ||
|  |         $episode = sprintf("%04d", $id); | ||
|  |         # $notes = $h1->{notes}; | ||
|  |         # $links = find_links($id,$notes,\%assets); | ||
|  |         $links = find_assets($ssh,$episode,$rempath,\%assets); | ||
|  | 
 | ||
|  |         $asset_count += $links; | ||
|  | 
 | ||
|  |     } | ||
|  | 
 | ||
|  |     # | ||
|  |     # In debug mode show what assets were collected as we iterated through the | ||
|  |     # new shows. | ||
|  |     # | ||
|  |     _debug( $DEBUG >= 2, '%assets: ' . Dumper(\%assets) ); | ||
|  | 
 | ||
|  |     # | ||
|  |     # Add any assets collected from the episodes just processed | ||
|  |     # | ||
|  |     $assets_added = 0; | ||
|  |     if ($asset_count > 0) { | ||
|  |         ( $assets_added, $assets_deleted, $assets_updated ) = | ||
|  |             process_assets( $dbh2, \%assets, 1, $dry_run, $verbose ); | ||
|  |     } | ||
|  | 
 | ||
|  |     printf "%-16s %d\n", "Episodes added:", $ep_count; | ||
|  |     printf "%-16s %d\n", "Assets found:",   $asset_count; | ||
|  |     printf "%-16s %d\n", "Assets added:",   coalesce( $assets_added,   0 ); | ||
|  |     printf "%-16s %d\n", "Assets deleted:", coalesce( $assets_deleted, 0 ); | ||
|  |     printf "%-16s %d\n", "Assets updated:", coalesce( $assets_updated, 0 ); | ||
|  | 
 | ||
|  |     $log->info("Episodes added: $ep_count"); | ||
|  | } | ||
|  | 
 | ||
|  | #===  FUNCTION  ================================================================ | ||
|  | #         NAME: process_assets | ||
|  | #      PURPOSE: Given a collection of asset details obtained by searching | ||
|  | #               notes for a given range of episodes determine whether they | ||
|  | #               need to be added to the SQLite database. | ||
|  | #   PARAMETERS: $dbh            SQLite database handle | ||
|  | #               $assets         hashref pointing to the collected assets which | ||
|  | #                               have been found by parsing the notes of all | ||
|  | #                               relevant shows prior to calling this routine. | ||
|  | #               $new            Boolean flag to indicate that we're dealing with a new | ||
|  | #                               show or updating an old one | ||
|  | #               $dry_run        Boolean dry run setting | ||
|  | #               $verbose        Boolean verbose setting | ||
|  | #      RETURNS: Number of assets added | ||
|  | #  DESCRIPTION: The %assets hash has been accumulated by scanning all the | ||
|  | #               notes belonging to new episodes found in the MySQL database. | ||
|  | #               The hash is keyed by the episode number and it is scanned in | ||
|  | #               sorted order of the key. The SQLite database is queried for | ||
|  | #               assets with this key and the result held in a hash keyed by | ||
|  | #               the filename. The value of each key in the %assets hash is an | ||
|  | #               array of hashes holding the asset details, so we loop through | ||
|  | #               these. If the filename of a particular asset hash is not found | ||
|  | #               in the hash from the database then we create a new row in the | ||
|  | #               'assets' table (assuming that we're not in dry run mode where | ||
|  | #               we simply report what we would have done). If any assets were | ||
|  | #               added for an episode then we set the flag 'has_files' (unless | ||
|  | #               in dry run mode). The total number of assets added to the | ||
|  | #               database is returned by the function. | ||
|  | #       THROWS: No exceptions | ||
|  | #     COMMENTS: None | ||
|  | #     SEE ALSO: N/A | ||
|  | #=============================================================================== | ||
|  | sub process_assets { | ||
|  |     my ( $dbh, $assets, $new, $dry_run, $verbose ) = @_; | ||
|  | 
 | ||
|  |     my ( $sql, $sth, $db_assets, $key_field, $db_count ); | ||
|  |     my ( $asset_count, $assets_added, $assets_deleted, $assets_updated ); | ||
|  | 
 | ||
|  |     $key_field = 'id'; | ||
|  | 
 | ||
|  |     # | ||
|  |     # We should only be called if there are assets, but just in case... | ||
|  |     # | ||
|  |     return ( 0, 0, 0 ) unless ($assets); | ||
|  | 
 | ||
|  |     # | ||
|  |     # SQL to look for pre-existing assets in the SQLite database | ||
|  |     # | ||
|  |     $sql = q{ | ||
|  |         SELECT * FROM assets WHERE episode_id = ? | ||
|  |     }; | ||
|  |     $sth = $dbh->prepare($sql); | ||
|  | 
 | ||
|  |     say 'Processing assets for ', ( $new ? 'new' : 'existing' ), ' show' | ||
|  |         if $verbose; | ||
|  | 
 | ||
|  |     # | ||
|  |     # Use the %assets hash to populate the 'assets' table. The hash is keyed | ||
|  |     # by episode numbers, and each entry consists of an array of hashes each | ||
|  |     # containing the filename and the URL of the asset. | ||
|  |     # | ||
|  |     $assets_added = $assets_deleted = $assets_updated = 0; | ||
|  |     foreach my $key ( sort( keys(%$assets) ) ) { | ||
|  |         print "Checking episode $key\n" if $verbose; | ||
|  | 
 | ||
|  |         # | ||
|  |         # Count the assets for the current show | ||
|  |         # | ||
|  |         $asset_count = scalar( @{ $assets->{$key} } ); | ||
|  | 
 | ||
|  |         _debug( $DEBUG >= 2, "Episode: $key, Asset_count: $asset_count" ); | ||
|  | 
 | ||
|  |        # | ||
|  |        # Query the SQLite database for the episode number we just got, looking | ||
|  |        # for entries in the 'assets' table | ||
|  |        # | ||
|  |         $sth->execute($key); | ||
|  |         if ( $dbh->err ) { | ||
|  |             croak $dbh->errstr; | ||
|  |         } | ||
|  | 
 | ||
|  |         # | ||
|  |         # Grab everything as a hash keyed on the 'filename' field so we can | ||
|  |         # check it easily. Count what we got (i.e. count the number of assets | ||
|  |         # in SQLite for this episode). | ||
|  |         # NOTE: trying the idea of indexing $db_assets by the 'id' field (in | ||
|  |         # $key_field) | ||
|  |         # | ||
|  |         #        $db_assets = $sth->fetchall_hashref('filename'); | ||
|  |         $db_assets = $sth->fetchall_hashref($key_field); | ||
|  |         $db_count  = scalar( keys(%$db_assets) ); | ||
|  | 
 | ||
|  |         _debug( $DEBUG >= 2, "Assets in ia.db:\n" . Dumper($db_assets) ); | ||
|  |         _debug( $DEBUG >= 2, "DB count: $db_count" ); | ||
|  | 
 | ||
|  |         # | ||
|  |         # Compare the number of "real" assets with what SQLite holds. | ||
|  |         # | ||
|  |         if ( $asset_count > $db_count ) { | ||
|  |             # | ||
|  |             # We need to add assets. If it's a newly added show which has | ||
|  |             # assets this will always be necessary. If it's an existing show | ||
|  |             # then we're repairing an anomaly and want to say so. | ||
|  |             # | ||
|  |             if ($new) { | ||
|  |                 say "Adding assets" if $verbose; | ||
|  |             } | ||
|  |             else { | ||
|  |                 say "** Found more real assets than in DB -> adding"; | ||
|  |             } | ||
|  |             $assets_added += insert_assets( $dbh, $key, $assets, $db_assets, | ||
|  |                 $dry_run, $verbose ); | ||
|  |         } | ||
|  |         elsif ( $asset_count < $db_count ) { | ||
|  |             # | ||
|  |             # We need to delete assets. This can't happen for an existing | ||
|  |             # show, so it's always a noteworthy event. | ||
|  |             # | ||
|  |             say "** Found fewer real assets than in DB -> deleting"; | ||
|  |             $assets_deleted += delete_assets( $dbh, $key, $assets, $db_assets, | ||
|  |                 $dry_run, $verbose ); | ||
|  |         } | ||
|  | 
 | ||
|  |         # | ||
|  |         # Look for field differences in what we have after the inserts and | ||
|  |         # deletes | ||
|  |         # | ||
|  |         if ( $assets_added > 0 || $assets_deleted > 0 ) { | ||
|  |             $sth->execute($key); | ||
|  |             if ( $dbh->err ) { | ||
|  |                 croak $dbh->errstr; | ||
|  |             } | ||
|  |             $db_assets = $sth->fetchall_hashref($key_field); | ||
|  |         } | ||
|  |         my @results | ||
|  |             = synchronise_found_assets( $dbh, $key, $assets, $db_assets, | ||
|  |             $dry_run, $verbose ); | ||
|  | 
 | ||
|  |         $assets_added   += $results[0]; | ||
|  |         $assets_deleted += $results[1]; | ||
|  |         $assets_updated += $results[2]; | ||
|  | 
 | ||
|  |         _debug( $DEBUG >= 4, '@results ' . Dumper( \@results ) ); | ||
|  | 
 | ||
|  |         # | ||
|  |         # If there are assets then we need to ensure the 'has_files' setting | ||
|  |         # reflects it (unless in dry run mode) | ||
|  |         # | ||
|  |         if ( $asset_count > 0 ) { | ||
|  |             unless ($dry_run) { | ||
|  |                 $dbh->do( | ||
|  |                     q{ | ||
|  |                         UPDATE episodes | ||
|  |                         SET has_files = 1 | ||
|  |                         WHERE id = ? | ||
|  |                         AND has_files = 0 | ||
|  |                     }, | ||
|  |                     undef, | ||
|  |                     $key, | ||
|  |                 ) or die $dbh->errstr; | ||
|  |             } | ||
|  |             else { | ||
|  |                 say "Would have updated the 'episodes.has_files' ", | ||
|  |                     "field for $key (if needed)"; | ||
|  |             } | ||
|  |         } | ||
|  | 
 | ||
|  |         #        $assets_added += $asset_count; | ||
|  | 
 | ||
|  |     } | ||
|  | 
 | ||
|  |     return ( $assets_added, $assets_deleted, $assets_updated ); | ||
|  | 
 | ||
|  | } | ||
|  | 
 | ||
|  | #===  FUNCTION  ================================================================ | ||
|  | #         NAME: synchronise_found_assets | ||
|  | #      PURPOSE: Compares the assets found by searching the shownotes (and | ||
|  | #               other files) with those in the SQLite database and updates | ||
|  | #               what differs. | ||
|  | #   PARAMETERS: $dbh            SQLite database handle | ||
|  | #               $episode        Numeric episode | ||
|  | #               $assets         Hashref pointing to the collected assets which | ||
|  | #                               have been found by parsing the show's notes; | ||
|  | #                               keyed by episode number. Each keyed object is | ||
|  | #                               an arrayref of hashes containing the URL and | ||
|  | #                               filename of the asset. | ||
|  | #               $db_assets      Hash of assets from SQLite, keyed by filename. | ||
|  | #                               Each keyed object is a hash of columns from | ||
|  | #                               the database: id, episode_id, URL, filename, | ||
|  | #                               uploaded. | ||
|  | #               $dry_run        Boolean dry run setting | ||
|  | #               $verbose        Boolean verbose setting | ||
|  | #      RETURNS: Number of additions, deletions and updates | ||
|  | #  DESCRIPTION: The assets for this episode have been found by searching the | ||
|  | #               HTML of the show notes and any linked HTML files for files on | ||
|  | #               the HPR server.  The equivalent assets have also been | ||
|  | #               collected from the SQLite database. A comparison is made | ||
|  | #               between these items and if they differ the definitive details | ||
|  | #               collected from the MySQL database are used. There are three | ||
|  | #               actions possible: (1) if the URL or filename fields differ | ||
|  | #               then we can work out the update and apply it, (2) if the | ||
|  | #               incoming assets don't match any in those from the database | ||
|  | #               then we can delete the database ones, and (3) we can add the | ||
|  | #               incoming assets we also couldn't match because they'll be the | ||
|  | #               replecements!  For all actions changes are made to the | ||
|  | #               versions in the SQLite database using SQL::Abstract to | ||
|  | #               construct the queries to do it. | ||
|  | #       THROWS: No exceptions | ||
|  | #     COMMENTS: TODO: There may be multiple asset changes for the episode. | ||
|  | #               Does the SQL::Abstract call perform the multiple changes? | ||
|  | #               Needs testing. | ||
|  | #     SEE ALSO: N/A | ||
|  | #=============================================================================== | ||
|  | sub synchronise_found_assets { | ||
|  |     my ( $dbh, $episode, $assets, $db_assets, $dry_run, $verbose ) = @_; | ||
|  | 
 | ||
|  |     my ( $sth, $additions, $deletions, $updates, $update_count, $index); | ||
|  |     my ( @fieldvals, @where, @db_rows, @asset_rows, %matches, %changes ); | ||
|  | 
 | ||
|  |     $additions = $deletions = $updates = 0; | ||
|  |     $update_count = 0; | ||
|  | 
 | ||
|  |     # | ||
|  |     # Temporary? | ||
|  |     # | ||
|  |     if ( $DEBUG >= 4 ) { | ||
|  |         _debug( 1, 'In subroutine synchronise_found_assets' ); | ||
|  |         _debug( 1, "\$episode: $episode" ); | ||
|  |         _debug( 1, '%$assets: ' . Dumper($assets) ); | ||
|  |         _debug( 1, '%$db_assets: ' . Dumper($db_assets) ); | ||
|  |     } | ||
|  | 
 | ||
|  |     _debug( $DEBUG >= 4, 'Looking for assets that match - to exclude them' ); | ||
|  | 
 | ||
|  |     @db_rows = ( keys(%$db_assets) ); | ||
|  |     @asset_rows = ( 0 .. scalar( @{ $assets->{$episode} } ) - 1 ); | ||
|  |     _debug( $DEBUG >= 4, 'Initial @db_rows ' . Dumper( \@db_rows ) ); | ||
|  |     _debug( $DEBUG >= 4, 'Initial @asset_rows ' . Dumper( \@asset_rows ) ); | ||
|  | 
 | ||
|  |     # | ||
|  |     # Find any asset pair matches | ||
|  |     # | ||
|  |     $index = 0; | ||
|  |     foreach my $db_key ( keys(%$db_assets) ) { | ||
|  |         foreach my $a_obj ( @{ $assets->{$episode} } ) { | ||
|  |             if ( assets_match( $a_obj, $db_assets->{$db_key} ) == 2 ) { | ||
|  |                 $matches{$db_key} = [ $a_obj, $db_assets->{$db_key} ]; | ||
|  | #                splice( @asset_rows, $index, 1 ); | ||
|  |                 @asset_rows = grep { $_ != $index } @asset_rows; | ||
|  |                 $index++; | ||
|  |             } | ||
|  |         } | ||
|  |     } | ||
|  |     _debug( $DEBUG >= 4, 'Assets to ignore: %matches ' . Dumper( \%matches ) ); | ||
|  |     _debug( $DEBUG >= 4, 'Current @asset_rows ' . Dumper( \@asset_rows ) ); | ||
|  | 
 | ||
|  |     # | ||
|  |     # If everything matched then there's nothing to do, so return zero and | ||
|  |     # leave. | ||
|  |     # | ||
|  | #    if (scalar(@db_rows) == scalar( keys(%matches) )) { | ||
|  |     if ( scalar(@asset_rows) == 0 ) { | ||
|  |         _debug( $DEBUG >= 4, 'All rows match, so nothing to do' ); | ||
|  |         return ( 0, 0, 0 ); | ||
|  |     } | ||
|  |     _debug( $DEBUG >= 4, 'Some rows do not match' ); | ||
|  | 
 | ||
|  |     # | ||
|  |     # Remove the row ids for any matches | ||
|  |     # | ||
|  |     @db_rows = grep { !exists( $matches{$_} ) } @db_rows; | ||
|  |     _debug( $DEBUG >= 4, '@db_rows after matches ' . Dumper( \@db_rows ) ); | ||
|  |     _debug( $DEBUG >= 4, '@asset_rows after matches ' . Dumper( \@asset_rows ) ); | ||
|  | 
 | ||
|  |     _debug( $DEBUG >= 4, 'Looking for assets that match in only one attribute' ); | ||
|  | 
 | ||
|  |     # | ||
|  |     # Find any cases where only one element of the asset object matches. This | ||
|  |     # means we have a simple change. | ||
|  |     # | ||
|  |     $index = 0; | ||
|  |     foreach my $db_key (@db_rows) { | ||
|  |         foreach my $a_obj ( @{ $assets->{$episode} } ) { | ||
|  |             if ( assets_match( $a_obj, $db_assets->{$db_key} ) == 1 ) { | ||
|  |                 $changes{$db_key} = [ $a_obj, $db_assets->{$db_key} ]; | ||
|  |                 @asset_rows = grep { $_ != $index } @asset_rows; | ||
|  |                 $index++; | ||
|  |             } | ||
|  |         } | ||
|  |     } | ||
|  |     _debug( $DEBUG >= 4, 'Assets to change: %changes ' . Dumper( \%changes ) ); | ||
|  |     _debug( $DEBUG >= 4, 'Current @asset_rows ' . Dumper( \@asset_rows ) ); | ||
|  | 
 | ||
|  |     # | ||
|  |     # Remove the row ids for any changes | ||
|  |     # | ||
|  |     @db_rows = grep { !exists( $changes{$_} ) } @db_rows; | ||
|  |     _debug( $DEBUG >= 4, '@db_rows after changes ' . Dumper( \@db_rows ) ); | ||
|  |     _debug( $DEBUG >= 4, '@asset_rows after changes ' . Dumper( \@asset_rows ) ); | ||
|  | 
 | ||
|  |     # | ||
|  |     # If there are changes, do them now | ||
|  |     # | ||
|  |     if (%changes) { | ||
|  |         _debug( $DEBUG >= 4, 'Collecting changes' ); | ||
|  | 
 | ||
|  |         # | ||
|  |         # We may have multiple database rows to process | ||
|  |         # | ||
|  |         $index        = 0; | ||
|  | 
 | ||
|  |         foreach my $key ( keys(%changes) ) { | ||
|  |             $fieldvals[$index] = {}; | ||
|  |             $where[$index]     = { episode_id => $episode }; | ||
|  | 
 | ||
|  |             # | ||
|  |             # Compare the fields we're interested in, accumulating differences | ||
|  |             # (expecting only one per key). | ||
|  |             # | ||
|  |             foreach my $fld ( 'filename', 'URL' ) { | ||
|  |                 if (!equal( | ||
|  |                         $changes{$key}->[0]->{$fld}, | ||
|  |                         $changes{$key}->[1]->{$fld} | ||
|  |                     ) | ||
|  |                     ) | ||
|  |                 { | ||
|  |                     $update_count++; | ||
|  |                     $fieldvals[$index]->{$fld} = $changes{$key}->[0]->{$fld}; | ||
|  |                 } | ||
|  |             } | ||
|  | 
 | ||
|  |             $where[$index]->{id} = $key; | ||
|  | 
 | ||
|  |             $index++; | ||
|  |         } | ||
|  |     } | ||
|  | 
 | ||
|  |     _debug( $DEBUG >= 2, '@fieldvals: ' . Dumper( \@fieldvals ) ); | ||
|  |     _debug( $DEBUG >= 2, '@where ' . Dumper( \@where ) ); | ||
|  | 
 | ||
|  |     if ( $update_count > 0 ) { | ||
|  |         _debug( $DEBUG >= 4, 'Performing changes' ); | ||
|  |         # | ||
|  |         # Prepare to use SQL::Abstract | ||
|  |         # | ||
|  |         my $sql = SQL::Abstract->new(); | ||
|  | 
 | ||
|  |         foreach my $fv (@fieldvals) { | ||
|  |             my ( $stmt, @bindvals ) | ||
|  |                 = $sql->update( 'assets', $fv, shift(@where) ); | ||
|  | 
 | ||
|  |             # | ||
|  |             # Perform the updates (unless in dry-run mode) | ||
|  |             # | ||
|  |             unless ($dry_run) { | ||
|  |                 $sth = $dbh->prepare($stmt); | ||
|  |                 $sth->execute(@bindvals); | ||
|  |                 if ( $dbh->err ) { | ||
|  |                     warn "Processing $episode\n", $dbh->errstr; | ||
|  |                 } | ||
|  |                 else { | ||
|  |                     print "Updated ", join( ',', keys(%$fv) ), "\n" | ||
|  |                         if $verbose; | ||
|  |                     $updates++; | ||
|  |                 } | ||
|  |             } | ||
|  |             else { | ||
|  |                 print "Dry run mode:\n"; | ||
|  |                 print "Statement: $stmt\n"; | ||
|  |                 print "Bindvals: ", join( ", ", @bindvals ), "\n"; | ||
|  |             } | ||
|  |         } | ||
|  |     } | ||
|  | 
 | ||
|  |     # | ||
|  |     # If anything is left in @db_rows then they will be replacements | ||
|  |     # | ||
|  |     if ( scalar(@db_rows) > 0 ) { | ||
|  |         say "Replacements found: ", scalar(@db_rows); | ||
|  | 
 | ||
|  |         # | ||
|  |         # Prepare to use SQL::Abstract | ||
|  |         # | ||
|  |         my $sql = SQL::Abstract->new(); | ||
|  | 
 | ||
|  |         # | ||
|  |         # Every row which came from the database is to be deleted | ||
|  |         # | ||
|  |         $index = 0; | ||
|  |         foreach my $db_key (@db_rows) { | ||
|  |             $where[$index] = { episode_id => $episode }; | ||
|  |             $where[$index]->{id} = $db_key; | ||
|  |             $index++; | ||
|  |         } | ||
|  |         _debug( $DEBUG >= 2, '@where ' . Dumper( \@where ) ); | ||
|  | 
 | ||
|  |         # | ||
|  |         # Do deletions | ||
|  |         # | ||
|  |         foreach my $db_key (@db_rows) { | ||
|  |             my ( $stmt, @bindvals ) | ||
|  |                 = $sql->delete( 'assets', shift(@where) ); | ||
|  | 
 | ||
|  |             # | ||
|  |             # Perform the deletions (unless in dry-run mode) | ||
|  |             # | ||
|  |             unless ($dry_run) { | ||
|  |                 $sth = $dbh->prepare($stmt); | ||
|  |                 $sth->execute(@bindvals); | ||
|  |                 if ( $dbh->err ) { | ||
|  |                     warn "Processing $episode\n", $dbh->errstr; | ||
|  |                 } | ||
|  |                 else { | ||
|  |                     print "Deleted ", join( ',', @db_rows ), "\n" | ||
|  |                         if $verbose; | ||
|  |                     $deletions++; | ||
|  |                 } | ||
|  |             } | ||
|  |             else { | ||
|  |                 print "Dry run mode:\n"; | ||
|  |                 print "Statement: $stmt\n"; | ||
|  |                 print "Bindvals: ", join( ", ", @bindvals ), "\n"; | ||
|  |             } | ||
|  |         } | ||
|  | 
 | ||
|  |         # | ||
|  |         # Every object in the found assets array for this episode is to be | ||
|  |         # added. | ||
|  |         # | ||
|  |         foreach my $i (@asset_rows) { | ||
|  |             my $obj = $assets->{$episode}->[$i]; | ||
|  |             $obj->{episode_id} = $episode; | ||
|  | 
 | ||
|  |             my ( $stmt, @bindvals ) | ||
|  |                 = $sql->insert( 'assets', $obj ); | ||
|  | 
 | ||
|  |             # | ||
|  |             # Perform the additions (unless in dry-run mode) | ||
|  |             # | ||
|  |             unless ($dry_run) { | ||
|  |                 $sth = $dbh->prepare($stmt); | ||
|  |                 $sth->execute(@bindvals); | ||
|  |                 if ( $dbh->err ) { | ||
|  |                     warn "Processing $episode\n", $dbh->errstr; | ||
|  |                 } | ||
|  |                 else { | ||
|  |                     print "Inserted array elements ", join( ',', @asset_rows ), "\n" | ||
|  |                         if $verbose; | ||
|  |                     $additions++; | ||
|  |                 } | ||
|  |             } | ||
|  |             else { | ||
|  |                 print "Dry run mode:\n"; | ||
|  |                 print "Statement: $stmt\n"; | ||
|  |                 print "Bindvals: ", join( ", ", @bindvals ), "\n"; | ||
|  |             } | ||
|  |         } | ||
|  | 
 | ||
|  |     } | ||
|  | 
 | ||
|  |     return ( $additions, $deletions, $updates ); | ||
|  | } | ||
|  | 
 | ||
|  | ################################################################################ | ||
|  |     # | ||
|  |     # Look for database assets to be ignored. The outer loop iterates through | ||
|  |     # the primary key values of $db_assets where the rows all match the | ||
|  |     # $episode value. For each of these assets the next loop iterates through | ||
|  |     # the array of assets in $assets->{$episode} comparing the fields we want | ||
|  |     # to check in the next inner loop. If the checked fields are all equal | ||
|  |     # then there will be nothing to do. | ||
|  |     # | ||
|  | #    foreach my $db_key ( keys(%$db_assets) ) { | ||
|  | #        foreach my $a_obj ( @{ $assets->{$episode} } ) { | ||
|  | #            $ignore = $delete = $update = 0; | ||
|  | #            foreach my $fld ( 'filename', 'URL' ) { | ||
|  | #                $ignore | ||
|  | #                    += equal( $db_assets->{$db_key}->{$fld}, $a_obj->{$fld} ); | ||
|  | #                $delete | ||
|  | #                    += !equal( $db_assets->{$db_key}->{$fld}, $a_obj->{$fld} ); | ||
|  | #            } | ||
|  | #            push( @{ $row_actions{'ignore'} }, $db_assets->{$db_key}->{id} ) | ||
|  | #                if $ignore == 2; | ||
|  | #            push( @{ $row_actions{'delete'} }, $db_assets->{$db_key}->{id} ) | ||
|  | #                if $delete == 2; | ||
|  | #        } | ||
|  | #    } | ||
|  | # | ||
|  | #    _debug( $DEBUG >= 4, '%row_actions ' . Dumper( \%row_actions ) ); | ||
|  | # | ||
|  |     # | ||
|  |     # Scan the array of hashes obtained from the shownotes for $episode and | ||
|  |     # record things that need changing. | ||
|  |     # | ||
|  | #    $index = 0; | ||
|  | #    $updates = 0; | ||
|  | #    foreach my $obj ( @{ $assets->{$episode} } ) { | ||
|  | #        $fieldvals[$index] = {}; | ||
|  | # | ||
|  | #        # | ||
|  | #        # Compare the fields we're interested in | ||
|  | #        # | ||
|  | #        foreach my $fld ( 'filename', 'URL' ) { | ||
|  | #            if ( !equal( $obj->{$fld}, $db_assets->{$obj->{filename}}->{$fld} ) ) { | ||
|  | #                $updates++; | ||
|  | #                $fieldvals[$index]->{$fld} = $obj->{$fld}; | ||
|  | #            } | ||
|  | #        } | ||
|  | # | ||
|  | #        # TODO | ||
|  | #        $fieldvals[$index]->{id} = $db_assets->{$obj->{filename}}->{id}; | ||
|  | # | ||
|  | #        $index++; | ||
|  | #    } | ||
|  | # | ||
|  | #    _debug( $DEBUG >= 2, '@fieldvals: ' . Dumper( \@fieldvals ) ); | ||
|  | # | ||
|  | #    if ($updates > 0 ) { | ||
|  | #        # | ||
|  | #        # Prepare to use SQL::Abstract | ||
|  | #        # | ||
|  | #        my $sql = SQL::Abstract->new(); | ||
|  | # | ||
|  | #        $where{episode_id} = $episode; | ||
|  | # | ||
|  | #        foreach my $fv (@fieldvals) { | ||
|  | #            $where{id} = $fv->{id}; | ||
|  | #            delete($fv->{id}); | ||
|  | # | ||
|  | #            my ( $stmt, @bindvals ) | ||
|  | #                = $sql->update( 'assets', $fv, \%where ); | ||
|  | # | ||
|  | #            # | ||
|  | #            # Perform the updates (unless in dry-run mode) | ||
|  | #            # | ||
|  | #            unless ($dry_run) { | ||
|  | #                $sth = $dbh->prepare($stmt); | ||
|  | #                $sth->execute(@bindvals); | ||
|  | #                if ( $dbh->err ) { | ||
|  | #                    warn "Processing $episode\n", $dbh->errstr; | ||
|  | #                } | ||
|  | #                else { | ||
|  | #                    print "Updated ", join( ',', keys(%$fv) ), "\n" | ||
|  | #                        if $verbose; | ||
|  | #                } | ||
|  | #            } | ||
|  | #            else { | ||
|  | #                print "Dry run mode:\n"; | ||
|  | #                print "Statement: $stmt\n"; | ||
|  | #                print "Bindvals: ", join( ", ", @bindvals ), "\n"; | ||
|  | #            } | ||
|  | #        } | ||
|  | #    } | ||
|  | # | ||
|  | ################################################################################ | ||
|  | 
 | ||
|  | #===  FUNCTION  ================================================================ | ||
|  | #         NAME: insert_assets | ||
|  | #      PURPOSE: Find missing assets and add them to SQLite | ||
|  | #   PARAMETERS: $dbh            SQLite database handle | ||
|  | #               $episode        Numeric episode | ||
|  | #               $assets         Hashref pointing to the collected assets which | ||
|  | #                               have been found by parsing the show's notes; | ||
|  | #                               keyed by episode number. | ||
|  | #               $db_assets      Hash of assets from SQLite, keyed by filename | ||
|  | #               $dry_run        Boolean dry run setting | ||
|  | #               $verbose        Boolean verbose setting | ||
|  | #      RETURNS: Number of insertions | ||
|  | #  DESCRIPTION: The assets for this episode have been found by searching the | ||
|  | #               HTML of the show notes and any linked HTML files for files on | ||
|  | #               the HPR server.  The equivalent assets have also been | ||
|  | #               collected from the SQLite database. The MySQL assets are | ||
|  | #               reorganised by filename. For each file which does not already | ||
|  | #               exist in the SQLite list details are prepared and passed to | ||
|  | #               SQL::Abstract using the multi insert feature. The missing | ||
|  | #               details are inserted into the assets table unless dry-run mode | ||
|  | #               is on. | ||
|  | #       THROWS: No exceptions | ||
|  | #     COMMENTS: None | ||
|  | #     SEE ALSO: N/A | ||
|  | #=============================================================================== | ||
|  | sub insert_assets { | ||
|  |     my ( $dbh, $episode, $assets, $db_assets, $dry_run, $verbose ) = @_; | ||
|  | 
 | ||
|  |     my ($sth, %db_by_file, %assets_by_file, $insertions, @multi_insert, %where); | ||
|  | 
 | ||
|  |     # | ||
|  |     # Reorder the %$db_assets hash in filename order | ||
|  |     # | ||
|  |     foreach my $key ( keys( %{$db_assets} ) ) { | ||
|  |         $db_by_file{$db_assets->{$key}->{filename}} = $db_assets->{$key}; | ||
|  |     } | ||
|  | 
 | ||
|  |     # | ||
|  |     # Reorder the %assets hash in filename order | ||
|  |     # | ||
|  |     foreach my $obj ( @{ $assets->{$episode} } ) { | ||
|  |         $assets_by_file{$obj->{filename}} = $obj; | ||
|  |     } | ||
|  | 
 | ||
|  |     _debug( $DEBUG >= 2, "%db_by_file: " . Dumper( \%db_by_file ) ); | ||
|  |     _debug( $DEBUG >= 2, "%assets_by_file: " . Dumper( \%assets_by_file ) ); | ||
|  | 
 | ||
|  |     # | ||
|  |     # Find stuff for insertion. We need to build @multi_insert such that it | ||
|  |     # contains a list of hashrefs, each containing the fields we want to | ||
|  |     # insert. | ||
|  |     # Note: We make a new %fv each time, fill it as a slice, and push it into | ||
|  |     # the array @multi_insert. This may be rather obscure and therefore | ||
|  |     # error-prone. | ||
|  |     # | ||
|  |     $insertions = 0; | ||
|  |     foreach my $key ( keys(%assets_by_file) ) { | ||
|  | #        if ( !exists( $db_assets->{$key} ) ) { | ||
|  |         if ( !exists( $db_by_file{$key} ) ) { | ||
|  |             my %fv; | ||
|  |             $insertions++; | ||
|  |             @fv{ 'episode_id', 'URL', 'filename' } = ( | ||
|  |                 $episode, $assets_by_file{$key}->{URL}, | ||
|  |                 $assets_by_file{$key}->{filename} | ||
|  |             ); | ||
|  |             push( @multi_insert, \%fv ); | ||
|  |         } | ||
|  |     } | ||
|  | 
 | ||
|  |     _debug( $DEBUG >= 2, '@multi_insert: ' . Dumper( \@multi_insert ) ); | ||
|  | 
 | ||
|  |     if ($insertions > 0) { | ||
|  |         # | ||
|  |         # Prepare to use SQL::Abstract | ||
|  |         # | ||
|  |         my $sql = SQL::Abstract->new(); | ||
|  | 
 | ||
|  |         my ( $stmt, @bindvals ) | ||
|  |             = $sql->insert_multi( 'assets', \@multi_insert ); | ||
|  | 
 | ||
|  |         # | ||
|  |         # Perform the insertions (unless in dry-run mode) | ||
|  |         # | ||
|  |         unless ($dry_run) { | ||
|  |             $sth = $dbh->prepare($stmt); | ||
|  |             $sth->execute(@bindvals); | ||
|  |             if ( $dbh->err ) { | ||
|  |                 warn "Processing $episode\n", $dbh->errstr; | ||
|  |             } | ||
|  |             else { | ||
|  | #                print "Inserted ",join(',',keys(%where)),"\n" if $verbose; | ||
|  |                 print "Inserted $insertions assets\n" if $verbose; | ||
|  |             } | ||
|  |         } | ||
|  |         else { | ||
|  |             print "Dry run mode:\n"; | ||
|  |             print "Statement: $stmt\n"; | ||
|  |             print "Bindvals: ", join( ", ", @bindvals ), "\n"; | ||
|  |         } | ||
|  |     } | ||
|  | 
 | ||
|  |     return $insertions; | ||
|  | } | ||
|  | 
 | ||
|  | #===  FUNCTION  ================================================================ | ||
|  | #         NAME: delete_assets | ||
|  | #      PURPOSE: Find superfluous assets for an episode in the 'assets' table | ||
|  | #               and delete them. | ||
|  | #   PARAMETERS: $dbh            SQLite database handle | ||
|  | #               $episode        Numeric episode | ||
|  | #               $assets         Hashref pointing to the collected assets which | ||
|  | #                               have been found by parsing the show's notes; | ||
|  | #                               keyed by episode number. | ||
|  | #               $db_assets      Hash of assets from SQLite, keyed by filename | ||
|  | #               $dry_run        Boolean dry run setting | ||
|  | #               $verbose        Boolean verbose setting | ||
|  | #      RETURNS: Number of deletions | ||
|  | #  DESCRIPTION: The assets for this episode have been found by searching the | ||
|  | #               HTML of the show notes and any linked HTML files for files on | ||
|  | #               the HPR server.  The equivalent assets have also been | ||
|  | #               collected from the SQLite database. The MySQL assets are | ||
|  | #               reorganised by filename. For each file exists in the SQLite | ||
|  | #               list details are prepared and passed to SQL::Abstract. The | ||
|  | #               unwanted details are deleted from the assets table unless | ||
|  | #               dry-run mode is on. | ||
|  | #       THROWS: No exceptions | ||
|  | #     COMMENTS: None | ||
|  | #     SEE ALSO: N/A | ||
|  | #=============================================================================== | ||
|  | sub delete_assets { | ||
|  |     my ( $dbh, $episode, $assets, $db_assets, $dry_run, $verbose ) = @_; | ||
|  | 
 | ||
|  |     my ($sth, %db_by_file, %assets_by_file, $deletions, %where); | ||
|  | 
 | ||
|  |     # | ||
|  |     # Reorder the %$db_assets hash in filename order | ||
|  |     # | ||
|  |     foreach my $key ( keys( %{$db_assets} ) ) { | ||
|  |         $db_by_file{$db_assets->{$key}->{filename}} = $db_assets->{$key}; | ||
|  |     } | ||
|  | 
 | ||
|  |     # | ||
|  |     # Reorder the %assets hash in filename order | ||
|  |     # | ||
|  |     foreach my $obj ( @{ $assets->{$episode} } ) { | ||
|  |         $assets_by_file{$obj->{filename}} = $obj; | ||
|  |     } | ||
|  | 
 | ||
|  |     _debug( $DEBUG >= 2, "%db_by_file: " . Dumper( \%db_by_file ) ); | ||
|  |     _debug( $DEBUG >= 2, "%assets_by_file: " . Dumper( \%assets_by_file ) ); | ||
|  | 
 | ||
|  |     # | ||
|  |     # Only delete assets with this value in the 'episode_id' field | ||
|  |     # | ||
|  |     $where{episode_id} = $episode; | ||
|  | 
 | ||
|  |     # | ||
|  |     # Find stuff for deletion. We need to build %where such that it generates | ||
|  |     # 'WHERE filename = <value1> OR filename = <value2> ...', so we add these | ||
|  |     # values to an (anonymous) array. | ||
|  |     # | ||
|  |     $deletions = 0; | ||
|  |     foreach my $key ( keys(%$db_assets) ) { | ||
|  | #        if ( !exists( $assets_by_file{$key} ) ) { | ||
|  |         if ( !exists( $db_by_file{$key} ) ) { | ||
|  |             $deletions++; | ||
|  |             if ( !exists( $where{filename} ) ) { | ||
|  |                 $where{filename} = [$key]; | ||
|  |             } | ||
|  |             else { | ||
|  |                 push( @{ $where{filename} }, $key ); | ||
|  |             } | ||
|  |         } | ||
|  |     } | ||
|  | 
 | ||
|  |     _debug( $DEBUG >= 2, "%where: " . Dumper( \%where ) ); | ||
|  | 
 | ||
|  |     if ($deletions > 0) { | ||
|  |         # | ||
|  |         # Prepare to use SQL::Abstract | ||
|  |         # | ||
|  |         my $sql = SQL::Abstract->new(); | ||
|  | 
 | ||
|  |         my ( $stmt, @bindvals ) | ||
|  |             = $sql->delete( 'assets', \%where ); | ||
|  | 
 | ||
|  |         # | ||
|  |         # Perform the deletions (unless in dry-run mode) | ||
|  |         # | ||
|  |         unless ($dry_run) { | ||
|  |             $sth = $dbh->prepare($stmt); | ||
|  |             $sth->execute(@bindvals); | ||
|  |             if ( $dbh->err ) { | ||
|  |                 warn "Processing $episode\n", $dbh->errstr; | ||
|  |             } | ||
|  |             else { | ||
|  |                 print "Deleted ",join(',',keys(%where)),"\n" if $verbose; | ||
|  |             } | ||
|  |         } | ||
|  |         else { | ||
|  |             print "Dry run mode:\n"; | ||
|  |             print "Statement: $stmt\n"; | ||
|  |             print "Bindvals: ", join( ", ", @bindvals ), "\n"; | ||
|  |         } | ||
|  |     } | ||
|  | 
 | ||
|  |     return $deletions; | ||
|  | } | ||
|  | 
 | ||
|  | #===  FUNCTION  ================================================================ | ||
|  | #         NAME: update_episode | ||
|  | #      PURPOSE: Perform a database update for a specific episode | ||
|  | #   PARAMETERS: $dbh1           Handle for the MariaDB database | ||
|  | #               $dbh2           Handle for the SQLite database | ||
|  | #               $episode        Target episode to update | ||
|  | #               $dry_run        Boolean dry run setting | ||
|  | #               $verbose        Boolean verbose setting | ||
|  | #      RETURNS: Nothing | ||
|  | #  DESCRIPTION: Performs an update for a single episode in the SQLite | ||
|  | #               database. The usual scenario is after the Community News show | ||
|  | #               has been posted, or a spelling mistake has been corrected for | ||
|  | #               an episode. The episode is selected from the MySQL database | ||
|  | #               and the SQLite one. The 'title' and 'summary' fields are | ||
|  | #               compared across the two copies, and if they differ the | ||
|  | #               necessary changes are set up to be executed or reported in | ||
|  | #               'dry run' mode. | ||
|  | #               NOTE: No longer done this way: | ||
|  | #               |A search for assets is made by reading the show notes and any | ||
|  | #               |notes that are linked from these, and if there are | ||
|  | #               |supplementary files they are accumulated and the database | ||
|  | #               |updated as necessary (under control of the 'dry run' option). | ||
|  | #               |This is currently the only way that the row in the SQLite | ||
|  | #               |database can be updated after it has been loaded from the | ||
|  | #               |MySQL database, other than when the metadata is collected and | ||
|  | #               |applied. The metadata update process does not fix errors in | ||
|  | #               |the 'title' or 'summary' fields. | ||
|  | #       THROWS: No exceptions | ||
|  | #     COMMENTS: None | ||
|  | #     SEE ALSO: N/A | ||
|  | #=============================================================================== | ||
|  | sub update_episode { | ||
|  |     my ( $dbh1, $dbh2, $episode, $dry_run, $verbose ) = @_; | ||
|  | 
 | ||
|  |     my ( $sql1, $sql2, $sth1, $h1, $sth2, $h2, $sth3 ); | ||
|  |     my ( %assets, $asset_count, $assets_added, $assets_deleted, $assets_updated ); | ||
|  |     my ( $id, $notes, $links ); | ||
|  |     my ( $updates, %fieldvals, %where ); | ||
|  | 
 | ||
|  |     # | ||
|  |     # SQL to find new rows in the MySQL database | ||
|  |     # | ||
|  |     $sql1 = q{ | ||
|  |         SELECT | ||
|  |             id, | ||
|  |             date, | ||
|  |             title, | ||
|  |             summary, | ||
|  |             notes | ||
|  |         FROM eps | ||
|  |         WHERE id = ? | ||
|  |     }; | ||
|  | 
 | ||
|  |     # | ||
|  |     # SQL to look for pre-existing episodes in the SQLite database | ||
|  |     # | ||
|  |     $sql2 = q{ | ||
|  |         SELECT * FROM episodes WHERE id = ? | ||
|  |     }; | ||
|  | 
 | ||
|  |     # | ||
|  |     # Prepare the SQL statements | ||
|  |     # | ||
|  |     $sth1 = $dbh1->prepare($sql1); | ||
|  |     $sth2 = $dbh2->prepare($sql2); | ||
|  | 
 | ||
|  |     # | ||
|  |     # Query MySQL, for the $episode | ||
|  |     # | ||
|  |     $sth1->execute($episode); | ||
|  |     if ( $dbh1->err ) { | ||
|  |         croak $dbh1->errstr; | ||
|  |     } | ||
|  | 
 | ||
|  |     # | ||
|  |     # Loop through the MySQL row (should only be one) | ||
|  |     # | ||
|  |     $asset_count = $updates = 0; | ||
|  |     while ( $h1 = $sth1->fetchrow_hashref ) { | ||
|  |         # | ||
|  |         # Query the SQLite database for the episode number we just got | ||
|  |         # | ||
|  |         $sth2->execute( $h1->{id} ); | ||
|  |         if ( $dbh2->err ) { | ||
|  |             croak $dbh2->errstr; | ||
|  |         } | ||
|  |         $h2 = $sth2->fetchrow_hashref; | ||
|  | 
 | ||
|  |         # | ||
|  |         # If the row doesn't exist we have a problem. | ||
|  |         # TODO: Could we just add the episode at this point? | ||
|  |         # | ||
|  |         unless ($h2) { | ||
|  |             warn "Row $episode is not in the SQLite database\n"; | ||
|  |             return; | ||
|  |         } | ||
|  | 
 | ||
|  |         # | ||
|  |         # Look for title and summary changes and build a hash holding the | ||
|  |         # changes | ||
|  |         # | ||
|  |         foreach my $fld ( 'title', 'summary' ) { | ||
|  |             if ( !equal( $h1->{$fld}, $h2->{$fld} ) ) { | ||
|  |                 $updates++; | ||
|  |                 $fieldvals{$fld} = $h1->{$fld}; | ||
|  |             } | ||
|  |         } | ||
|  | 
 | ||
|  |         if ($updates > 0 ) { | ||
|  |             # | ||
|  |             # Prepare to use SQL::Abstract | ||
|  |             # | ||
|  |             my $sql = SQL::Abstract->new(); | ||
|  | 
 | ||
|  |             $where{id} = $episode; | ||
|  |             my ( $stmt, @bindvals ) | ||
|  |                 = $sql->update( 'episodes', \%fieldvals, \%where ); | ||
|  | 
 | ||
|  |             # | ||
|  |             # Perform the updates (unless in dry-run mode) | ||
|  |             # | ||
|  |             unless ($dry_run) { | ||
|  |                 $sth3 = $dbh2->prepare($stmt); | ||
|  |                 $sth3->execute(@bindvals); | ||
|  |                 if ( $dbh2->err ) { | ||
|  |                     warn "Processing $episode\n", $dbh2->errstr; | ||
|  |                 } | ||
|  |                 else { | ||
|  |                     print "Updated ",join(',',keys(%fieldvals)),"\n" if $verbose; | ||
|  |                 } | ||
|  |             } | ||
|  |             else { | ||
|  |                 print "Dry run mode:\n"; | ||
|  |                 print "Statement: $stmt\n"; | ||
|  |                 print "Bindvals: ", join( ", ", @bindvals ), "\n"; | ||
|  |             } | ||
|  |         } | ||
|  | 
 | ||
|  |         # | ||
|  |         # Check this show for "assets". Even in dry-run mode this gets | ||
|  |         # executed | ||
|  |         # | ||
|  |         $links = 0; | ||
|  | 
 | ||
|  |         $id = $h1->{id}; | ||
|  |         $notes = $h1->{notes}; | ||
|  |         # $links = find_links($id,$notes,\%assets); | ||
|  |         $links = find_assets($ssh,$episode,$rempath,\%assets); | ||
|  | 
 | ||
|  |         $asset_count += $links; | ||
|  | 
 | ||
|  |     } | ||
|  | 
 | ||
|  |     if ( $asset_count > 0 ) { | ||
|  |         _debug( $DEBUG >= 2, "Scanned assets:\n" . Dumper( \%assets ) ); | ||
|  |     } | ||
|  |     else { | ||
|  |         _debug( $DEBUG >= 2, "No assets found" ); | ||
|  |     } | ||
|  | 
 | ||
|  |     # | ||
|  |     # Add any assets collected from the episode just processed | ||
|  |     # | ||
|  |     $assets_added = 0; | ||
|  |     if ($asset_count > 0) { | ||
|  |         ( $assets_added, $assets_deleted, $assets_updated ) = | ||
|  |             process_assets( $dbh2, \%assets, 0, $dry_run, $verbose ); | ||
|  |     } | ||
|  | 
 | ||
|  |     printf "%-16s %d\n", "Episode checked:", $episode; | ||
|  |     printf "%-16s %d\n", "Episode updates:", $updates; | ||
|  |     printf "%-16s %d\n", "Assets found:",    $asset_count; | ||
|  |     printf "%-16s %d\n", "Assets added:",    coalesce( $assets_added,   0 ); | ||
|  |     printf "%-16s %d\n", "Assets deleted:",  coalesce( $assets_deleted, 0 ); | ||
|  |     printf "%-16s %d\n", "Assets updated:",  coalesce( $assets_updated, 0 ); | ||
|  | 
 | ||
|  |     $log->info("Episode number: $episode"); | ||
|  |     $log->info("Episode updates: $updates"); | ||
|  |     $log->info( 'Assets added,deleted,updated:', | ||
|  |         coalesce( $assets_added, 0 ), ',', | ||
|  |         coalesce( $assets_deleted, 0 ), ',', | ||
|  |         coalesce( $assets_updated, 0 ) | ||
|  |     ); | ||
|  | } | ||
|  | 
 | ||
|  | #===  FUNCTION  ================================================================ | ||
|  | #         NAME: load_metadata | ||
|  | #      PURPOSE: Loads values from a JSON file which has been generated by the | ||
|  | #               'ia metadata' command and stores them in the 'episodes' table. | ||
|  | #               Also uses the JSON data to determine if the derived files and | ||
|  | #               the source file have been uploaded and which assets have been | ||
|  | #               uploaded. | ||
|  | #   PARAMETERS: $dbh            Handle for the SQLite database | ||
|  | #               $file           Name of file of JSON data | ||
|  | #               $dry_run        Boolean dry run setting | ||
|  | #               $verbose        Boolean verbose setting | ||
|  | #      RETURNS: Nothing | ||
|  | #  DESCRIPTION: The input file is expected to consist of many JSON complex | ||
|  | #               objects, generated by the command 'ia metadata <identifier>' | ||
|  | #               for a list of HPR episodes (see script 'collect_show_data'). | ||
|  | #               A count is kept of successful updates, and this is reported | ||
|  | #               once finished. | ||
|  | #       THROWS: No exceptions | ||
|  | #     COMMENTS: None | ||
|  | #     SEE ALSO: N/A | ||
|  | #=============================================================================== | ||
|  | sub load_metadata { | ||
|  |     my ( $dbh, $file, $dry_run, $verbose ) = @_; | ||
|  | 
 | ||
|  |     my ( $filebuffer, @jsonbuffer, %cols, %fieldvals, %where ); | ||
|  |     my ( @deletions, $index ); | ||
|  |     my ( @stash, @assets, $orig_count, $source, $epid, $count, $updates ); | ||
|  |     my ( $sth1, $sth2, $sth3, $rv, $h ); | ||
|  |     my $template = "https://archive.org/details/hpr%04d"; | ||
|  |     my @flds = ( | ||
|  |         'uploaded',          'has_files', | ||
|  |         'with_files',        'with_derived', | ||
|  |         'with_source',       'archive_date', | ||
|  |         'item_last_updated', 'IA_URL', | ||
|  |     ); | ||
|  |     my $re = qr{^hpr\d{4}\.(flac|mp3|ogg|opus|spx|wav)$}; | ||
|  | 
 | ||
|  |     # | ||
|  |     # Read the entire JSON file into a buffer | ||
|  |     # | ||
|  |     try { | ||
|  |         $filebuffer = read_text($file); | ||
|  |     } | ||
|  | 
 | ||
|  |     catch { | ||
|  |         die "Failed to read JSON file $file\n"; | ||
|  |     }; | ||
|  | 
 | ||
|  |     # | ||
|  |     # Parse the JSON from the buffer | ||
|  |     # | ||
|  |     my $json = JSON->new; | ||
|  |     @jsonbuffer = $json->incr_parse($filebuffer); | ||
|  | 
 | ||
|  |     #print Dumper(\@jsonbuffer),"\n"; | ||
|  | 
 | ||
|  |     # | ||
|  |     # Check that this is the type of JSON we need | ||
|  |     # | ||
|  |     unless (@jsonbuffer) { | ||
|  |         warn "Empty JSON?\n"; | ||
|  |         return; | ||
|  |     } | ||
|  | 
 | ||
|  |     # | ||
|  |     # Look at each top-level JSON object and check for missing components. | ||
|  |     # It's possible for there to be missing objects, but only in very rare | ||
|  |     # cases when a range has been requested yet there's a gap in that range | ||
|  |     # (reserved slot perhaps). We accumulate indices of invalid objects in | ||
|  |     # @jsonbuffer and then delete the invalid objects. | ||
|  |     # | ||
|  |     $index = 0; | ||
|  |     foreach my $obj (@jsonbuffer) { | ||
|  |         unless ( exists( $obj->{metadata} ) | ||
|  |             && exists( $obj->{metadata}->{identifier} ) | ||
|  |             && exists( $obj->{files} )) | ||
|  |         { | ||
|  |             warn "Invalid JSON? Skipped object #$index\n"; | ||
|  |             push( @deletions, $index ); | ||
|  |             next; | ||
|  |         } | ||
|  |         $index++; | ||
|  |     } | ||
|  | 
 | ||
|  |     # | ||
|  |     # Delete any bad objects we found in the previous pass | ||
|  |     # | ||
|  |     for my $ind (@deletions) { | ||
|  |         splice( @jsonbuffer, $ind, 1 ); | ||
|  |         print STDERR "Deleted invalid item with index $ind\n" if $verbose; | ||
|  |     } | ||
|  |     print "\n" if $verbose; | ||
|  | 
 | ||
|  |     # | ||
|  |     # Prepare to get the database row relating to an episode | ||
|  |     # | ||
|  |     $sth1 = $dbh->prepare("SELECT * FROM episodes WHERE id = ?"); | ||
|  | 
 | ||
|  |     # | ||
|  |     # Prepare to get the rows relating to assets for an episode | ||
|  |     # | ||
|  |     $sth2 = $dbh->prepare("SELECT * FROM assets WHERE episode_id = ?"); | ||
|  | 
 | ||
|  |     $count = 0; | ||
|  |     foreach my $obj (@jsonbuffer) { | ||
|  |         if ( $obj->{metadata}->{identifier} =~ /hpr(\d{4})/ ) { | ||
|  |             $epid = $1 + 0; | ||
|  | 
 | ||
|  |             # | ||
|  |             # Skip the episodes in the range 1-620 | ||
|  |             # | ||
|  |             #next if ( $epid <= 620 ); | ||
|  |             # | ||
|  |             # 2021-06-27: We don't skip any more. Shows 1-620 have been | ||
|  |             # uploaded in blocks, but we want them on the IA in the same | ||
|  |             # format as all the others. Today we hit 620, working backwards | ||
|  |             # from the earliest IA upload as a project to upload everything | ||
|  |             # with show notes and assets. The existing database entries were | ||
|  |             # changed to make the 'notes' field hold the text 'Block upload' | ||
|  |             # followed by the block archive date and the URL. Then the rows | ||
|  |             # between 1-620 are then to have the new date and URL and these | ||
|  |             # columns have been made NULL in preparation. See the Journal for | ||
|  |             # more information. | ||
|  |             # | ||
|  | 
 | ||
|  |             if ($verbose) { | ||
|  |                 print "Processing ",  $obj->{metadata}->{identifier}, "\n"; | ||
|  |                 print "Publicdate: ", $obj->{metadata}->{publicdate}, "\n"; | ||
|  |             } | ||
|  | 
 | ||
|  |             # | ||
|  |             # Process the files in the metadata | ||
|  |             # | ||
|  |             $orig_count = 0; | ||
|  |             $source     = 0; | ||
|  |             undef(@stash); | ||
|  |             foreach my $f ( @{ $obj->{files} } ) { | ||
|  |                 # | ||
|  |                 # Skip anything with a 'Metatda' format or which has a source | ||
|  |                 # of 'derivative' or 'metadata' or is the newly added | ||
|  |                 # '__ia_thumb.jpg' | ||
|  |                 # | ||
|  |                 next | ||
|  |                     if ( $f->{format} eq 'Metadata' | ||
|  |                     || $f->{source} =~ /^(derivative|metadata)$/ | ||
|  |                     || $f->{name} eq '__ia_thumb.jpg' | ||
|  |                     || $f->{name} =~ /^history\/files\// ); | ||
|  | 
 | ||
|  |                 # | ||
|  |                 # Count audio files which have been "derived" (by us) prior to upload, | ||
|  |                 # and those that look like 'source' files (e.g. | ||
|  |                 # hpr2472_source.flac). Anything else store in @stash because | ||
|  |                 # it's probably an asset file. | ||
|  |                 # | ||
|  |                 if ( $f->{name} =~ $re ) { | ||
|  |                     $orig_count++; | ||
|  |                 } | ||
|  |                 elsif ( $f->{name} =~ /^hpr\d{4}_source\..+$/ ) { | ||
|  |                     $source++; | ||
|  |                 } | ||
|  |                 else { | ||
|  |                     push(@stash,$f->{name}); | ||
|  |                 } | ||
|  | 
 | ||
|  |                 printf "%s %s\n", $f->{name}, $f->{source} if $verbose; | ||
|  | 
 | ||
|  |             } | ||
|  | 
 | ||
|  |             # | ||
|  |             # Check the assets before updating the episode | ||
|  |             # | ||
|  |             @assets = collect_assets($dbh, $sth2, $epid); | ||
|  |             _debug( $DEBUG >= 2, '@assets: ' . Dumper(\@assets)); | ||
|  |             _debug( $DEBUG >= 2, '@stash: ' . Dumper(\@stash)); | ||
|  | 
 | ||
|  |             if (@assets || @stash) { | ||
|  |                 @assets = sort(@assets); | ||
|  |                 @stash = sort(@stash); | ||
|  | 
 | ||
|  |                 # | ||
|  |                 # Smart match the @assets and @stash arrays to look for | ||
|  |                 # difference. In @assets we have what the database holds, and | ||
|  |                 # in @stash we have stored what we found in the JSON. | ||
|  |                 # NOTE: Now that Smartmatch ('~~') is deprecated we use the | ||
|  |                 # rather clever 'match::smart' with its strange |M| operator. | ||
|  |                 # It needs arrays being matched to be offered as references | ||
|  |                 # however. | ||
|  |                 # | ||
|  |                 # unless ( @assets ~~ @stash ) { | ||
|  |                 unless ( \@assets |M| \@stash ) { | ||
|  |                     print "Difference between DB assets and IA\n"; | ||
|  |                     print "DB assets:\n    ",join("\n    ",@assets),"\n"; | ||
|  |                     print "IA assets:\n    ",join("\n    ",@stash),"\n"; | ||
|  |                 } | ||
|  | 
 | ||
|  |                 # | ||
|  |                 # In live mode mark all assets belonging to this episode as | ||
|  |                 # 'uploaded'. | ||
|  |                 # TODO: Is this dangerous? | ||
|  |                 # | ||
|  |                 unless ($dry_run) { | ||
|  |                     mark_assets_uploaded($dbh,$epid); | ||
|  |                 } | ||
|  |                 else { | ||
|  |                     print "Update of 'assets' table skipped; dry run\n"; | ||
|  |                 } | ||
|  |             } | ||
|  | 
 | ||
|  |             # | ||
|  |             # Prepare to use SQL::Abstract | ||
|  |             # | ||
|  |             my $sql = SQL::Abstract->new(); | ||
|  | 
 | ||
|  |             # | ||
|  |             # Get the current row for this episode for comparison to find what | ||
|  |             # has been changed. | ||
|  |             # | ||
|  |             $rv = $sth1->execute($epid); | ||
|  |             if ( $dbh->err ) { | ||
|  |                 warn $dbh->errstr; | ||
|  |             } | ||
|  | 
 | ||
|  |             $h = $sth1->fetchrow_hashref; | ||
|  | 
 | ||
|  |             # | ||
|  |             # Store the changes | ||
|  |             # | ||
|  |             $cols{'uploaded'}          = 1; | ||
|  |             $cols{'has_files'}         = ( scalar(@stash) > 0 ? 1 : 0 ); | ||
|  |             $cols{'with_files'}        = ( scalar(@stash) > 0 ? 1 : 0 ); | ||
|  |             $cols{'with_derived'}      = ( $orig_count >= 2   ? 1 : 0 ); | ||
|  |             $cols{'with_source'}       = ( $source > 0        ? 1 : 0 ); | ||
|  |             $cols{'archive_date'}      = $obj->{metadata}->{publicdate}; | ||
|  |             $cols{'item_last_updated'} = coalesce($obj->{item_last_updated},0); | ||
|  |             $cols{'IA_URL'}            = sprintf( $template, $epid ); | ||
|  | 
 | ||
|  |             # | ||
|  |             # Look for differences, storing the updated values in %fieldvals | ||
|  |             # and counting the changes. | ||
|  |             # | ||
|  |             $updates = 0; | ||
|  |             undef(%fieldvals); | ||
|  |             foreach my $fld (@flds) { | ||
|  |                 if ( !equal( $h->{$fld}, $cols{$fld} ) ) { | ||
|  |                     $updates++; | ||
|  |                     $fieldvals{$fld} = $cols{$fld}; | ||
|  |                 } | ||
|  |             } | ||
|  | 
 | ||
|  |             if ( $updates > 0 ) { | ||
|  |                 # | ||
|  |                 # Build the update statement containing changes. It will be: | ||
|  |                 # "UPDATE episodes SET a = b, c = d, ... WHERE id = $epid" | ||
|  |                 # | ||
|  |                 $where{id} = $epid; | ||
|  |                 my ( $stmt, @bindvals ) | ||
|  |                     = $sql->update( 'episodes', \%fieldvals, \%where ); | ||
|  | 
 | ||
|  |                 # | ||
|  |                 # Perform the updates (unless in dry-run mode) | ||
|  |                 # | ||
|  |                 unless ($dry_run) { | ||
|  |                     $sth3 = $dbh->prepare($stmt); | ||
|  |                     $sth3->execute(@bindvals); | ||
|  |                     if ( $dbh->err ) { | ||
|  |                         warn "Processing $epid\n", $dbh->errstr; | ||
|  |                     } | ||
|  |                     else { | ||
|  |                         print "Updated\n" if $verbose; | ||
|  |                         $count++; | ||
|  |                     } | ||
|  |                 } | ||
|  |                 else { | ||
|  |                     print "Dry run mode:\n"; | ||
|  |                     print "Statement: $stmt\n"; | ||
|  |                     print "Bindvals: ", join( ", ", @bindvals ), "\n"; | ||
|  |                 } | ||
|  |             } | ||
|  | 
 | ||
|  |         } | ||
|  |         else { | ||
|  |             printf "Unexpected identifier: %s\n", $obj->{metadata}->{identifier}; | ||
|  |         } | ||
|  | 
 | ||
|  |         print '-' x 80, "\n" if $verbose; | ||
|  |     } | ||
|  | 
 | ||
|  |     print "Number of updates: $count\n"; | ||
|  |     $log->info("Number of updates from JSON: $count"); | ||
|  | 
 | ||
|  | } | ||
|  | 
 | ||
|  | #===  FUNCTION  ================================================================ | ||
|  | #         NAME: collect_assets | ||
|  | #      PURPOSE: Collects asset names for a given episode | ||
|  | #   PARAMETERS: $dbh            Handle for the SQLite database | ||
|  | #               $sth            Statement handle for the prepared query | ||
|  | #               $epid           Episode id | ||
|  | #      RETURNS: A list of the asset names | ||
|  | #  DESCRIPTION: Just prepares a list of asset filenames by querying the SQLite | ||
|  | #               'assets' table and returns it to the caller | ||
|  | #       THROWS: No exceptions | ||
|  | #     COMMENTS: None | ||
|  | #     SEE ALSO: N/A | ||
|  | #=============================================================================== | ||
|  | sub collect_assets { | ||
|  |     my ( $dbh, $sth, $epid ) = @_; | ||
|  | 
 | ||
|  |     my ( $rv, $h, @result ); | ||
|  | 
 | ||
|  |     # | ||
|  |     # Query the database | ||
|  |     # | ||
|  |     $rv = $sth->execute($epid); | ||
|  |     if ( $dbh->err ) { | ||
|  |         warn $dbh->errstr; | ||
|  |     } | ||
|  | 
 | ||
|  |     while ($h = $sth->fetchrow_hashref) { | ||
|  |         push(@result,$h->{filename}); | ||
|  |     } | ||
|  | 
 | ||
|  |     return @result; | ||
|  | } | ||
|  | 
 | ||
|  | #===  FUNCTION  ================================================================ | ||
|  | #         NAME: mark_assets_uploaded | ||
|  | #      PURPOSE: Marks the assets relating to an episode as 'uploaded' | ||
|  | #   PARAMETERS: $dbh            Handle for the SQLite database | ||
|  | #               $epid           Episode id | ||
|  | #      RETURNS: Number of updates | ||
|  | #  DESCRIPTION: Mark any assets for a given episode as uploaded, if they are | ||
|  | #               not so marked. Expected to be used during an update from JSON | ||
|  | #               data collected from the IA. | ||
|  | #       THROWS: No exceptions | ||
|  | #     COMMENTS: None | ||
|  | #     SEE ALSO: N/A | ||
|  | #=============================================================================== | ||
|  | sub mark_assets_uploaded { | ||
|  |     my ( $dbh, $epid ) = @_; | ||
|  | 
 | ||
|  |     my ($rows); | ||
|  | 
 | ||
|  |     $rows = $dbh->do( | ||
|  |         q{UPDATE assets SET uploaded = 1 | ||
|  |           WHERE episode_id = ? AND uploaded = 0}, | ||
|  |         undef, | ||
|  |         $epid, | ||
|  |     ) or die $dbh->errstr; | ||
|  | 
 | ||
|  |     $rows += 0; | ||
|  | 
 | ||
|  |     return $rows; | ||
|  | } | ||
|  | 
 | ||
|  | #===  FUNCTION  ================================================================ | ||
|  | #         NAME: scan_dir | ||
|  | #      PURPOSE: Scans a file from the HPR server generated with a simple | ||
|  | #               'find' command which lists all the files and directories under | ||
|  | #               the ~/www/eps/ directory. | ||
|  | #   PARAMETERS: $dbh            Handle for the SQLite database | ||
|  | #               $file           Name of file of directory data | ||
|  | #      RETURNS: Nothing | ||
|  | #  DESCRIPTION: Under development | ||
|  | #       THROWS: No exceptions | ||
|  | #     COMMENTS: None | ||
|  | #     SEE ALSO: N/A | ||
|  | #=============================================================================== | ||
|  | sub scan_dir { | ||
|  |     my ( $dbh, $file ) = @_; | ||
|  | 
 | ||
|  |     my ( %dirtree, $ptr, %shows, $key, $id ); | ||
|  | 
 | ||
|  |     # | ||
|  |     # Open the file of paths | ||
|  |     # | ||
|  |     open( my $fh, "<:encoding(UTF-8)", $file ) | ||
|  |         or die "Unable to open '$file'\n"; | ||
|  | 
 | ||
|  |     # | ||
|  |     # Loop through all paths. If it's one of the audio files then skip it | ||
|  |     # | ||
|  |     while ( my $line = <$fh> ) { | ||
|  |         chomp($line); | ||
|  | 
 | ||
|  |         next if ( $line =~ /(flac|mp3|ogg|opus|spx|wav)$/ ); | ||
|  | 
 | ||
|  |         # | ||
|  |         # Point into the tree hash and add the necessary nodes from the split | ||
|  |         # path. The result is a hierarchical has structure where the 'leaf' | ||
|  |         # node has an empty anonymous hash as its value. | ||
|  |         # | ||
|  |         # %dirtree = { | ||
|  |         #     'www' => { | ||
|  |         #        'eps' => { | ||
|  |         #            'hpr2356' => { | ||
|  |         #                 'index.html' => {}, | ||
|  |         #                 'fix-ssh-on-pi.bash.txt' => {} | ||
|  |         #             }, | ||
|  |         #         }, | ||
|  |         #     }, | ||
|  |         # } | ||
|  |         # | ||
|  |         $ptr = \%dirtree; | ||
|  |         $ptr = $ptr->{$_} //= {} for split( /\//, $line ); | ||
|  | 
 | ||
|  |     } | ||
|  | 
 | ||
|  |     close($fh); | ||
|  | 
 | ||
|  |     #print Dumper(\%dirtree),"\n"; | ||
|  | 
 | ||
|  |     # | ||
|  |     # Build a hash indexed by IA identifier containing an array per value with | ||
|  |     # the files relating to the show therein | ||
|  |     # | ||
|  |     $ptr = \%dirtree; | ||
|  |     # TODO if there are multiple items at a level which one to test | ||
|  |     $key = ( keys(%$ptr) )[0]; | ||
|  |     #$key = first { defined($_) } keys(%$ptr); | ||
|  |     until ( $key =~ /^hpr[0-9]{4}/ ) { | ||
|  |         print "$key\n"; | ||
|  |         $ptr = $ptr->{$key}; | ||
|  |         $key = ( keys(%$ptr) )[0]; | ||
|  |     } | ||
|  |     #print join(", ",keys(%$ptr)); | ||
|  | 
 | ||
|  |     # TODO Doesn't deal with sub-directories | ||
|  |     foreach my $k ( keys(%$ptr) ) { | ||
|  |         if ( ($id) = ( $k =~ /^(hpr[0-9]{4})/ ) ) { | ||
|  |             unless ( defined( $shows{$id} ) ) { | ||
|  |                 $shows{$id} = []; | ||
|  |             } | ||
|  |             push( @{ $shows{$id} }, $k ); | ||
|  |         } | ||
|  |     } | ||
|  | 
 | ||
|  |     print Dumper( \%shows ), "\n"; | ||
|  | 
 | ||
|  | } | ||
|  | 
 | ||
|  | #===  FUNCTION  ================================================================ | ||
|  | #         NAME: find_assets | ||
|  | #      PURPOSE: An alternative way to look for assets; connect to the server | ||
|  | #               with SSH and use a 'find' command for matching files and | ||
|  | #               directories. | ||
|  | #   PARAMETERS: $ssh            Net::OpenSSH object previously set up to | ||
|  | #                               connect to the server | ||
|  | #               $episode        episode number we're dealing with | ||
|  | #               $rempath        path to search on the server | ||
|  | #               $rfiles         hashref to receive the files (assets) found | ||
|  | #      RETURNS: Number of files found | ||
|  | #  DESCRIPTION:  | ||
|  | #       THROWS: No exceptions | ||
|  | #     COMMENTS: None | ||
|  | #     SEE ALSO: N/A | ||
|  | #=============================================================================== | ||
|  | sub find_assets { | ||
|  |     my ($ssh, $episode, $rempath, $rfiles) = @_; | ||
|  | 
 | ||
|  |     my $urlbase = 'https://hackerpublicradio.org/eps/'; | ||
|  |     my (@result, @row, $URL, $file, $assetcount); | ||
|  | 
 | ||
|  |     $rempath .= '/' unless ($rempath =~ /\/$/); | ||
|  | 
 | ||
|  |     @result = $ssh->capture({ timeout => 10 }, | ||
|  |         "find $rempath -regextype egrep -regex '.*/hpr${episode}.*' -printf '%y,%p,%s\n'"); | ||
|  |     $ssh->error and warn "remote 'find' command failed: " . $ssh->error; | ||
|  | 
 | ||
|  | #       d,/home/hpr/www/eps/hpr3775,85 | ||
|  | #       f,/home/hpr/www/eps/hpr3775/hpr3775_source.wav,75006546 | ||
|  | #       f,/home/hpr/www/eps/hpr3775/hpr3775.vtt,13127 | ||
|  | #       f,/home/hpr/www/eps/hpr3775/hpr3775.srt,13830 | ||
|  | #       f,/home/hpr/www/eps/hpr3775/hpr3775.txt,11119 | ||
|  | #       f,/home/hpr/www/eps/hpr3775.mp3,7449559 | ||
|  | #       f,/home/hpr/www/eps/hpr3775.ogg,10631998 | ||
|  | #       f,/home/hpr/www/eps/hpr3775.spx,3516736 | ||
|  | 
 | ||
|  |     $assetcount = 0; | ||
|  |     $rfiles->{$episode} = []; | ||
|  |     for my $file (@result) { | ||
|  |         @row = split( ',', $file ); | ||
|  |         if ( $row[0] eq 'f' ) { | ||
|  |             unless ( $row[1] =~ /$episode\.(mp3|ogg|spx)$/ ) { | ||
|  |                 ( $URL  = $row[1] ) =~ s/$rempath/$urlbase/; | ||
|  |                 ( $file = $row[1] ) =~ s/$rempath//; | ||
|  | 
 | ||
|  |                 push( | ||
|  |                     @{ $rfiles->{$episode} }, | ||
|  |                         { filename => $file, URL => $URL } | ||
|  |                 ); | ||
|  | 
 | ||
|  |                 $assetcount++; | ||
|  |             } | ||
|  |         } | ||
|  |     } | ||
|  | 
 | ||
|  |     _debug( $DEBUG >= 2, "Results from 'find':\n" . Dumper( \@result ) ); | ||
|  | 
 | ||
|  |     # | ||
|  |     # Return the asset count | ||
|  |     # | ||
|  |     return $assetcount; | ||
|  | 
 | ||
|  | } | ||
|  | 
 | ||
|  | #===  FUNCTION  ================================================================ | ||
|  | #         NAME: find_links | ||
|  | #      PURPOSE: Parses the HTML in a string for links so that a list of assets | ||
|  | #               relating to this show can be made | ||
|  | #   PARAMETERS: $episode        episode number we're dealing with | ||
|  | #               $html           string containing HTML | ||
|  | #               $rlinks         hashref to receive the links found | ||
|  | #      RETURNS: Number of links found | ||
|  | #  DESCRIPTION: Given HTML from the main notes or a subsidiary file the | ||
|  | #               function parses this looking for links in 'a' or 'img' tags. | ||
|  | #               Links are standardised, making them absolute if relative and | ||
|  | #               removing any 'fragment'. The links need to be to files on the | ||
|  | #               HPR website to be of interest. If so the filename part is | ||
|  | #               extracted. If it follows the format 'hpr9999' then it's | ||
|  | #               checked to see if it's for the current show. If not it's | ||
|  | #               ignored. If the filename ends with a '/' then it's assumed | ||
|  | #               it's shorthand for 'index.html' so this name is appended. Then | ||
|  | #               the link and filename are stashed in the hash referenced by | ||
|  | #               $rlinks. If the filename ends with '.html' then we need to | ||
|  | #               parse it in turn, so we get the contents of the link and | ||
|  | #               recurse to parse it. We return the number of HPR links found | ||
|  | #               in the pass through the HTML. | ||
|  | #       THROWS: No exceptions | ||
|  | #     COMMENTS: None | ||
|  | #     SEE ALSO: N/A | ||
|  | #=============================================================================== | ||
|  | sub find_links { | ||
|  |     my ( $episode, $html, $rlinks ) = @_; | ||
|  | 
 | ||
|  |     my ($tree, $epstr, $linkre,    $re2, $filepath, | ||
|  |         $uri,  $slink, $linkcount, $content | ||
|  |     ); | ||
|  | 
 | ||
|  |     _debug( $DEBUG >= 3, "find_links enter\n" ); | ||
|  | 
 | ||
|  |     # | ||
|  |     # Create a tree object | ||
|  |     # | ||
|  |     $tree = HTML::TreeBuilder->new; | ||
|  |     $tree->ignore_unknown(0); | ||
|  |     $tree->no_expand_entities(1); | ||
|  |     $tree->p_strict(1); | ||
|  |     $tree->store_comments(1); | ||
|  |     $tree->warn(1); | ||
|  | 
 | ||
|  |     $tree->parse_content($html) | ||
|  |         or die "HTML::TreeBuilder failed to parse notes: $!\n"; | ||
|  | 
 | ||
|  |     my $baseURL = "https://hackerpublicradio.org"; | ||
|  | 
 | ||
|  |     $epstr = sprintf( "hpr%04d", $episode ); | ||
|  |     $linkre = qr{ | ||
|  |         ^https?:// | ||
|  |         (?:www.)? | ||
|  |         (?:hacker|hobby)publicradio.org/eps/ | ||
|  |         (.+)$ | ||
|  |     }x; | ||
|  |     #(?:(hpr$epstr/.+)|(hpr$epstr.+)|(.+))$ | ||
|  | 
 | ||
|  |     # | ||
|  |     # Counting new links found and stashed | ||
|  |     # | ||
|  |     $linkcount = 0; | ||
|  | 
 | ||
|  |     # | ||
|  |     # Scan for links | ||
|  |     # | ||
|  |     for ( @{ $tree->extract_links( 'a', 'img' ) } ) { | ||
|  |         my ( $link, $element, $attr, $tag ) = @$_; | ||
|  | 
 | ||
|  |         # | ||
|  |         # Standardise the link (expands relative URLs, removes any fragment). | ||
|  |         # Set $URI::ABS_REMOTE_LEADING_DOTS to ensure leading dots in relative | ||
|  |         # URIs are removed. | ||
|  |         # | ||
|  |         local $URI::ABS_REMOTE_LEADING_DOTS = 1; | ||
|  |         $uri = URI->new_abs( $link, $baseURL ); | ||
|  |         $slink = sprintf( "%s:%s", $uri->scheme, $uri->opaque ); | ||
|  | 
 | ||
|  |         # | ||
|  |         # Is it an HPR link? | ||
|  |         # | ||
|  |         if ( $slink =~ $linkre ) { | ||
|  |             # | ||
|  |             # The URL we found might be a link into an HTML file with an | ||
|  |             # '#anchor' component ("fragment"). Save the last bracketed match, | ||
|  |             # without any 'fragment' if there is one to get a clean filename | ||
|  |             # or path. | ||
|  |             # | ||
|  |             ( $filepath = "$+" ) =~ s/#.*$//; | ||
|  | 
 | ||
|  |             _debug( $DEBUG >= 3, "Link:      $slink\n" ); | ||
|  |             _debug( $DEBUG >= 3, "File path: $filepath\n" ); | ||
|  | 
 | ||
|  |             # | ||
|  |             # Does this file path begin with an 'hpr' prefix? If so is it the | ||
|  |             # show id? If not we don't want to process it. | ||
|  |             # | ||
|  |             if ( $filepath =~ /^(hpr[0-9]{1,4})/ ) { | ||
|  |                 if ( $1 ne $epstr ) { | ||
|  |                     _debug( $DEBUG >= 3, "Ignored $slink\n" ); | ||
|  |                     next; | ||
|  |                 } | ||
|  |             } | ||
|  | 
 | ||
|  |             # | ||
|  |             # The path and URL might end with a slash which means the URL is | ||
|  |             # relying on the Web server to fill in the filename as | ||
|  |             # 'index.html'. We have to make this explicit. | ||
|  |             # | ||
|  |             if ( $slink =~ /\/$/ ) { | ||
|  |                 $slink    .= 'index.html'; | ||
|  |                 $filepath .= 'index.html'; | ||
|  |             } | ||
|  | 
 | ||
|  |             # | ||
|  |             # Initialise this hash element if needed | ||
|  |             # | ||
|  |             unless ( exists( $rlinks->{$episode} ) ) { | ||
|  |                 $rlinks->{$episode} = []; | ||
|  |             } | ||
|  | 
 | ||
|  |             # | ||
|  |             # Stash this filename if it's not already stashed, and if it's | ||
|  |             # HTML get the link and recurse | ||
|  |             # | ||
|  |             unless ( | ||
|  |                 any { $_->{filename} eq $filepath } @{ $rlinks->{$episode} } | ||
|  |                 ) | ||
|  |             { | ||
|  |                 _debug( $DEBUG >= 3, "Stashed $slink and $filepath\n" ); | ||
|  | 
 | ||
|  |                 push( | ||
|  |                     @{ $rlinks->{$episode} }, | ||
|  |                         { filename => $filepath, URL => $slink } | ||
|  |                 ); | ||
|  |                 $linkcount++; | ||
|  | 
 | ||
|  |                 # | ||
|  |                 # An HTML file has to be investigated | ||
|  |                 # | ||
|  |                 if ( $filepath =~ /\.html$/ ) { | ||
|  |                     $content = get($slink); | ||
|  |                     unless ( defined($content) ) { | ||
|  |                         carp "Link $slink returned nothing\n"; | ||
|  |                     } | ||
|  |                     else { | ||
|  |                         $linkcount | ||
|  |                             += find_links( $episode, $content, $rlinks ); | ||
|  |                     } | ||
|  |                 } | ||
|  | 
 | ||
|  |             } | ||
|  | 
 | ||
|  |         } | ||
|  |     } | ||
|  | 
 | ||
|  |     _debug( $DEBUG >= 3, "find_links exiting with $linkcount links\n" ); | ||
|  | 
 | ||
|  |     # | ||
|  |     # Return the link count | ||
|  |     # | ||
|  |     return $linkcount; | ||
|  | 
 | ||
|  | } | ||
|  | 
 | ||
|  | #===  FUNCTION  ================================================================ | ||
|  | #         NAME: count_rows | ||
|  | #      PURPOSE: Counts the rows in a given SQLite table | ||
|  | #   PARAMETERS: $dbh            Handle for the SQLite database | ||
|  | #               $table          Name of table | ||
|  | #      RETURNS: Number of rows | ||
|  | #  DESCRIPTION: Builds SQL from a template including the selected $table. | ||
|  | #               Returns the number of rows once the query has been run. | ||
|  | #       THROWS: No exceptions | ||
|  | #     COMMENTS: None | ||
|  | #     SEE ALSO: N/A | ||
|  | #=============================================================================== | ||
|  | sub count_rows { | ||
|  |     my ( $dbh, $table ) = @_; | ||
|  | 
 | ||
|  |     my ( $sql, $sth, $h, $count ); | ||
|  | 
 | ||
|  |     # | ||
|  |     # Count rows in the selected SQLite table | ||
|  |     # | ||
|  |     $sql = "SELECT count(*) AS rowcount FROM $table"; | ||
|  |     $sth = $dbh->prepare($sql); | ||
|  |     $sth->execute(); | ||
|  |     if ( $dbh->err ) { | ||
|  |         croak $dbh->errstr; | ||
|  |     } | ||
|  | 
 | ||
|  |     $count = 0; | ||
|  |     if ($h = $sth->fetchrow_hashref) { | ||
|  |         $count = $h->{rowcount}; | ||
|  |     } | ||
|  | 
 | ||
|  |     return $count; | ||
|  | 
 | ||
|  | } | ||
|  | 
 | ||
|  | #===  FUNCTION  ================================================================ | ||
|  | #         NAME: coalesce | ||
|  | #      PURPOSE: To find the first defined argument and return it | ||
|  | #   PARAMETERS: Arbitrary number of arguments | ||
|  | #      RETURNS: The first defined argument or undef if there are none | ||
|  | #  DESCRIPTION: Just a simple way of ensuring an 'undef' value is never | ||
|  | #               returned when doing so might be a problem. | ||
|  | #       THROWS: No exceptions | ||
|  | #     COMMENTS: None | ||
|  | #     SEE ALSO: N/A | ||
|  | #=============================================================================== | ||
|  | sub coalesce { | ||
|  |     foreach (@_) { | ||
|  |         return $_ if defined($_); | ||
|  |     } | ||
|  |     return undef;    ## no critic | ||
|  | } | ||
|  | 
 | ||
|  | #===  FUNCTION  ================================================================ | ||
|  | #         NAME: assets_match | ||
|  | #      PURPOSE: To compare found assets and those in the database | ||
|  | #   PARAMETERS: $found_obj      an asset object (hashref) found by parsing show | ||
|  | #                               notes | ||
|  | #               $db_obj         an asset object (hashref) from the database | ||
|  | #      RETURNS: Value 1 or 2 if they match, otherwise zero | ||
|  | #  DESCRIPTION:  | ||
|  | #       THROWS: No exceptions | ||
|  | #     COMMENTS: None | ||
|  | #     SEE ALSO: N/A | ||
|  | #=============================================================================== | ||
|  | sub assets_match { | ||
|  |     my ( $found_obj, $db_obj ) = @_; | ||
|  | 
 | ||
|  |     return 0 if ( !defined($found_obj) || !defined($db_obj) ); | ||
|  | 
 | ||
|  |     my ( $t1, $t2 ) = ( | ||
|  |         ( $found_obj->{filename} eq $db_obj->{filename} ), | ||
|  |         ( $found_obj->{URL} eq $db_obj->{URL} ) | ||
|  |     ); | ||
|  | 
 | ||
|  |     return 2 if ( $t1 && $t2 ); | ||
|  |     return 1 if ( $t1 || $t2 ); | ||
|  | } | ||
|  | 
 | ||
|  | #===  FUNCTION  ================================================================ | ||
|  | #         NAME: equal | ||
|  | #      PURPOSE: Compare two strings even if undefined | ||
|  | #   PARAMETERS: $s1             The first string | ||
|  | #               $s2             The second string | ||
|  | #      RETURNS: True if both strings are undefined, false if one isn't | ||
|  | #               defined, otherwise the result of comparing them. | ||
|  | #  DESCRIPTION: Works on the principle that two undefined strings are equal, | ||
|  | #               a defined and an undefined string are not, and otherwise they | ||
|  | #               are equal if they are equal! | ||
|  | #       THROWS: No exceptions | ||
|  | #     COMMENTS: None | ||
|  | #     SEE ALSO: N/A | ||
|  | #=============================================================================== | ||
|  | sub equal { | ||
|  |     my ( $s1, $s2 ) = @_; | ||
|  | 
 | ||
|  |     return 1 if ( !defined($s1) && !defined($s2) ); | ||
|  |     return 0 if ( !defined($s1) || !defined($s2) ); | ||
|  | 
 | ||
|  |     return ( $s1 eq $s2 ); | ||
|  | } | ||
|  | 
 | ||
|  | #===  FUNCTION  ================================================================ | ||
|  | #         NAME: _debug | ||
|  | #      PURPOSE: Prints debug reports | ||
|  | #   PARAMETERS: $active         Boolean: 1 for print, 0 for no print | ||
|  | #               $message        Message to print | ||
|  | #      RETURNS: Nothing | ||
|  | #  DESCRIPTION: Outputs a message if $active is true. It removes any trailing | ||
|  | #               newline and then adds one in the 'print' to the caller doesn't | ||
|  | #               have to bother. Prepends the message with 'D> ' to show it's | ||
|  | #               a debug message. | ||
|  | #       THROWS: No exceptions | ||
|  | #     COMMENTS: None | ||
|  | #     SEE ALSO: N/A | ||
|  | #=============================================================================== | ||
|  | sub _debug { | ||
|  |     my ( $active, $message ) = @_; | ||
|  | 
 | ||
|  |     chomp($message); | ||
|  |     print "D> $message\n" if $active; | ||
|  | } | ||
|  | 
 | ||
|  | #===  FUNCTION  ================================================================ | ||
|  | #         NAME: Usage | ||
|  | #      PURPOSE: Displays a usage message and exit | ||
|  | #   PARAMETERS: None | ||
|  | #      RETURNS: To command line level with exit value 1 | ||
|  | #  DESCRIPTION: Builds the usage message using global values | ||
|  | #       THROWS: no exceptions | ||
|  | #     COMMENTS: none | ||
|  | #     SEE ALSO: n/a | ||
|  | #=============================================================================== | ||
|  | #sub Usage { | ||
|  | #    print STDERR <<EOD; | ||
|  | # | ||
|  | #Usage: $PROG [options] directory | ||
|  | # | ||
|  | #$PROG v$VERSION | ||
|  | # | ||
|  | #    -help               Display this information | ||
|  | #    -[no]dry-run        Enable/disable dry run mode (default off) | ||
|  | #    -[no]verbose        Enable/disable verbose mode (default off) | ||
|  | #    -debug=N            Set the debug level | ||
|  | #    -mode=MODE          Selects the mode from the following list: | ||
|  | #                        - initialise: rebuild the SQLite database from MySQL | ||
|  | #                          (deletes any data already uploaded) | ||
|  | #                        - update: updates the SQLite database from updates | ||
|  | #                          found in the MySQL database | ||
|  | #                        - json: load the 'archive_date' field from a JSON | ||
|  | #                          file. Also sets the 'uploaded' flag and populates | ||
|  | #                          the 'IA_URL' field | ||
|  | #                          ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~  | ||
|  | #                        - scandir: scans a file captured from the HPR server | ||
|  | #                          by running a 'find' command on the 'eps' directory | ||
|  | #                          where all the show files reside. This file is | ||
|  | #                          scanned to determine which shows have files other | ||
|  | #                          than the audio so we can keep a tally of which shows | ||
|  | #                          have had these uploaded to the IA | ||
|  | #                          ** Not fully implemented ** | ||
|  | #                          ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~  | ||
|  | #    -episode=N1 [-episode=N2 ...] | ||
|  | #                        In -mode=update defines one or more single episodes to | ||
|  | #                        process otherwise, if this option is not given, all | ||
|  | #                        new episodes in the database are processed. The | ||
|  | #                        episodes are expected to have been already added if | ||
|  | #                        the option is used, and the 'title' and 'summary' | ||
|  | #                        fields and the assets (if any) are to be processed. | ||
|  | #    -range=N1 [-range=N2] | ||
|  | #                        An alternative way of defining existing shows to be | ||
|  | #                        processed in -mode=update. The option can only be | ||
|  | #                        repeated twice. The smaller number defines the start | ||
|  | #                        of the range, and the larger one the end of the range. | ||
|  | #                        So -range=834 -range=840 defines the range 836-840. If | ||
|  | #                        one number is given (or two the same) this means the | ||
|  | #                        same as defining one episode. The episodes are | ||
|  | #                        expected to have been already added and the 'title' | ||
|  | #                        and 'summary' fields and the assets (if any) are to be | ||
|  | #                        processed. | ||
|  | #    -json=FILE          Defines the file to use for -mode=json | ||
|  | #    -scandir=FILE       Defines a file to use for -mode=scandir | ||
|  | #    -dbconfig=FILE      Defines an alternative MySQL configuration file | ||
|  | # | ||
|  | #EOD | ||
|  | #    exit(1); | ||
|  | #} | ||
|  | 
 | ||
|  | #===  FUNCTION  ================================================================ | ||
|  | #         NAME: Options | ||
|  | #      PURPOSE: Processes command-line options | ||
|  | #   PARAMETERS: $optref     Hash reference to hold the options | ||
|  | #      RETURNS: Undef | ||
|  | #  DESCRIPTION: Process the options we want to offer. See the documentation | ||
|  | #               for details | ||
|  | #       THROWS: no exceptions | ||
|  | #     COMMENTS: none | ||
|  | #     SEE ALSO: n/a | ||
|  | #=============================================================================== | ||
|  | sub Options { | ||
|  |     my ($optref) = @_; | ||
|  | 
 | ||
|  |     my @options = ( | ||
|  |         "help",       "documentation|man", | ||
|  |         "debug=i",    "mode=s", | ||
|  |         "json=s",     "scandir=s", | ||
|  |         "episode=i@", "range=i@", | ||
|  |         "dry-run!",   "verbose!", | ||
|  |         "dbconfig=s", | ||
|  |     ); | ||
|  | 
 | ||
|  |     if ( !GetOptions( $optref, @options ) ) { | ||
|  |         pod2usage( | ||
|  |             -msg     => "$PROG version $VERSION\n", | ||
|  |             -exitval => 1, | ||
|  |             -verbose => 0 | ||
|  |         ); | ||
|  |     } | ||
|  | 
 | ||
|  |     return; | ||
|  | } | ||
|  | 
 | ||
|  | __END__ | ||
|  | 
 | ||
|  | #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% | ||
|  | #  Application Documentation | ||
|  | #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% | ||
|  | #{{{ | ||
|  | 
 | ||
|  | =head1 NAME | ||
|  | 
 | ||
|  | upload_manager - manages a SQLite database of details of shows on archive.org | ||
|  | 
 | ||
|  | =head1 VERSION | ||
|  | 
 | ||
|  | This documentation refers to upload_manager version 0.2.19 | ||
|  | 
 | ||
|  | =head1 USAGE | ||
|  | 
 | ||
|  |     ./upload_manager [-help] [-documentation] [-[no]dry-run] [-[no]verbose] | ||
|  |         [-debug=N] -mode=MODE [-episode=N1 [-episode=N2 ...]] [-range=START | ||
|  |         [-range=END]] [-json=FILE] [-scandir=FILE] [-dbconfig=FILE] | ||
|  | 
 | ||
|  | 
 | ||
|  | =head1 OPTIONS | ||
|  | 
 | ||
|  | =over 8 | ||
|  | 
 | ||
|  | =item B<-help> | ||
|  | 
 | ||
|  | Display a brief summary of how to use this script. | ||
|  | 
 | ||
|  | =item B<-documentation> or B<-man> | ||
|  | 
 | ||
|  | Display the entirety of the internal documentation in the form of a manual | ||
|  | page. | ||
|  | 
 | ||
|  | =item B<-[no]dry-run> | ||
|  | 
 | ||
|  | Enable/disable dry run mode (default off) | ||
|  | 
 | ||
|  | =item B<-[no]verbose> | ||
|  | 
 | ||
|  | Enable/disable verbose mode (default off) | ||
|  | 
 | ||
|  | =item B<-debug=N> | ||
|  | 
 | ||
|  | Sets the debug level: | ||
|  | 
 | ||
|  | =over 4 | ||
|  | 
 | ||
|  | =item B<0> | ||
|  | 
 | ||
|  | This is the default value, and causes no debug output to be generated. | ||
|  | 
 | ||
|  | =item B<1> | ||
|  | 
 | ||
|  | TBA | ||
|  | 
 | ||
|  | =item B<2> | ||
|  | 
 | ||
|  | Produces quite a lot of asset-related output. All of the output from the lower debug levels is | ||
|  | displayed as well as: | ||
|  | 
 | ||
|  | =over 4 | ||
|  | 
 | ||
|  | =item . | ||
|  | 
 | ||
|  | Contents of the %assets hash. This is generated when scanning a show in the | ||
|  | MySQL database in order to find any associated assets. | ||
|  | 
 | ||
|  | =item . | ||
|  | 
 | ||
|  | The episode number being checked and the number of assets found while | ||
|  | processing assets. A dump of the assets from the SQLite database is shown as | ||
|  | well as the count of assets found therre. | ||
|  | 
 | ||
|  | =item . | ||
|  | 
 | ||
|  | If there are assets related to a show and there are already some in the SQLite | ||
|  | database, a comparison is made to see if any updates are needed. These | ||
|  | differences are listed. | ||
|  | 
 | ||
|  | =item . | ||
|  | 
 | ||
|  | If new assets are being added, these are reported from the hash | ||
|  | %assets_by_file. | ||
|  | 
 | ||
|  | =item . | ||
|  | 
 | ||
|  | Assets are added using a multiple INSERT statement, and details of what is to | ||
|  | be inserted is shown in the array @multiple_insert. | ||
|  | 
 | ||
|  | =back | ||
|  | 
 | ||
|  | =item B<3> | ||
|  | 
 | ||
|  | Produces the most output. All of the output from the lower debug levels is | ||
|  | displayed as well as: | ||
|  | 
 | ||
|  | =over 4 | ||
|  | 
 | ||
|  | =item . | ||
|  | 
 | ||
|  | Contents of the @episodes array.  This is generated by using the B<-episode=N> | ||
|  | option once or many times. It is shown before and after the removal of | ||
|  | duplicates. | ||
|  | 
 | ||
|  | =item . | ||
|  | 
 | ||
|  | Contents of the @range array.  This is generated by using the B<-range=N> | ||
|  | option once or twice. | ||
|  | 
 | ||
|  | 
 | ||
|  | =back | ||
|  | 
 | ||
|  | =back | ||
|  | 
 | ||
|  | =item B<-mode=MODE> | ||
|  | 
 | ||
|  | Selects the mode of operation of the script from the following list: | ||
|  | 
 | ||
|  | =over 4 | ||
|  | 
 | ||
|  | =item B<initialise> | ||
|  | 
 | ||
|  | Rebuilds the SQLite database from the MySQL data. Use with caution, though | ||
|  | the script will not initialise tables which contain data. | ||
|  | 
 | ||
|  | =item B<update> | ||
|  | 
 | ||
|  | Updates the SQLite database from updates found in the MySQL database. Exactly | ||
|  | how this is done is controlled by other options. See below for details. | ||
|  | 
 | ||
|  | =item B<json> | ||
|  | 
 | ||
|  | Load the 'archive_date' field from a JSON file. Also sets the 'uploaded' flag | ||
|  | and populates the 'IA_URL' and 'item_last_updated' fields. | ||
|  | 
 | ||
|  | =item B<scandir> | ||
|  | 
 | ||
|  | Scans a file captured from the HPR server by running a 'find' command on the | ||
|  | 'eps' directory where all the show files reside. This file is scanned to | ||
|  | determine which shows have files other than the audio so we can keep a tally | ||
|  | of which shows have had these uploaded to the IA | ||
|  | 
 | ||
|  | ** Not fully implemented; may be phased out ** | ||
|  | 
 | ||
|  | =back | ||
|  | 
 | ||
|  | =item B<-episode=N1 [-episode=N2 ...]> | ||
|  | 
 | ||
|  | In B<-mode=update> defines one or more single episodes to process otherwise, if | ||
|  | this option is not given, all new episodes in the database are processed. The | ||
|  | episodes are expected to have been already added if this option is used, and | ||
|  | the 'title' and 'summary' fields and the assets (if any) are to be processed. | ||
|  | 
 | ||
|  | =item B<-range=N1 [-range=N2]> | ||
|  | 
 | ||
|  | An alternative way of defining existing shows to be processed in | ||
|  | B<-mode=update>.  The option can only be repeated twice. The smaller number | ||
|  | defines the start of the range, and the larger one the end of the range.  So | ||
|  | B<-range=834 -range=840> defines the range 836-840. If one number is given (or | ||
|  | two the same) this means the same as defining one episode. The episodes are | ||
|  | expected to have been already added and the 'title' and 'summary' fields and | ||
|  | the assets (if any) are to be processed. | ||
|  | 
 | ||
|  | =item B<-json=FILE> | ||
|  | 
 | ||
|  | Defines the file to use for B<-mode=json>. The contents of the file are | ||
|  | expected to be those produced by running B<collect_show_data>. | ||
|  | 
 | ||
|  | =item B<-scandir=FILE> | ||
|  | 
 | ||
|  | Defines a file to use for B<-mode=scandir>. This feature is not fully | ||
|  | implemented at the moment. | ||
|  | 
 | ||
|  | =item B<-dbconfig=FILE> | ||
|  | 
 | ||
|  | Defines an alternative MySQL configuration file | ||
|  | 
 | ||
|  | =back | ||
|  | 
 | ||
|  | =head1 DESCRIPTION | ||
|  | 
 | ||
|  | A full description of the application and its features. | ||
|  | May include numerous subsections (i.e. =head2, =head3, etc.) | ||
|  | 
 | ||
|  | 
 | ||
|  | =head1 DIAGNOSTICS | ||
|  | 
 | ||
|  | A list of every error and warning message that the application can generate | ||
|  | (even the ones that will "never happen"), with a full explanation of each | ||
|  | problem, one or more likely causes, and any suggested remedies. If the | ||
|  | application generates exit status codes (e.g. under Unix) then list the exit | ||
|  | status associated with each error. | ||
|  | 
 | ||
|  | 
 | ||
|  | =head1 CONFIGURATION AND ENVIRONMENT | ||
|  | 
 | ||
|  | A full explanation of any configuration system(s) used by the application, | ||
|  | including the names and locations of any configuration files, and the | ||
|  | meaning of any environment variables or properties that can be set. These | ||
|  | descriptions must also include details of any configuration language used | ||
|  | 
 | ||
|  | 
 | ||
|  | =head1 DEPENDENCIES | ||
|  | 
 | ||
|  | A list of all the other modules that this module relies upon, including any | ||
|  | restrictions on versions, and an indication whether these required modules are | ||
|  | part of the standard Perl distribution, part of the module's distribution, | ||
|  | or must be installed separately. | ||
|  | 
 | ||
|  | 
 | ||
|  | =head1 INCOMPATIBILITIES | ||
|  | 
 | ||
|  | A list of any modules that this module cannot be used in conjunction with. | ||
|  | This may be due to name conflicts in the interface, or competition for | ||
|  | system or program resources, or due to internal limitations of Perl | ||
|  | (for example, many modules that use source code filters are mutually | ||
|  | incompatible). | ||
|  | 
 | ||
|  | 
 | ||
|  | =head1 BUGS AND LIMITATIONS | ||
|  | 
 | ||
|  | A list of known problems with the module, together with some indication | ||
|  | whether they are likely to be fixed in an upcoming release. | ||
|  | 
 | ||
|  | Also a list of restrictions on the features the module does provide: | ||
|  | data types that cannot be handled, performance issues and the circumstances | ||
|  | in which they may arise, practical limitations on the size of data sets, | ||
|  | special cases that are not (yet) handled, etc. | ||
|  | 
 | ||
|  | The initial template usually just has: | ||
|  | 
 | ||
|  | There are no known bugs in this module. | ||
|  | Please report problems to <Maintainer name(s)>  (<contact address>) | ||
|  | Patches are welcome. | ||
|  | 
 | ||
|  | =head1 AUTHOR | ||
|  | 
 | ||
|  | <Author name(s)>  (<contact address>) | ||
|  | 
 | ||
|  | 
 | ||
|  | =head1 LICENCE AND COPYRIGHT | ||
|  | 
 | ||
|  | Copyright (c) <year> <copyright holder> (<contact address>). All rights reserved. | ||
|  | 
 | ||
|  | Followed by whatever licence you wish to release it under. | ||
|  | For Perl code that is often just: | ||
|  | 
 | ||
|  | This module is free software; you can redistribute it and/or | ||
|  | modify it under the same terms as Perl itself. See perldoc perlartistic. | ||
|  | 
 | ||
|  | This program is distributed in the hope that it will be useful, | ||
|  | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
|  | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | ||
|  | 
 | ||
|  | =cut | ||
|  | 
 | ||
|  | #}}} | ||
|  | 
 | ||
|  | # [zo to open fold, zc to close] | ||
|  | 
 | ||
|  | 
 | ||
|  | # vim: syntax=perl:ts=8:sw=4:et:ai:tw=78:fo=tcrqn21:fdm=marker | ||
|  | 
 |