forked from HPR/hpr-tools
		
	InternetArchive/future_upload: Added logging and debugging
InternetArchive/ia_db.sql: Added new tables
InternetArchive/recover_transcripts: New script to run on 'borg' and
    copy missing files from the backup disk to the IA
InternetArchive/repair_assets: More comments, including one about a bug in the design.
InternetArchive/repair_item: Fix relating to octal numbers (if there are
    leading zeroes in a number). '_DEBUG' is now in the function
    library. Added comments to explain obscure stuff.
InternetArchive/snapshot_metadata: New Bash script (to run on my
    desktop) which collects metadata for a show and stores in in the
    '~/HPR/IA/assets' directory. Runs 'view_derivatives' on it to find
    derivative files for deletion.
InternetArchive/tidy_uploaded: Moves files and directories containing
    uploaded files into a holding area for later backup. Added
    debugging, logging and a 'force' mode.
InternetArchive/upload_manager: Manages 'ia.db' (on my workstation).
    Needs many updates which have just started to be added.
InternetArchive/weekly_upload: Old script, now obsolete.
		
	
		
			
				
	
	
		
			2805 lines
		
	
	
		
			92 KiB
		
	
	
	
		
			Perl
		
	
	
		
			Executable File
		
	
	
	
	
			
		
		
	
	
			2805 lines
		
	
	
		
			92 KiB
		
	
	
	
		
			Perl
		
	
	
		
			Executable File
		
	
	
	
	
#!/usr/bin/env perl
 | 
						|
#===============================================================================
 | 
						|
#
 | 
						|
#         FILE: upload_manager
 | 
						|
#
 | 
						|
#        USAGE: ./upload_manager [-help] [-[no]dry-run] [-[no]verbose]
 | 
						|
#               [-debug=N] -mode=MODE [-episode=N1 [-episode=N2 ...]]
 | 
						|
#               [-range=START [-range=END]] [-json=FILE] [-scandir=FILE]
 | 
						|
#               [-dbconfig=FILE]
 | 
						|
#
 | 
						|
#  DESCRIPTION: Collect details about HPR shows and the copies on Archive.org
 | 
						|
#               and keep them in a separate SQLite database (for
 | 
						|
#               the moment). This database is used to populate the
 | 
						|
#               developing PostgreSQL Database (which is not currently
 | 
						|
#               progressing very much).
 | 
						|
#
 | 
						|
#      OPTIONS: ---
 | 
						|
# REQUIREMENTS: ---
 | 
						|
#         BUGS: ---
 | 
						|
#        NOTES: 2022-02-15 Had to revert to DBD::MySQL temporarily.
 | 
						|
#               2023-06-12 This code is no longer valid. It expects the main
 | 
						|
#               HTML to point to files on the server (if there are "assets")
 | 
						|
#               and recurses through extra HTML it finds there. It currently
 | 
						|
#               looks for file details in any sub-directory "/eps/hpr1234/".
 | 
						|
#               Now there are no files, so the details of all assets
 | 
						|
#               (including the audio) needs to be in the 'assets' table. Not
 | 
						|
#               sure we are there yet.
 | 
						|
#               2024-03-08 Smartmatch is deprecated; moved to 'match::smart'.
 | 
						|
#       AUTHOR: Dave Morriss (djm), Dave.Morriss@gmail.com
 | 
						|
#      VERSION: 0.2.19
 | 
						|
#      CREATED: 2017-06-14 10:50:28
 | 
						|
#     REVISION: 2024-03-08 20:49:51
 | 
						|
#
 | 
						|
#===============================================================================
 | 
						|
 | 
						|
use v5.36;
 | 
						|
use strict;
 | 
						|
use warnings;
 | 
						|
use utf8;
 | 
						|
#use experimental 'smartmatch';
 | 
						|
# TODO: use experimental::try;
 | 
						|
 | 
						|
use match::smart;
 | 
						|
 | 
						|
use Carp;
 | 
						|
use Getopt::Long;
 | 
						|
use Pod::Usage;
 | 
						|
 | 
						|
use Config::General;
 | 
						|
use File::Slurper qw{ read_text read_lines };
 | 
						|
use JSON;
 | 
						|
use Net::OpenSSH;
 | 
						|
#use TryCatch; # Broke in late May 2020 due to a problem with Devel::Declare
 | 
						|
use Try::Tiny;
 | 
						|
use Log::Handler;
 | 
						|
 | 
						|
use HTML::TreeBuilder 5 -weak;
 | 
						|
use HTML::Entities;
 | 
						|
use List::Util qw{ min max };
 | 
						|
use List::MoreUtils qw{ any };
 | 
						|
use LWP::Simple;
 | 
						|
 | 
						|
use DBI;
 | 
						|
use SQL::Abstract;
 | 
						|
use SQL::Abstract::Plugin::InsertMulti;
 | 
						|
 | 
						|
use Data::Dumper;
 | 
						|
 | 
						|
#
 | 
						|
# Version number (manually incremented)
 | 
						|
#
 | 
						|
our $VERSION = '0.2.19';
 | 
						|
 | 
						|
#
 | 
						|
# Script and directory names
 | 
						|
#
 | 
						|
( my $PROG = $0 ) =~ s|.*/||mx;
 | 
						|
#( my $DIR  = $0 ) =~ s|/?[^/]*$||mx;
 | 
						|
#$DIR = '.' unless $DIR;
 | 
						|
 | 
						|
#-------------------------------------------------------------------------------
 | 
						|
# Declarations
 | 
						|
#-------------------------------------------------------------------------------
 | 
						|
#
 | 
						|
# Constants and other declarations
 | 
						|
#
 | 
						|
my $basedir       = "$ENV{HOME}/HPR/IA";
 | 
						|
my $logdir        = "$basedir/logs";
 | 
						|
my $logfile       = "$logdir/${PROG}.log";
 | 
						|
my $configfile    = "$basedir/.$PROG.cfg";
 | 
						|
my $db1configfile = "$basedir/.hpr_db.cfg";
 | 
						|
my $database2     = "$basedir/ia.db";
 | 
						|
 | 
						|
my ( $dbh1, $dbh2, $sth1, $h1 );
 | 
						|
 | 
						|
#
 | 
						|
# NOTE: Program config not currently used
 | 
						|
#
 | 
						|
die "Configuration file $configfile not found\n" unless ( -e $configfile );
 | 
						|
 | 
						|
my $conf = Config::General->new(
 | 
						|
    -ConfigFile      => $configfile,
 | 
						|
    -InterPolateVars => 1,
 | 
						|
    -ExtendedAccess  => 1,
 | 
						|
);
 | 
						|
my %config = $conf->getall();
 | 
						|
 | 
						|
#
 | 
						|
# Enable Unicode mode
 | 
						|
#
 | 
						|
binmode STDOUT, ":encoding(UTF-8)";
 | 
						|
binmode STDERR, ":encoding(UTF-8)";
 | 
						|
 | 
						|
#-------------------------------------------------------------------------------
 | 
						|
# Options
 | 
						|
#-------------------------------------------------------------------------------
 | 
						|
#
 | 
						|
# Option defaults
 | 
						|
#
 | 
						|
my $DEFDEBUG = 0;
 | 
						|
 | 
						|
#
 | 
						|
# Options and arguments
 | 
						|
#
 | 
						|
my %options;
 | 
						|
Options( \%options );
 | 
						|
 | 
						|
#
 | 
						|
# Default help shows minimal information
 | 
						|
#
 | 
						|
pod2usage( -msg => "$PROG version $VERSION\n", -exitval => 1, -verbose => 0 )
 | 
						|
    if ( $options{'help'} );
 | 
						|
 | 
						|
#
 | 
						|
# The -documentation or -man option shows the full POD documentation through
 | 
						|
# a pager for convenience
 | 
						|
#
 | 
						|
pod2usage( -msg => "$PROG version $VERSION\n", -exitval => 1, -verbose => 2 )
 | 
						|
    if ( $options{'documentation'} );
 | 
						|
 | 
						|
my $DEBUG = ( defined( $options{debug} ) ? $options{debug} : $DEFDEBUG );
 | 
						|
 | 
						|
my $db1cfgfile
 | 
						|
    = ( defined( $options{dbconfig} ) ? $options{dbconfig} : $db1configfile );
 | 
						|
 | 
						|
my $dry_run = ( defined( $options{'dry-run'} ) ? $options{'dry-run'} : 0 );
 | 
						|
my $verbose = ( defined( $options{verbose} ) ? $options{verbose} : 0 );
 | 
						|
 | 
						|
my $mode = $options{mode};
 | 
						|
die "Mode is required\n" unless defined($mode);
 | 
						|
$mode = lc($mode);
 | 
						|
 | 
						|
my @episodes;
 | 
						|
@episodes = @{ $options{episode} } if ( defined( $options{episode} ) );
 | 
						|
if ($mode ne 'update' and @episodes) {
 | 
						|
    die "The -episode=N options are only for use in update mode\n";
 | 
						|
}
 | 
						|
 | 
						|
my @range;
 | 
						|
@range = @{ $options{range} } if ( defined( $options{range} ) );
 | 
						|
if ($mode ne 'update' and @range) {
 | 
						|
    die "The -range=N options are only for use in update mode\n";
 | 
						|
}
 | 
						|
 | 
						|
_debug( $DEBUG >= 3, '@episodes: ' . Dumper( \@episodes ) );
 | 
						|
_debug( $DEBUG >= 3, '@range: ' . Dumper( \@range ) );
 | 
						|
 | 
						|
#
 | 
						|
# Check -episode=N versus -range=N. We can't have both
 | 
						|
#
 | 
						|
die "Choose only -episode=N or -range=N, not both\n"
 | 
						|
    if ( defined( $options{episode} ) && defined( $options{range} ) );
 | 
						|
 | 
						|
#
 | 
						|
# Only two range values
 | 
						|
#
 | 
						|
die "-range=N should not be used more than twice\n" if ($#range gt 2);
 | 
						|
 | 
						|
#
 | 
						|
# Turn the range into a list of episodes
 | 
						|
#
 | 
						|
if (defined($options{range})) {
 | 
						|
    @episodes = ( min(@range) .. max(@range) );
 | 
						|
}
 | 
						|
 | 
						|
@episodes = map { sprintf('%04d',$_)} @episodes;
 | 
						|
 | 
						|
_debug( $DEBUG >= 3, '@episodes: ' . Dumper(\@episodes) );
 | 
						|
 | 
						|
die "Too many episodes requested\n" if (scalar(@episodes) > 20);
 | 
						|
 | 
						|
my $json = $options{json};
 | 
						|
if ( $mode eq 'json' ) {
 | 
						|
    die "JSON file is required\n" unless defined($json);
 | 
						|
    die "Unable to find file '$json'\n" unless -e $json;
 | 
						|
}
 | 
						|
 | 
						|
my $dirfile = $options{scandir};
 | 
						|
if ( $mode eq 'scandir' ) {
 | 
						|
    die "Directory listing file is required\n" unless defined($dirfile);
 | 
						|
    die "Unable to find file '$dirfile'\n" unless -e $dirfile;
 | 
						|
}
 | 
						|
 | 
						|
die "Configuration file $db1cfgfile not found\n" unless ( -e $db1cfgfile );
 | 
						|
 | 
						|
#-------------------------------------------------------------------------------
 | 
						|
# Set up logging keeping the default log layout except for the date. The format
 | 
						|
# is "%T [%L] %m" where '%T' is the timestamp, '%L' is the log level and '%m is
 | 
						|
# the message.
 | 
						|
#-------------------------------------------------------------------------------
 | 
						|
my $log = Log::Handler->new();
 | 
						|
 | 
						|
$log->add(
 | 
						|
    file => {
 | 
						|
        timeformat => "%Y-%m-%d %H:%M:%S",
 | 
						|
        filename   => $logfile,
 | 
						|
        minlevel   => 0,
 | 
						|
        maxlevel   => 7,
 | 
						|
    }
 | 
						|
);
 | 
						|
 | 
						|
#-------------------------------------------------------------------------------
 | 
						|
# Load MySQL configuration data
 | 
						|
#-------------------------------------------------------------------------------
 | 
						|
my $db1conf = Config::General->new(
 | 
						|
    -ConfigFile      => $db1cfgfile,
 | 
						|
    -InterPolateVars => 1,
 | 
						|
    -ExtendedAccess  => 1,
 | 
						|
);
 | 
						|
my %db1cfg = $db1conf->getall();
 | 
						|
 | 
						|
if ($verbose) {
 | 
						|
    print "MySQL database configuration chosen: $db1cfgfile\n";
 | 
						|
    print "Dry run\n" if $dry_run;
 | 
						|
}
 | 
						|
 | 
						|
#-------------------------------------------------------------------------------
 | 
						|
# Connect to the MariaDB database [reverted to mysql temporarily]
 | 
						|
#-------------------------------------------------------------------------------
 | 
						|
my $dbhost = $db1cfg{database}->{host} // '127.0.0.1';
 | 
						|
my $dbport = $db1cfg{database}->{port} // 3306;
 | 
						|
my $dbname = $db1cfg{database}->{name};
 | 
						|
my $dbuser = $db1cfg{database}->{user};
 | 
						|
my $dbpwd  = $db1cfg{database}->{password};
 | 
						|
#$dbh1 = DBI->connect( "DBI:MariaDB:database=$dbname;host=$dbhost;port=$dbport",
 | 
						|
#    $dbuser, $dbpwd, { AutoCommit => 1 } )
 | 
						|
#    or croak $DBI::errstr;
 | 
						|
$dbh1 = DBI->connect( "DBI:mysql:database=$dbname;host=$dbhost;port=$dbport",
 | 
						|
    $dbuser, $dbpwd, { AutoCommit => 1 } )
 | 
						|
    or croak $DBI::errstr;
 | 
						|
 | 
						|
#
 | 
						|
# Enable client-side UTF8 [temporary]
 | 
						|
#
 | 
						|
$dbh1->{mysql_enable_utf8} = 1;
 | 
						|
 | 
						|
#-------------------------------------------------------------------------------
 | 
						|
# Connect to the SQLite database
 | 
						|
#-------------------------------------------------------------------------------
 | 
						|
$dbh2 = DBI->connect( "dbi:SQLite:dbname=$database2", "", "" );
 | 
						|
 | 
						|
#
 | 
						|
# Log the starting conditions
 | 
						|
#
 | 
						|
$log->info("Starting $PROG version $VERSION");
 | 
						|
if (@episodes) {
 | 
						|
    $log->info('Episodes:', join( ", ", @episodes ) );
 | 
						|
}
 | 
						|
else {
 | 
						|
    $log->info('Looking for new episodes');
 | 
						|
}
 | 
						|
$log->info("Mode: $mode");
 | 
						|
if ($dry_run) {
 | 
						|
    $log->info('Dry run: Yes');
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
#-------------------------------------------------------------------------------
 | 
						|
# Set up for connecting to the server with SSH
 | 
						|
#-------------------------------------------------------------------------------
 | 
						|
my $remhost = 'hackerpublicradio.org';
 | 
						|
#my $remport = 22074;
 | 
						|
my $remport = 22;
 | 
						|
my $remuser = 'hpr';
 | 
						|
my $rempath = '/home/hpr/www/eps/';
 | 
						|
 | 
						|
my $ssh = Net::OpenSSH->new( $remhost, user => $remuser, port => $remport );
 | 
						|
$ssh->error and die "Can't ssh to $remhost: " . $ssh->error;
 | 
						|
 | 
						|
$log->info("Connected to ${remuser}\@${remhost}, port $remport");
 | 
						|
 | 
						|
#-------------------------------------------------------------------------------
 | 
						|
# Perform the action requested by -mode=X
 | 
						|
#-------------------------------------------------------------------------------
 | 
						|
if ( $mode eq 'initialise' ) {
 | 
						|
    print "Initialise mode\n";
 | 
						|
    initialise( $dbh1, $dbh2 );
 | 
						|
}
 | 
						|
elsif ( $mode eq 'update' ) {
 | 
						|
    print "Update mode\n";
 | 
						|
 | 
						|
    #
 | 
						|
    # Range updating is achieved by running the single-shot mode repeatedly
 | 
						|
    #
 | 
						|
    if ( @episodes ) {
 | 
						|
        foreach my $episode (@episodes) {
 | 
						|
            update_episode( $dbh1, $dbh2, $episode, $dry_run, $verbose );
 | 
						|
        }
 | 
						|
    }
 | 
						|
    else {
 | 
						|
        update_db( $dbh1, $dbh2, $dry_run, $verbose );
 | 
						|
    }
 | 
						|
}
 | 
						|
elsif ( $mode eq 'json' ) {
 | 
						|
    print "JSON mode\n";
 | 
						|
    load_metadata( $dbh2, $json, $dry_run, $verbose );
 | 
						|
    #load_upload_dates( $dbh2, $json );
 | 
						|
}
 | 
						|
elsif ( $mode eq 'scandir' ) {
 | 
						|
    print "Scan directory mode\n";
 | 
						|
    scan_dir( $dbh2, $dirfile );
 | 
						|
}
 | 
						|
else {
 | 
						|
    die "Invalid mode: $mode\n";
 | 
						|
}
 | 
						|
 | 
						|
exit;
 | 
						|
 | 
						|
#===  FUNCTION  ================================================================
 | 
						|
#         NAME: initialise
 | 
						|
#      PURPOSE: Initialise an empty database
 | 
						|
#   PARAMETERS: $dbh1           Handle for the MySQL database
 | 
						|
#               $dbh2           Handle for the SQLite database
 | 
						|
#      RETURNS: Nothing
 | 
						|
#  DESCRIPTION: Runs the various initialisation functions, but only if the
 | 
						|
#               database tables 'episodes' and/or 'assets' are empty
 | 
						|
#       THROWS: No exceptions
 | 
						|
#     COMMENTS: None
 | 
						|
#     SEE ALSO: N/A
 | 
						|
#===============================================================================
 | 
						|
sub initialise {
 | 
						|
    my ( $dbh1, $dbh2 ) = @_;
 | 
						|
 | 
						|
    my ($count);
 | 
						|
 | 
						|
    #
 | 
						|
    # Initialise the 'episodes' table if it's empty
 | 
						|
    #
 | 
						|
    $count = count_rows( $dbh2, 'episodes' );
 | 
						|
    if ( $count == 0 ) {
 | 
						|
        print "Initialising the 'episodes' table\n";
 | 
						|
        initialise_episodes( $dbh1, $dbh2 );
 | 
						|
    }
 | 
						|
    else {
 | 
						|
        print "Did not initialise the 'episodes' table ($count rows)\n";
 | 
						|
        $log->info("Did not initialise the 'episodes' table ($count rows)");
 | 
						|
    }
 | 
						|
 | 
						|
    #
 | 
						|
    # Initialise the 'assets' table if it's empty
 | 
						|
    #
 | 
						|
    $count = count_rows( $dbh2, 'assets' );
 | 
						|
    if ( $count == 0 ) {
 | 
						|
        print "Initialising the 'assets' table\n";
 | 
						|
        initialise_assets( $dbh1, $dbh2 );
 | 
						|
    }
 | 
						|
    else {
 | 
						|
        print "Did not initialise the 'assets' table ($count rows)\n";
 | 
						|
        $log->info("Did not initialise the 'assets' table ($count rows)");
 | 
						|
    }
 | 
						|
 | 
						|
}
 | 
						|
 | 
						|
#===  FUNCTION  ================================================================
 | 
						|
#         NAME: initialise_episodes
 | 
						|
#      PURPOSE: Initialise an empty 'episodes' table
 | 
						|
#   PARAMETERS: $dbh1           Handle for the MySQL database
 | 
						|
#               $dbh2           Handle for the SQLite database
 | 
						|
#      RETURNS: Nothing
 | 
						|
#  DESCRIPTION: Assuming the episodes table is empty loads all episodes from
 | 
						|
#               the MariaDB database
 | 
						|
#       THROWS: No exceptions
 | 
						|
#     COMMENTS: None
 | 
						|
#     SEE ALSO: N/A
 | 
						|
#===============================================================================
 | 
						|
sub initialise_episodes {
 | 
						|
    my ( $dbh1, $dbh2 ) = @_;
 | 
						|
 | 
						|
    my ( $sql1, $sth1, $h1, $count );
 | 
						|
 | 
						|
    $sql1 = q{
 | 
						|
        SELECT
 | 
						|
            id,
 | 
						|
            date,
 | 
						|
            title,
 | 
						|
            summary
 | 
						|
        FROM eps
 | 
						|
    };
 | 
						|
 | 
						|
    $sth1 = $dbh1->prepare($sql1);
 | 
						|
 | 
						|
    $sth1->execute();
 | 
						|
    if ( $dbh1->err ) {
 | 
						|
        croak $dbh1->errstr;
 | 
						|
    }
 | 
						|
 | 
						|
    $count = 0;
 | 
						|
    while ( $h1 = $sth1->fetchrow_hashref ) {
 | 
						|
        #
 | 
						|
        # Fields are:
 | 
						|
        # id                    integer,
 | 
						|
        # rdate                 date NOT NULL,
 | 
						|
        # title                 varchar ( 100 ) NOT NULL,
 | 
						|
        # summary               varchar ( 100 ) NOT NULL,
 | 
						|
        # uploaded              integer DEFAULT 0,
 | 
						|
        # has_files             INTEGER DEFAULT 0,
 | 
						|
        # with_files            INTEGER DEFAULT 0,
 | 
						|
        # with_derived          INTEGER DEFAULT 0,
 | 
						|
        # with_source           INTEGER DEFAULT 0,
 | 
						|
        # archive_date          date,
 | 
						|
        # item_last_updated     integer
 | 
						|
        # IA_URL                text,
 | 
						|
        # notes                 text,
 | 
						|
        #
 | 
						|
        #>>>
 | 
						|
        $dbh2->do(
 | 
						|
            q{INSERT INTO episodes (id, rdate, title, summary) VALUES(?,?,?,?)},
 | 
						|
            undef,
 | 
						|
            $h1->{id},
 | 
						|
            $h1->{date},
 | 
						|
            $h1->{title},
 | 
						|
            $h1->{summary},
 | 
						|
        ) or die $dbh2->errstr;
 | 
						|
        #<<<
 | 
						|
 | 
						|
        $count++;
 | 
						|
    }
 | 
						|
 | 
						|
    print "Records added: $count\n";
 | 
						|
    $log->info("Records added to 'episodes' table: $count");
 | 
						|
 | 
						|
}
 | 
						|
 | 
						|
#===  FUNCTION  ================================================================
 | 
						|
#         NAME: initialise_assets
 | 
						|
#      PURPOSE: Initialise an empty 'assets' table (during database
 | 
						|
#               initialisation)
 | 
						|
#   PARAMETERS: $dbh1           Handle for the MariaDB database
 | 
						|
#               $dbh2           Handle for the SQLite database
 | 
						|
#      RETURNS: Nothing
 | 
						|
#  DESCRIPTION: Assuming the assets table is empty loads all assets (links to
 | 
						|
#               files on the HPR server) into the table
 | 
						|
#       THROWS: No exceptions
 | 
						|
#     COMMENTS: None
 | 
						|
#     SEE ALSO: N/A
 | 
						|
#===============================================================================
 | 
						|
sub initialise_assets {
 | 
						|
    my ( $dbh1, $dbh2 ) = @_;
 | 
						|
 | 
						|
    my ( $sql1, $sth1, $h1, $count, $links, $id, $episode, $notes );
 | 
						|
    my %assets;
 | 
						|
 | 
						|
    #
 | 
						|
    # Collect the notes for shows that have links to other HPR files
 | 
						|
    #
 | 
						|
    $sql1 = q{
 | 
						|
        SELECT
 | 
						|
            id,
 | 
						|
            notes
 | 
						|
        FROM eps
 | 
						|
        WHERE notes REGEXP 'https?://(www.)?(hacker|hobby)publicradio.org/eps/'
 | 
						|
    };
 | 
						|
#        LIMIT 20 OFFSET 30
 | 
						|
 | 
						|
    $sth1 = $dbh1->prepare($sql1);
 | 
						|
 | 
						|
    $sth1->execute();
 | 
						|
    if ( $dbh1->err ) {
 | 
						|
        croak $dbh1->errstr;
 | 
						|
    }
 | 
						|
 | 
						|
    #
 | 
						|
    # Process notes one show at a time, recursing into HTML where necessary
 | 
						|
    # and building the %assets hash
 | 
						|
    #
 | 
						|
    $count = 0;
 | 
						|
    while ( $h1 = $sth1->fetchrow_hashref ) {
 | 
						|
        $links = 0;
 | 
						|
 | 
						|
        $id = $h1->{id};
 | 
						|
        $episode = sprintf("%04d", $id);
 | 
						|
        $notes = $h1->{notes};
 | 
						|
 | 
						|
        print "Episode: $id\n";
 | 
						|
        #$links = find_links($id,$notes,\%assets);
 | 
						|
        $links = find_assets($ssh,$episode,$rempath,\%assets);
 | 
						|
        print "Links found: $links\n";
 | 
						|
    }
 | 
						|
 | 
						|
    #print Dumper(\%assets),"\n";
 | 
						|
 | 
						|
    #
 | 
						|
    # Use the %assets hash to populate the 'assets' table
 | 
						|
    #
 | 
						|
    foreach my $key (sort(keys(%assets))) {
 | 
						|
        foreach my $obj (@{$assets{$key}}) {
 | 
						|
            #>>>
 | 
						|
            $dbh2->do(
 | 
						|
                q{INSERT INTO assets (episode_id, URL, filename) VALUES(?,?,?)},
 | 
						|
                undef,
 | 
						|
                $key,
 | 
						|
                $obj->{URL},
 | 
						|
                $obj->{filename},
 | 
						|
            ) or die $dbh2->errstr;
 | 
						|
            #<<<
 | 
						|
 | 
						|
            $count++;
 | 
						|
        }
 | 
						|
    }
 | 
						|
 | 
						|
    print "Records added: $count\n";
 | 
						|
    $log->info("Records added to 'assets' table: $count");
 | 
						|
 | 
						|
}
 | 
						|
 | 
						|
#===  FUNCTION  ================================================================
 | 
						|
#         NAME: update_db
 | 
						|
#      PURPOSE: Loads any additional rows from the MySQL database to the
 | 
						|
#               SQLite one.
 | 
						|
#   PARAMETERS: $dbh1           Handle for the MySQL database
 | 
						|
#               $dbh2           Handle for the SQLite database
 | 
						|
#               $dry_run        Boolean dry run setting
 | 
						|
#               $verbose        Boolean verbose setting
 | 
						|
#      RETURNS: Nothing
 | 
						|
#  DESCRIPTION: Finds the first gap in the 'id' field of the 'episodes' table
 | 
						|
#               in the SQLite database. This is the starting show number it
 | 
						|
#               uses when querying the MySQL database. The function loops over
 | 
						|
#               the 'eps' table in the MySQL database returning details for
 | 
						|
#               storage in the SQLite database. Before storing it double
 | 
						|
#               checks that the row in question does not exist.
 | 
						|
#               TODO
 | 
						|
#       THROWS: No exceptions
 | 
						|
#     COMMENTS: None
 | 
						|
#     SEE ALSO: N/A
 | 
						|
#===============================================================================
 | 
						|
sub update_db {
 | 
						|
    my ( $dbh1, $dbh2, $dry_run, $verbose ) = @_;
 | 
						|
 | 
						|
    my ( $sql1, $sql2, $sth1, $h1, $sth2, $h2 );
 | 
						|
    my ( $ep_count, $ffree);
 | 
						|
    my ( %assets, $asset_count, $assets_added, $assets_deleted, $assets_updated );
 | 
						|
    my ( $id, $episode, $notes, $links );
 | 
						|
 | 
						|
    print "Updating SQLite database from MySQL\n" if $verbose;
 | 
						|
 | 
						|
    #
 | 
						|
    # Find the first episode id gap in the SQLite database
 | 
						|
    #
 | 
						|
    $sql1 = q{
 | 
						|
        SELECT MIN (id) + 1 AS first_free
 | 
						|
        FROM episodes t1
 | 
						|
        WHERE NOT EXISTS (
 | 
						|
            SELECT *
 | 
						|
            FROM episodes t2
 | 
						|
            WHERE t1.id + 1 = t2.id)
 | 
						|
    };
 | 
						|
 | 
						|
    $sth1 = $dbh2->prepare($sql1);
 | 
						|
 | 
						|
    $sth1->execute();
 | 
						|
    if ( $dbh1->err ) {
 | 
						|
        croak $dbh1->errstr;
 | 
						|
    }
 | 
						|
 | 
						|
    $h1    = $sth1->fetchrow_hashref;
 | 
						|
    $ffree = $h1->{first_free};
 | 
						|
    print "First free id: $ffree\n" if $verbose;
 | 
						|
 | 
						|
    #
 | 
						|
    # SQL to find new rows in the MySQL database
 | 
						|
    #
 | 
						|
    $sql1 = q{
 | 
						|
        SELECT
 | 
						|
            id,
 | 
						|
            date,
 | 
						|
            title,
 | 
						|
            summary,
 | 
						|
            notes
 | 
						|
        FROM eps
 | 
						|
        WHERE id >= ?
 | 
						|
    };
 | 
						|
 | 
						|
    #
 | 
						|
    # SQL to look for pre-existing episodes in the SQLite database
 | 
						|
    #
 | 
						|
    $sql2 = q{
 | 
						|
        SELECT * FROM episodes WHERE id = ?
 | 
						|
    };
 | 
						|
 | 
						|
    #
 | 
						|
    # Prepare the SQL statements
 | 
						|
    #
 | 
						|
    $sth1 = $dbh1->prepare($sql1);
 | 
						|
    $sth2 = $dbh2->prepare($sql2);
 | 
						|
 | 
						|
    #
 | 
						|
    # Query MySQL, from $ffree onwards
 | 
						|
    #
 | 
						|
    $sth1->execute($ffree);
 | 
						|
    if ( $dbh1->err ) {
 | 
						|
        croak $dbh1->errstr;
 | 
						|
    }
 | 
						|
 | 
						|
    #
 | 
						|
    # Loop through the MySQL rows
 | 
						|
    #
 | 
						|
    $ep_count = $asset_count = 0;
 | 
						|
    while ( $h1 = $sth1->fetchrow_hashref ) {
 | 
						|
        #
 | 
						|
        # Query the SQLite database for the episode number we just got
 | 
						|
        #
 | 
						|
        $sth2->execute( $h1->{id} );
 | 
						|
        if ( $dbh2->err ) {
 | 
						|
            croak $dbh2->errstr;
 | 
						|
        }
 | 
						|
        $h2 = $sth2->fetchrow_hashref;
 | 
						|
 | 
						|
        #
 | 
						|
        # If the row isn't already there create it
 | 
						|
        #
 | 
						|
        unless ($h2) {
 | 
						|
            unless ($dry_run) {
 | 
						|
                #>>>
 | 
						|
                $dbh2->do(
 | 
						|
                    q{INSERT INTO episodes (id, rdate, title, summary) VALUES(?,?,?,?)},
 | 
						|
                    undef,
 | 
						|
                    $h1->{id},
 | 
						|
                    $h1->{date},
 | 
						|
                    $h1->{title},
 | 
						|
                    $h1->{summary},
 | 
						|
                ) or die $dbh2->errstr;
 | 
						|
                #<<<
 | 
						|
 | 
						|
                printf "Added episode: %d\n", $h1->{id} if $verbose;
 | 
						|
 | 
						|
                $ep_count++;
 | 
						|
            }
 | 
						|
            else {
 | 
						|
                printf
 | 
						|
                    "Would have inserted a row into the 'episodes' table (%s)\n",
 | 
						|
                    $h1->{id};
 | 
						|
            }
 | 
						|
        }
 | 
						|
 | 
						|
        #
 | 
						|
        # Check this show for "assets" (whether we have the episode already or
 | 
						|
        # not). Even in dry-run mode this gets executed
 | 
						|
        #
 | 
						|
        $links = 0;
 | 
						|
 | 
						|
        $id = $h1->{id};
 | 
						|
        $episode = sprintf("%04d", $id);
 | 
						|
        # $notes = $h1->{notes};
 | 
						|
        # $links = find_links($id,$notes,\%assets);
 | 
						|
        $links = find_assets($ssh,$episode,$rempath,\%assets);
 | 
						|
 | 
						|
        $asset_count += $links;
 | 
						|
 | 
						|
    }
 | 
						|
 | 
						|
    #
 | 
						|
    # In debug mode show what assets were collected as we iterated through the
 | 
						|
    # new shows.
 | 
						|
    #
 | 
						|
    _debug( $DEBUG >= 2, '%assets: ' . Dumper(\%assets) );
 | 
						|
 | 
						|
    #
 | 
						|
    # Add any assets collected from the episodes just processed
 | 
						|
    #
 | 
						|
    $assets_added = 0;
 | 
						|
    if ($asset_count > 0) {
 | 
						|
        ( $assets_added, $assets_deleted, $assets_updated ) =
 | 
						|
            process_assets( $dbh2, \%assets, 1, $dry_run, $verbose );
 | 
						|
    }
 | 
						|
 | 
						|
    printf "%-16s %d\n", "Episodes added:", $ep_count;
 | 
						|
    printf "%-16s %d\n", "Assets found:",   $asset_count;
 | 
						|
    printf "%-16s %d\n", "Assets added:",   coalesce( $assets_added,   0 );
 | 
						|
    printf "%-16s %d\n", "Assets deleted:", coalesce( $assets_deleted, 0 );
 | 
						|
    printf "%-16s %d\n", "Assets updated:", coalesce( $assets_updated, 0 );
 | 
						|
 | 
						|
    $log->info("Episodes added: $ep_count");
 | 
						|
}
 | 
						|
 | 
						|
#===  FUNCTION  ================================================================
 | 
						|
#         NAME: process_assets
 | 
						|
#      PURPOSE: Given a collection of asset details obtained by searching
 | 
						|
#               notes for a given range of episodes determine whether they
 | 
						|
#               need to be added to the SQLite database.
 | 
						|
#   PARAMETERS: $dbh            SQLite database handle
 | 
						|
#               $assets         hashref pointing to the collected assets which
 | 
						|
#                               have been found by parsing the notes of all
 | 
						|
#                               relevant shows prior to calling this routine.
 | 
						|
#               $new            Boolean flag to indicate that we're dealing with a new
 | 
						|
#                               show or updating an old one
 | 
						|
#               $dry_run        Boolean dry run setting
 | 
						|
#               $verbose        Boolean verbose setting
 | 
						|
#      RETURNS: Number of assets added
 | 
						|
#  DESCRIPTION: The %assets hash has been accumulated by scanning all the
 | 
						|
#               notes belonging to new episodes found in the MySQL database.
 | 
						|
#               The hash is keyed by the episode number and it is scanned in
 | 
						|
#               sorted order of the key. The SQLite database is queried for
 | 
						|
#               assets with this key and the result held in a hash keyed by
 | 
						|
#               the filename. The value of each key in the %assets hash is an
 | 
						|
#               array of hashes holding the asset details, so we loop through
 | 
						|
#               these. If the filename of a particular asset hash is not found
 | 
						|
#               in the hash from the database then we create a new row in the
 | 
						|
#               'assets' table (assuming that we're not in dry run mode where
 | 
						|
#               we simply report what we would have done). If any assets were
 | 
						|
#               added for an episode then we set the flag 'has_files' (unless
 | 
						|
#               in dry run mode). The total number of assets added to the
 | 
						|
#               database is returned by the function.
 | 
						|
#       THROWS: No exceptions
 | 
						|
#     COMMENTS: None
 | 
						|
#     SEE ALSO: N/A
 | 
						|
#===============================================================================
 | 
						|
sub process_assets {
 | 
						|
    my ( $dbh, $assets, $new, $dry_run, $verbose ) = @_;
 | 
						|
 | 
						|
    my ( $sql, $sth, $db_assets, $key_field, $db_count );
 | 
						|
    my ( $asset_count, $assets_added, $assets_deleted, $assets_updated );
 | 
						|
 | 
						|
    $key_field = 'id';
 | 
						|
 | 
						|
    #
 | 
						|
    # We should only be called if there are assets, but just in case...
 | 
						|
    #
 | 
						|
    return ( 0, 0, 0 ) unless ($assets);
 | 
						|
 | 
						|
    #
 | 
						|
    # SQL to look for pre-existing assets in the SQLite database
 | 
						|
    #
 | 
						|
    $sql = q{
 | 
						|
        SELECT * FROM assets WHERE episode_id = ?
 | 
						|
    };
 | 
						|
    $sth = $dbh->prepare($sql);
 | 
						|
 | 
						|
    say 'Processing assets for ', ( $new ? 'new' : 'existing' ), ' show'
 | 
						|
        if $verbose;
 | 
						|
 | 
						|
    #
 | 
						|
    # Use the %assets hash to populate the 'assets' table. The hash is keyed
 | 
						|
    # by episode numbers, and each entry consists of an array of hashes each
 | 
						|
    # containing the filename and the URL of the asset.
 | 
						|
    #
 | 
						|
    $assets_added = $assets_deleted = $assets_updated = 0;
 | 
						|
    foreach my $key ( sort( keys(%$assets) ) ) {
 | 
						|
        print "Checking episode $key\n" if $verbose;
 | 
						|
 | 
						|
        #
 | 
						|
        # Count the assets for the current show
 | 
						|
        #
 | 
						|
        $asset_count = scalar( @{ $assets->{$key} } );
 | 
						|
 | 
						|
        _debug( $DEBUG >= 2, "Episode: $key, Asset_count: $asset_count" );
 | 
						|
 | 
						|
       #
 | 
						|
       # Query the SQLite database for the episode number we just got, looking
 | 
						|
       # for entries in the 'assets' table
 | 
						|
       #
 | 
						|
        $sth->execute($key);
 | 
						|
        if ( $dbh->err ) {
 | 
						|
            croak $dbh->errstr;
 | 
						|
        }
 | 
						|
 | 
						|
        #
 | 
						|
        # Grab everything as a hash keyed on the 'filename' field so we can
 | 
						|
        # check it easily. Count what we got (i.e. count the number of assets
 | 
						|
        # in SQLite for this episode).
 | 
						|
        # NOTE: trying the idea of indexing $db_assets by the 'id' field (in
 | 
						|
        # $key_field)
 | 
						|
        #
 | 
						|
        #        $db_assets = $sth->fetchall_hashref('filename');
 | 
						|
        $db_assets = $sth->fetchall_hashref($key_field);
 | 
						|
        $db_count  = scalar( keys(%$db_assets) );
 | 
						|
 | 
						|
        _debug( $DEBUG >= 2, "Assets in ia.db:\n" . Dumper($db_assets) );
 | 
						|
        _debug( $DEBUG >= 2, "DB count: $db_count" );
 | 
						|
 | 
						|
        #
 | 
						|
        # Compare the number of "real" assets with what SQLite holds.
 | 
						|
        #
 | 
						|
        if ( $asset_count > $db_count ) {
 | 
						|
            #
 | 
						|
            # We need to add assets. If it's a newly added show which has
 | 
						|
            # assets this will always be necessary. If it's an existing show
 | 
						|
            # then we're repairing an anomaly and want to say so.
 | 
						|
            #
 | 
						|
            if ($new) {
 | 
						|
                say "Adding assets" if $verbose;
 | 
						|
            }
 | 
						|
            else {
 | 
						|
                say "** Found more real assets than in DB -> adding";
 | 
						|
            }
 | 
						|
            $assets_added += insert_assets( $dbh, $key, $assets, $db_assets,
 | 
						|
                $dry_run, $verbose );
 | 
						|
        }
 | 
						|
        elsif ( $asset_count < $db_count ) {
 | 
						|
            #
 | 
						|
            # We need to delete assets. This can't happen for an existing
 | 
						|
            # show, so it's always a noteworthy event.
 | 
						|
            #
 | 
						|
            say "** Found fewer real assets than in DB -> deleting";
 | 
						|
            $assets_deleted += delete_assets( $dbh, $key, $assets, $db_assets,
 | 
						|
                $dry_run, $verbose );
 | 
						|
        }
 | 
						|
 | 
						|
        #
 | 
						|
        # Look for field differences in what we have after the inserts and
 | 
						|
        # deletes
 | 
						|
        #
 | 
						|
        if ( $assets_added > 0 || $assets_deleted > 0 ) {
 | 
						|
            $sth->execute($key);
 | 
						|
            if ( $dbh->err ) {
 | 
						|
                croak $dbh->errstr;
 | 
						|
            }
 | 
						|
            $db_assets = $sth->fetchall_hashref($key_field);
 | 
						|
        }
 | 
						|
        my @results
 | 
						|
            = synchronise_found_assets( $dbh, $key, $assets, $db_assets,
 | 
						|
            $dry_run, $verbose );
 | 
						|
 | 
						|
        $assets_added   += $results[0];
 | 
						|
        $assets_deleted += $results[1];
 | 
						|
        $assets_updated += $results[2];
 | 
						|
 | 
						|
        _debug( $DEBUG >= 4, '@results ' . Dumper( \@results ) );
 | 
						|
 | 
						|
        #
 | 
						|
        # If there are assets then we need to ensure the 'has_files' setting
 | 
						|
        # reflects it (unless in dry run mode)
 | 
						|
        #
 | 
						|
        if ( $asset_count > 0 ) {
 | 
						|
            unless ($dry_run) {
 | 
						|
                $dbh->do(
 | 
						|
                    q{
 | 
						|
                        UPDATE episodes
 | 
						|
                        SET has_files = 1
 | 
						|
                        WHERE id = ?
 | 
						|
                        AND has_files = 0
 | 
						|
                    },
 | 
						|
                    undef,
 | 
						|
                    $key,
 | 
						|
                ) or die $dbh->errstr;
 | 
						|
            }
 | 
						|
            else {
 | 
						|
                say "Would have updated the 'episodes.has_files' ",
 | 
						|
                    "field for $key (if needed)";
 | 
						|
            }
 | 
						|
        }
 | 
						|
 | 
						|
        #        $assets_added += $asset_count;
 | 
						|
 | 
						|
    }
 | 
						|
 | 
						|
    return ( $assets_added, $assets_deleted, $assets_updated );
 | 
						|
 | 
						|
}
 | 
						|
 | 
						|
#===  FUNCTION  ================================================================
 | 
						|
#         NAME: synchronise_found_assets
 | 
						|
#      PURPOSE: Compares the assets found by searching the shownotes (and
 | 
						|
#               other files) with those in the SQLite database and updates
 | 
						|
#               what differs.
 | 
						|
#   PARAMETERS: $dbh            SQLite database handle
 | 
						|
#               $episode        Numeric episode
 | 
						|
#               $assets         Hashref pointing to the collected assets which
 | 
						|
#                               have been found by parsing the show's notes;
 | 
						|
#                               keyed by episode number. Each keyed object is
 | 
						|
#                               an arrayref of hashes containing the URL and
 | 
						|
#                               filename of the asset.
 | 
						|
#               $db_assets      Hash of assets from SQLite, keyed by filename.
 | 
						|
#                               Each keyed object is a hash of columns from
 | 
						|
#                               the database: id, episode_id, URL, filename,
 | 
						|
#                               uploaded.
 | 
						|
#               $dry_run        Boolean dry run setting
 | 
						|
#               $verbose        Boolean verbose setting
 | 
						|
#      RETURNS: Number of additions, deletions and updates
 | 
						|
#  DESCRIPTION: The assets for this episode have been found by searching the
 | 
						|
#               HTML of the show notes and any linked HTML files for files on
 | 
						|
#               the HPR server.  The equivalent assets have also been
 | 
						|
#               collected from the SQLite database. A comparison is made
 | 
						|
#               between these items and if they differ the definitive details
 | 
						|
#               collected from the MySQL database are used. There are three
 | 
						|
#               actions possible: (1) if the URL or filename fields differ
 | 
						|
#               then we can work out the update and apply it, (2) if the
 | 
						|
#               incoming assets don't match any in those from the database
 | 
						|
#               then we can delete the database ones, and (3) we can add the
 | 
						|
#               incoming assets we also couldn't match because they'll be the
 | 
						|
#               replecements!  For all actions changes are made to the
 | 
						|
#               versions in the SQLite database using SQL::Abstract to
 | 
						|
#               construct the queries to do it.
 | 
						|
#       THROWS: No exceptions
 | 
						|
#     COMMENTS: TODO: There may be multiple asset changes for the episode.
 | 
						|
#               Does the SQL::Abstract call perform the multiple changes?
 | 
						|
#               Needs testing.
 | 
						|
#     SEE ALSO: N/A
 | 
						|
#===============================================================================
 | 
						|
sub synchronise_found_assets {
 | 
						|
    my ( $dbh, $episode, $assets, $db_assets, $dry_run, $verbose ) = @_;
 | 
						|
 | 
						|
    my ( $sth, $additions, $deletions, $updates, $update_count, $index);
 | 
						|
    my ( @fieldvals, @where, @db_rows, @asset_rows, %matches, %changes );
 | 
						|
 | 
						|
    $additions = $deletions = $updates = 0;
 | 
						|
    $update_count = 0;
 | 
						|
 | 
						|
    #
 | 
						|
    # Temporary?
 | 
						|
    #
 | 
						|
    if ( $DEBUG >= 4 ) {
 | 
						|
        _debug( 1, 'In subroutine synchronise_found_assets' );
 | 
						|
        _debug( 1, "\$episode: $episode" );
 | 
						|
        _debug( 1, '%$assets: ' . Dumper($assets) );
 | 
						|
        _debug( 1, '%$db_assets: ' . Dumper($db_assets) );
 | 
						|
    }
 | 
						|
 | 
						|
    _debug( $DEBUG >= 4, 'Looking for assets that match - to exclude them' );
 | 
						|
 | 
						|
    @db_rows = ( keys(%$db_assets) );
 | 
						|
    @asset_rows = ( 0 .. scalar( @{ $assets->{$episode} } ) - 1 );
 | 
						|
    _debug( $DEBUG >= 4, 'Initial @db_rows ' . Dumper( \@db_rows ) );
 | 
						|
    _debug( $DEBUG >= 4, 'Initial @asset_rows ' . Dumper( \@asset_rows ) );
 | 
						|
 | 
						|
    #
 | 
						|
    # Find any asset pair matches
 | 
						|
    #
 | 
						|
    $index = 0;
 | 
						|
    foreach my $db_key ( keys(%$db_assets) ) {
 | 
						|
        foreach my $a_obj ( @{ $assets->{$episode} } ) {
 | 
						|
            if ( assets_match( $a_obj, $db_assets->{$db_key} ) == 2 ) {
 | 
						|
                $matches{$db_key} = [ $a_obj, $db_assets->{$db_key} ];
 | 
						|
#                splice( @asset_rows, $index, 1 );
 | 
						|
                @asset_rows = grep { $_ != $index } @asset_rows;
 | 
						|
                $index++;
 | 
						|
            }
 | 
						|
        }
 | 
						|
    }
 | 
						|
    _debug( $DEBUG >= 4, 'Assets to ignore: %matches ' . Dumper( \%matches ) );
 | 
						|
    _debug( $DEBUG >= 4, 'Current @asset_rows ' . Dumper( \@asset_rows ) );
 | 
						|
 | 
						|
    #
 | 
						|
    # If everything matched then there's nothing to do, so return zero and
 | 
						|
    # leave.
 | 
						|
    #
 | 
						|
#    if (scalar(@db_rows) == scalar( keys(%matches) )) {
 | 
						|
    if ( scalar(@asset_rows) == 0 ) {
 | 
						|
        _debug( $DEBUG >= 4, 'All rows match, so nothing to do' );
 | 
						|
        return ( 0, 0, 0 );
 | 
						|
    }
 | 
						|
    _debug( $DEBUG >= 4, 'Some rows do not match' );
 | 
						|
 | 
						|
    #
 | 
						|
    # Remove the row ids for any matches
 | 
						|
    #
 | 
						|
    @db_rows = grep { !exists( $matches{$_} ) } @db_rows;
 | 
						|
    _debug( $DEBUG >= 4, '@db_rows after matches ' . Dumper( \@db_rows ) );
 | 
						|
    _debug( $DEBUG >= 4, '@asset_rows after matches ' . Dumper( \@asset_rows ) );
 | 
						|
 | 
						|
    _debug( $DEBUG >= 4, 'Looking for assets that match in only one attribute' );
 | 
						|
 | 
						|
    #
 | 
						|
    # Find any cases where only one element of the asset object matches. This
 | 
						|
    # means we have a simple change.
 | 
						|
    #
 | 
						|
    $index = 0;
 | 
						|
    foreach my $db_key (@db_rows) {
 | 
						|
        foreach my $a_obj ( @{ $assets->{$episode} } ) {
 | 
						|
            if ( assets_match( $a_obj, $db_assets->{$db_key} ) == 1 ) {
 | 
						|
                $changes{$db_key} = [ $a_obj, $db_assets->{$db_key} ];
 | 
						|
                @asset_rows = grep { $_ != $index } @asset_rows;
 | 
						|
                $index++;
 | 
						|
            }
 | 
						|
        }
 | 
						|
    }
 | 
						|
    _debug( $DEBUG >= 4, 'Assets to change: %changes ' . Dumper( \%changes ) );
 | 
						|
    _debug( $DEBUG >= 4, 'Current @asset_rows ' . Dumper( \@asset_rows ) );
 | 
						|
 | 
						|
    #
 | 
						|
    # Remove the row ids for any changes
 | 
						|
    #
 | 
						|
    @db_rows = grep { !exists( $changes{$_} ) } @db_rows;
 | 
						|
    _debug( $DEBUG >= 4, '@db_rows after changes ' . Dumper( \@db_rows ) );
 | 
						|
    _debug( $DEBUG >= 4, '@asset_rows after changes ' . Dumper( \@asset_rows ) );
 | 
						|
 | 
						|
    #
 | 
						|
    # If there are changes, do them now
 | 
						|
    #
 | 
						|
    if (%changes) {
 | 
						|
        _debug( $DEBUG >= 4, 'Collecting changes' );
 | 
						|
 | 
						|
        #
 | 
						|
        # We may have multiple database rows to process
 | 
						|
        #
 | 
						|
        $index        = 0;
 | 
						|
 | 
						|
        foreach my $key ( keys(%changes) ) {
 | 
						|
            $fieldvals[$index] = {};
 | 
						|
            $where[$index]     = { episode_id => $episode };
 | 
						|
 | 
						|
            #
 | 
						|
            # Compare the fields we're interested in, accumulating differences
 | 
						|
            # (expecting only one per key).
 | 
						|
            #
 | 
						|
            foreach my $fld ( 'filename', 'URL' ) {
 | 
						|
                if (!equal(
 | 
						|
                        $changes{$key}->[0]->{$fld},
 | 
						|
                        $changes{$key}->[1]->{$fld}
 | 
						|
                    )
 | 
						|
                    )
 | 
						|
                {
 | 
						|
                    $update_count++;
 | 
						|
                    $fieldvals[$index]->{$fld} = $changes{$key}->[0]->{$fld};
 | 
						|
                }
 | 
						|
            }
 | 
						|
 | 
						|
            $where[$index]->{id} = $key;
 | 
						|
 | 
						|
            $index++;
 | 
						|
        }
 | 
						|
    }
 | 
						|
 | 
						|
    _debug( $DEBUG >= 2, '@fieldvals: ' . Dumper( \@fieldvals ) );
 | 
						|
    _debug( $DEBUG >= 2, '@where ' . Dumper( \@where ) );
 | 
						|
 | 
						|
    if ( $update_count > 0 ) {
 | 
						|
        _debug( $DEBUG >= 4, 'Performing changes' );
 | 
						|
        #
 | 
						|
        # Prepare to use SQL::Abstract
 | 
						|
        #
 | 
						|
        my $sql = SQL::Abstract->new();
 | 
						|
 | 
						|
        foreach my $fv (@fieldvals) {
 | 
						|
            my ( $stmt, @bindvals )
 | 
						|
                = $sql->update( 'assets', $fv, shift(@where) );
 | 
						|
 | 
						|
            #
 | 
						|
            # Perform the updates (unless in dry-run mode)
 | 
						|
            #
 | 
						|
            unless ($dry_run) {
 | 
						|
                $sth = $dbh->prepare($stmt);
 | 
						|
                $sth->execute(@bindvals);
 | 
						|
                if ( $dbh->err ) {
 | 
						|
                    warn "Processing $episode\n", $dbh->errstr;
 | 
						|
                }
 | 
						|
                else {
 | 
						|
                    print "Updated ", join( ',', keys(%$fv) ), "\n"
 | 
						|
                        if $verbose;
 | 
						|
                    $updates++;
 | 
						|
                }
 | 
						|
            }
 | 
						|
            else {
 | 
						|
                print "Dry run mode:\n";
 | 
						|
                print "Statement: $stmt\n";
 | 
						|
                print "Bindvals: ", join( ", ", @bindvals ), "\n";
 | 
						|
            }
 | 
						|
        }
 | 
						|
    }
 | 
						|
 | 
						|
    #
 | 
						|
    # If anything is left in @db_rows then they will be replacements
 | 
						|
    #
 | 
						|
    if ( scalar(@db_rows) > 0 ) {
 | 
						|
        say "Replacements found: ", scalar(@db_rows);
 | 
						|
 | 
						|
        #
 | 
						|
        # Prepare to use SQL::Abstract
 | 
						|
        #
 | 
						|
        my $sql = SQL::Abstract->new();
 | 
						|
 | 
						|
        #
 | 
						|
        # Every row which came from the database is to be deleted
 | 
						|
        #
 | 
						|
        $index = 0;
 | 
						|
        foreach my $db_key (@db_rows) {
 | 
						|
            $where[$index] = { episode_id => $episode };
 | 
						|
            $where[$index]->{id} = $db_key;
 | 
						|
            $index++;
 | 
						|
        }
 | 
						|
        _debug( $DEBUG >= 2, '@where ' . Dumper( \@where ) );
 | 
						|
 | 
						|
        #
 | 
						|
        # Do deletions
 | 
						|
        #
 | 
						|
        foreach my $db_key (@db_rows) {
 | 
						|
            my ( $stmt, @bindvals )
 | 
						|
                = $sql->delete( 'assets', shift(@where) );
 | 
						|
 | 
						|
            #
 | 
						|
            # Perform the deletions (unless in dry-run mode)
 | 
						|
            #
 | 
						|
            unless ($dry_run) {
 | 
						|
                $sth = $dbh->prepare($stmt);
 | 
						|
                $sth->execute(@bindvals);
 | 
						|
                if ( $dbh->err ) {
 | 
						|
                    warn "Processing $episode\n", $dbh->errstr;
 | 
						|
                }
 | 
						|
                else {
 | 
						|
                    print "Deleted ", join( ',', @db_rows ), "\n"
 | 
						|
                        if $verbose;
 | 
						|
                    $deletions++;
 | 
						|
                }
 | 
						|
            }
 | 
						|
            else {
 | 
						|
                print "Dry run mode:\n";
 | 
						|
                print "Statement: $stmt\n";
 | 
						|
                print "Bindvals: ", join( ", ", @bindvals ), "\n";
 | 
						|
            }
 | 
						|
        }
 | 
						|
 | 
						|
        #
 | 
						|
        # Every object in the found assets array for this episode is to be
 | 
						|
        # added.
 | 
						|
        #
 | 
						|
        foreach my $i (@asset_rows) {
 | 
						|
            my $obj = $assets->{$episode}->[$i];
 | 
						|
            $obj->{episode_id} = $episode;
 | 
						|
 | 
						|
            my ( $stmt, @bindvals )
 | 
						|
                = $sql->insert( 'assets', $obj );
 | 
						|
 | 
						|
            #
 | 
						|
            # Perform the additions (unless in dry-run mode)
 | 
						|
            #
 | 
						|
            unless ($dry_run) {
 | 
						|
                $sth = $dbh->prepare($stmt);
 | 
						|
                $sth->execute(@bindvals);
 | 
						|
                if ( $dbh->err ) {
 | 
						|
                    warn "Processing $episode\n", $dbh->errstr;
 | 
						|
                }
 | 
						|
                else {
 | 
						|
                    print "Inserted array elements ", join( ',', @asset_rows ), "\n"
 | 
						|
                        if $verbose;
 | 
						|
                    $additions++;
 | 
						|
                }
 | 
						|
            }
 | 
						|
            else {
 | 
						|
                print "Dry run mode:\n";
 | 
						|
                print "Statement: $stmt\n";
 | 
						|
                print "Bindvals: ", join( ", ", @bindvals ), "\n";
 | 
						|
            }
 | 
						|
        }
 | 
						|
 | 
						|
    }
 | 
						|
 | 
						|
    return ( $additions, $deletions, $updates );
 | 
						|
}
 | 
						|
 | 
						|
################################################################################
 | 
						|
    #
 | 
						|
    # Look for database assets to be ignored. The outer loop iterates through
 | 
						|
    # the primary key values of $db_assets where the rows all match the
 | 
						|
    # $episode value. For each of these assets the next loop iterates through
 | 
						|
    # the array of assets in $assets->{$episode} comparing the fields we want
 | 
						|
    # to check in the next inner loop. If the checked fields are all equal
 | 
						|
    # then there will be nothing to do.
 | 
						|
    #
 | 
						|
#    foreach my $db_key ( keys(%$db_assets) ) {
 | 
						|
#        foreach my $a_obj ( @{ $assets->{$episode} } ) {
 | 
						|
#            $ignore = $delete = $update = 0;
 | 
						|
#            foreach my $fld ( 'filename', 'URL' ) {
 | 
						|
#                $ignore
 | 
						|
#                    += equal( $db_assets->{$db_key}->{$fld}, $a_obj->{$fld} );
 | 
						|
#                $delete
 | 
						|
#                    += !equal( $db_assets->{$db_key}->{$fld}, $a_obj->{$fld} );
 | 
						|
#            }
 | 
						|
#            push( @{ $row_actions{'ignore'} }, $db_assets->{$db_key}->{id} )
 | 
						|
#                if $ignore == 2;
 | 
						|
#            push( @{ $row_actions{'delete'} }, $db_assets->{$db_key}->{id} )
 | 
						|
#                if $delete == 2;
 | 
						|
#        }
 | 
						|
#    }
 | 
						|
#
 | 
						|
#    _debug( $DEBUG >= 4, '%row_actions ' . Dumper( \%row_actions ) );
 | 
						|
#
 | 
						|
    #
 | 
						|
    # Scan the array of hashes obtained from the shownotes for $episode and
 | 
						|
    # record things that need changing.
 | 
						|
    #
 | 
						|
#    $index = 0;
 | 
						|
#    $updates = 0;
 | 
						|
#    foreach my $obj ( @{ $assets->{$episode} } ) {
 | 
						|
#        $fieldvals[$index] = {};
 | 
						|
#
 | 
						|
#        #
 | 
						|
#        # Compare the fields we're interested in
 | 
						|
#        #
 | 
						|
#        foreach my $fld ( 'filename', 'URL' ) {
 | 
						|
#            if ( !equal( $obj->{$fld}, $db_assets->{$obj->{filename}}->{$fld} ) ) {
 | 
						|
#                $updates++;
 | 
						|
#                $fieldvals[$index]->{$fld} = $obj->{$fld};
 | 
						|
#            }
 | 
						|
#        }
 | 
						|
#
 | 
						|
#        # TODO
 | 
						|
#        $fieldvals[$index]->{id} = $db_assets->{$obj->{filename}}->{id};
 | 
						|
#
 | 
						|
#        $index++;
 | 
						|
#    }
 | 
						|
#
 | 
						|
#    _debug( $DEBUG >= 2, '@fieldvals: ' . Dumper( \@fieldvals ) );
 | 
						|
#
 | 
						|
#    if ($updates > 0 ) {
 | 
						|
#        #
 | 
						|
#        # Prepare to use SQL::Abstract
 | 
						|
#        #
 | 
						|
#        my $sql = SQL::Abstract->new();
 | 
						|
#
 | 
						|
#        $where{episode_id} = $episode;
 | 
						|
#
 | 
						|
#        foreach my $fv (@fieldvals) {
 | 
						|
#            $where{id} = $fv->{id};
 | 
						|
#            delete($fv->{id});
 | 
						|
#
 | 
						|
#            my ( $stmt, @bindvals )
 | 
						|
#                = $sql->update( 'assets', $fv, \%where );
 | 
						|
#
 | 
						|
#            #
 | 
						|
#            # Perform the updates (unless in dry-run mode)
 | 
						|
#            #
 | 
						|
#            unless ($dry_run) {
 | 
						|
#                $sth = $dbh->prepare($stmt);
 | 
						|
#                $sth->execute(@bindvals);
 | 
						|
#                if ( $dbh->err ) {
 | 
						|
#                    warn "Processing $episode\n", $dbh->errstr;
 | 
						|
#                }
 | 
						|
#                else {
 | 
						|
#                    print "Updated ", join( ',', keys(%$fv) ), "\n"
 | 
						|
#                        if $verbose;
 | 
						|
#                }
 | 
						|
#            }
 | 
						|
#            else {
 | 
						|
#                print "Dry run mode:\n";
 | 
						|
#                print "Statement: $stmt\n";
 | 
						|
#                print "Bindvals: ", join( ", ", @bindvals ), "\n";
 | 
						|
#            }
 | 
						|
#        }
 | 
						|
#    }
 | 
						|
#
 | 
						|
################################################################################
 | 
						|
 | 
						|
#===  FUNCTION  ================================================================
 | 
						|
#         NAME: insert_assets
 | 
						|
#      PURPOSE: Find missing assets and add them to SQLite
 | 
						|
#   PARAMETERS: $dbh            SQLite database handle
 | 
						|
#               $episode        Numeric episode
 | 
						|
#               $assets         Hashref pointing to the collected assets which
 | 
						|
#                               have been found by parsing the show's notes;
 | 
						|
#                               keyed by episode number.
 | 
						|
#               $db_assets      Hash of assets from SQLite, keyed by filename
 | 
						|
#               $dry_run        Boolean dry run setting
 | 
						|
#               $verbose        Boolean verbose setting
 | 
						|
#      RETURNS: Number of insertions
 | 
						|
#  DESCRIPTION: The assets for this episode have been found by searching the
 | 
						|
#               HTML of the show notes and any linked HTML files for files on
 | 
						|
#               the HPR server.  The equivalent assets have also been
 | 
						|
#               collected from the SQLite database. The MySQL assets are
 | 
						|
#               reorganised by filename. For each file which does not already
 | 
						|
#               exist in the SQLite list details are prepared and passed to
 | 
						|
#               SQL::Abstract using the multi insert feature. The missing
 | 
						|
#               details are inserted into the assets table unless dry-run mode
 | 
						|
#               is on.
 | 
						|
#       THROWS: No exceptions
 | 
						|
#     COMMENTS: None
 | 
						|
#     SEE ALSO: N/A
 | 
						|
#===============================================================================
 | 
						|
sub insert_assets {
 | 
						|
    my ( $dbh, $episode, $assets, $db_assets, $dry_run, $verbose ) = @_;
 | 
						|
 | 
						|
    my ($sth, %db_by_file, %assets_by_file, $insertions, @multi_insert, %where);
 | 
						|
 | 
						|
    #
 | 
						|
    # Reorder the %$db_assets hash in filename order
 | 
						|
    #
 | 
						|
    foreach my $key ( keys( %{$db_assets} ) ) {
 | 
						|
        $db_by_file{$db_assets->{$key}->{filename}} = $db_assets->{$key};
 | 
						|
    }
 | 
						|
 | 
						|
    #
 | 
						|
    # Reorder the %assets hash in filename order
 | 
						|
    #
 | 
						|
    foreach my $obj ( @{ $assets->{$episode} } ) {
 | 
						|
        $assets_by_file{$obj->{filename}} = $obj;
 | 
						|
    }
 | 
						|
 | 
						|
    _debug( $DEBUG >= 2, "%db_by_file: " . Dumper( \%db_by_file ) );
 | 
						|
    _debug( $DEBUG >= 2, "%assets_by_file: " . Dumper( \%assets_by_file ) );
 | 
						|
 | 
						|
    #
 | 
						|
    # Find stuff for insertion. We need to build @multi_insert such that it
 | 
						|
    # contains a list of hashrefs, each containing the fields we want to
 | 
						|
    # insert.
 | 
						|
    # Note: We make a new %fv each time, fill it as a slice, and push it into
 | 
						|
    # the array @multi_insert. This may be rather obscure and therefore
 | 
						|
    # error-prone.
 | 
						|
    #
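    # As a rough illustration (hypothetical filenames), two missing assets
    # would leave @multi_insert holding something like:
    #
    #   ( { episode_id => 1234, URL => '...', filename => 'hpr1234/image.png' },
    #     { episode_id => 1234, URL => '...', filename => 'hpr1234/notes.txt' } )
    #
    # and insert_multi() is then expected to build a single multi-row INSERT
    # along the lines of
    #   INSERT INTO assets (URL, episode_id, filename) VALUES (?,?,?), (?,?,?)
    # with the corresponding values returned in @bindvals.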
 | 
						|
    $insertions = 0;
 | 
						|
    foreach my $key ( keys(%assets_by_file) ) {
 | 
						|
#        if ( !exists( $db_assets->{$key} ) ) {
 | 
						|
        if ( !exists( $db_by_file{$key} ) ) {
 | 
						|
            my %fv;
 | 
						|
            $insertions++;
 | 
						|
            @fv{ 'episode_id', 'URL', 'filename' } = (
 | 
						|
                $episode, $assets_by_file{$key}->{URL},
 | 
						|
                $assets_by_file{$key}->{filename}
 | 
						|
            );
 | 
						|
            push( @multi_insert, \%fv );
 | 
						|
        }
 | 
						|
    }
 | 
						|
 | 
						|
    _debug( $DEBUG >= 2, '@multi_insert: ' . Dumper( \@multi_insert ) );
 | 
						|
 | 
						|
    if ($insertions > 0) {
 | 
						|
        #
 | 
						|
        # Prepare to use SQL::Abstract
 | 
						|
        #
 | 
						|
        my $sql = SQL::Abstract->new();
 | 
						|
 | 
						|
        my ( $stmt, @bindvals )
 | 
						|
            = $sql->insert_multi( 'assets', \@multi_insert );
 | 
						|
 | 
						|
        #
 | 
						|
        # Perform the insertions (unless in dry-run mode)
 | 
						|
        #
 | 
						|
        unless ($dry_run) {
 | 
						|
            $sth = $dbh->prepare($stmt);
 | 
						|
            $sth->execute(@bindvals);
 | 
						|
            if ( $dbh->err ) {
 | 
						|
                warn "Processing $episode\n", $dbh->errstr;
 | 
						|
            }
 | 
						|
            else {
 | 
						|
#                print "Inserted ",join(',',keys(%where)),"\n" if $verbose;
 | 
						|
                print "Inserted $insertions assets\n" if $verbose;
 | 
						|
            }
 | 
						|
        }
 | 
						|
        else {
 | 
						|
            print "Dry run mode:\n";
 | 
						|
            print "Statement: $stmt\n";
 | 
						|
            print "Bindvals: ", join( ", ", @bindvals ), "\n";
 | 
						|
        }
 | 
						|
    }
 | 
						|
 | 
						|
    return $insertions;
 | 
						|
}
 | 
						|
 | 
						|
#===  FUNCTION  ================================================================
 | 
						|
#         NAME: delete_assets
 | 
						|
#      PURPOSE: Find superfluous assets for an episode in the 'assets' table
 | 
						|
#               and delete them.
 | 
						|
#   PARAMETERS: $dbh            SQLite database handle
 | 
						|
#               $episode        Numeric episode
 | 
						|
#               $assets         Hashref pointing to the collected assets which
 | 
						|
#                               have been found by parsing the show's notes;
 | 
						|
#                               keyed by episode number.
 | 
						|
#               $db_assets      Hash of assets from SQLite, keyed by filename
 | 
						|
#               $dry_run        Boolean dry run setting
 | 
						|
#               $verbose        Boolean verbose setting
 | 
						|
#      RETURNS: Number of deletions
 | 
						|
#  DESCRIPTION: The assets for this episode have been found by searching the
 | 
						|
#               HTML of the show notes and any linked HTML files for files on
 | 
						|
#               the HPR server.  The equivalent assets have also been
 | 
						|
#               collected from the SQLite database. The MySQL assets are
 | 
						|
#               reorganised by filename. For each file which exists in the
#               SQLite list but is no longer among the scanned assets, details
#               are prepared and passed to SQL::Abstract. The unwanted
#               details are deleted from the assets table unless
 | 
						|
#               dry-run mode is on.
 | 
						|
#       THROWS: No exceptions
 | 
						|
#     COMMENTS: None
 | 
						|
#     SEE ALSO: N/A
 | 
						|
#===============================================================================
 | 
						|
sub delete_assets {
 | 
						|
    my ( $dbh, $episode, $assets, $db_assets, $dry_run, $verbose ) = @_;
 | 
						|
 | 
						|
    my ($sth, %db_by_file, %assets_by_file, $deletions, %where);
 | 
						|
 | 
						|
    #
 | 
						|
    # Reorder the %$db_assets hash in filename order
 | 
						|
    #
 | 
						|
    foreach my $key ( keys( %{$db_assets} ) ) {
 | 
						|
        $db_by_file{$db_assets->{$key}->{filename}} = $db_assets->{$key};
 | 
						|
    }
 | 
						|
 | 
						|
    #
 | 
						|
    # Reorder the %assets hash in filename order
 | 
						|
    #
 | 
						|
    foreach my $obj ( @{ $assets->{$episode} } ) {
 | 
						|
        $assets_by_file{$obj->{filename}} = $obj;
 | 
						|
    }
 | 
						|
 | 
						|
    _debug( $DEBUG >= 2, "%db_by_file: " . Dumper( \%db_by_file ) );
 | 
						|
    _debug( $DEBUG >= 2, "%assets_by_file: " . Dumper( \%assets_by_file ) );
 | 
						|
 | 
						|
    #
 | 
						|
    # Only delete assets with this value in the 'episode_id' field
 | 
						|
    #
 | 
						|
    $where{episode_id} = $episode;
 | 
						|
 | 
						|
    #
 | 
						|
    # Find stuff for deletion. We need to build %where such that it generates
 | 
						|
    # 'WHERE filename = <value1> OR filename = <value2> ...', so we add these
 | 
						|
    # values to an (anonymous) array.
 | 
						|
    #
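    # For example (hypothetical filenames), %where could end up as:
    #
    #   ( episode_id => 1234,
    #     filename   => [ 'hpr1234/old.png', 'hpr1234/old.txt' ] )
    #
    # which SQL::Abstract turns into something like
    #   DELETE FROM assets WHERE ( episode_id = ? AND ( filename = ? OR filename = ? ) )
    # with the values passed separately in @bindvals.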
 | 
						|
    $deletions = 0;
 | 
						|
    foreach my $key ( keys(%db_by_file) ) {
        if ( !exists( $assets_by_file{$key} ) ) {
 | 
						|
            $deletions++;
 | 
						|
            if ( !exists( $where{filename} ) ) {
 | 
						|
                $where{filename} = [$key];
 | 
						|
            }
 | 
						|
            else {
 | 
						|
                push( @{ $where{filename} }, $key );
 | 
						|
            }
 | 
						|
        }
 | 
						|
    }
 | 
						|
 | 
						|
    _debug( $DEBUG >= 2, "%where: " . Dumper( \%where ) );
 | 
						|
 | 
						|
    if ($deletions > 0) {
 | 
						|
        #
 | 
						|
        # Prepare to use SQL::Abstract
 | 
						|
        #
 | 
						|
        my $sql = SQL::Abstract->new();
 | 
						|
 | 
						|
        my ( $stmt, @bindvals )
 | 
						|
            = $sql->delete( 'assets', \%where );
 | 
						|
 | 
						|
        #
 | 
						|
        # Perform the deletions (unless in dry-run mode)
 | 
						|
        #
 | 
						|
        unless ($dry_run) {
 | 
						|
            $sth = $dbh->prepare($stmt);
 | 
						|
            $sth->execute(@bindvals);
 | 
						|
            if ( $dbh->err ) {
 | 
						|
                warn "Processing $episode\n", $dbh->errstr;
 | 
						|
            }
 | 
						|
            else {
 | 
						|
                print "Deleted ",join(',',keys(%where)),"\n" if $verbose;
 | 
						|
            }
 | 
						|
        }
 | 
						|
        else {
 | 
						|
            print "Dry run mode:\n";
 | 
						|
            print "Statement: $stmt\n";
 | 
						|
            print "Bindvals: ", join( ", ", @bindvals ), "\n";
 | 
						|
        }
 | 
						|
    }
 | 
						|
 | 
						|
    return $deletions;
 | 
						|
}
 | 
						|
 | 
						|
#===  FUNCTION  ================================================================
 | 
						|
#         NAME: update_episode
 | 
						|
#      PURPOSE: Perform a database update for a specific episode
 | 
						|
#   PARAMETERS: $dbh1           Handle for the MariaDB database
 | 
						|
#               $dbh2           Handle for the SQLite database
 | 
						|
#               $episode        Target episode to update
 | 
						|
#               $dry_run        Boolean dry run setting
 | 
						|
#               $verbose        Boolean verbose setting
 | 
						|
#      RETURNS: Nothing
 | 
						|
#  DESCRIPTION: Performs an update for a single episode in the SQLite
 | 
						|
#               database. The usual scenario is after the Community News show
 | 
						|
#               has been posted, or a spelling mistake has been corrected for
 | 
						|
#               an episode. The episode is selected from the MySQL database
 | 
						|
#               and the SQLite one. The 'title' and 'summary' fields are
 | 
						|
#               compared across the two copies, and if they differ the
 | 
						|
#               necessary changes are set up to be executed or reported in
 | 
						|
#               'dry run' mode.
 | 
						|
#               NOTE: No longer done this way:
 | 
						|
#               |A search for assets is made by reading the show notes and any
 | 
						|
#               |notes that are linked from these, and if there are
 | 
						|
#               |supplementary files they are accumulated and the database
 | 
						|
#               |updated as necessary (under control of the 'dry run' option).
 | 
						|
#               |This is currently the only way that the row in the SQLite
 | 
						|
#               |database can be updated after it has been loaded from the
 | 
						|
#               |MySQL database, other than when the metadata is collected and
 | 
						|
#               |applied. The metadata update process does not fix errors in
 | 
						|
#               |the 'title' or 'summary' fields.
 | 
						|
#       THROWS: No exceptions
 | 
						|
#     COMMENTS: None
 | 
						|
#     SEE ALSO: N/A
 | 
						|
#===============================================================================
 | 
						|
sub update_episode {
 | 
						|
    my ( $dbh1, $dbh2, $episode, $dry_run, $verbose ) = @_;
 | 
						|
 | 
						|
    my ( $sql1, $sql2, $sth1, $h1, $sth2, $h2, $sth3 );
 | 
						|
    my ( %assets, $asset_count, $assets_added, $assets_deleted, $assets_updated );
 | 
						|
    my ( $id, $notes, $links );
 | 
						|
    my ( $updates, %fieldvals, %where );
 | 
						|
 | 
						|
    #
 | 
						|
    # SQL to find new rows in the MySQL database
 | 
						|
    #
 | 
						|
    $sql1 = q{
 | 
						|
        SELECT
 | 
						|
            id,
 | 
						|
            date,
 | 
						|
            title,
 | 
						|
            summary,
 | 
						|
            notes
 | 
						|
        FROM eps
 | 
						|
        WHERE id = ?
 | 
						|
    };
 | 
						|
 | 
						|
    #
 | 
						|
    # SQL to look for pre-existing episodes in the SQLite database
 | 
						|
    #
 | 
						|
    $sql2 = q{
 | 
						|
        SELECT * FROM episodes WHERE id = ?
 | 
						|
    };
 | 
						|
 | 
						|
    #
 | 
						|
    # Prepare the SQL statements
 | 
						|
    #
 | 
						|
    $sth1 = $dbh1->prepare($sql1);
 | 
						|
    $sth2 = $dbh2->prepare($sql2);
 | 
						|
 | 
						|
    #
 | 
						|
    # Query MySQL, for the $episode
 | 
						|
    #
 | 
						|
    $sth1->execute($episode);
 | 
						|
    if ( $dbh1->err ) {
 | 
						|
        croak $dbh1->errstr;
 | 
						|
    }
 | 
						|
 | 
						|
    #
 | 
						|
    # Loop through the MySQL row (should only be one)
 | 
						|
    #
 | 
						|
    $asset_count = $updates = 0;
 | 
						|
    while ( $h1 = $sth1->fetchrow_hashref ) {
 | 
						|
        #
 | 
						|
        # Query the SQLite database for the episode number we just got
 | 
						|
        #
 | 
						|
        $sth2->execute( $h1->{id} );
 | 
						|
        if ( $dbh2->err ) {
 | 
						|
            croak $dbh2->errstr;
 | 
						|
        }
 | 
						|
        $h2 = $sth2->fetchrow_hashref;
 | 
						|
 | 
						|
        #
 | 
						|
        # If the row doesn't exist we have a problem.
 | 
						|
        # TODO: Could we just add the episode at this point?
 | 
						|
        #
 | 
						|
        unless ($h2) {
 | 
						|
            warn "Row $episode is not in the SQLite database\n";
 | 
						|
            return;
 | 
						|
        }
 | 
						|
 | 
						|
        #
 | 
						|
        # Look for title and summary changes and build a hash holding the
 | 
						|
        # changes
 | 
						|
        #
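        # (For example, if only the title differs, %fieldvals ends up as
        # ( title => '<new title>' ) and the statement built below is roughly
        # "UPDATE episodes SET title = ? WHERE id = ?".)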
 | 
						|
        foreach my $fld ( 'title', 'summary' ) {
 | 
						|
            if ( !equal( $h1->{$fld}, $h2->{$fld} ) ) {
 | 
						|
                $updates++;
 | 
						|
                $fieldvals{$fld} = $h1->{$fld};
 | 
						|
            }
 | 
						|
        }
 | 
						|
 | 
						|
        if ($updates > 0 ) {
 | 
						|
            #
 | 
						|
            # Prepare to use SQL::Abstract
 | 
						|
            #
 | 
						|
            my $sql = SQL::Abstract->new();
 | 
						|
 | 
						|
            $where{id} = $episode;
 | 
						|
            my ( $stmt, @bindvals )
 | 
						|
                = $sql->update( 'episodes', \%fieldvals, \%where );
 | 
						|
 | 
						|
            #
 | 
						|
            # Perform the updates (unless in dry-run mode)
 | 
						|
            #
 | 
						|
            unless ($dry_run) {
 | 
						|
                $sth3 = $dbh2->prepare($stmt);
 | 
						|
                $sth3->execute(@bindvals);
 | 
						|
                if ( $dbh2->err ) {
 | 
						|
                    warn "Processing $episode\n", $dbh2->errstr;
 | 
						|
                }
 | 
						|
                else {
 | 
						|
                    print "Updated ",join(',',keys(%fieldvals)),"\n" if $verbose;
 | 
						|
                }
 | 
						|
            }
 | 
						|
            else {
 | 
						|
                print "Dry run mode:\n";
 | 
						|
                print "Statement: $stmt\n";
 | 
						|
                print "Bindvals: ", join( ", ", @bindvals ), "\n";
 | 
						|
            }
 | 
						|
        }
 | 
						|
 | 
						|
        #
 | 
						|
        # Check this show for "assets". Even in dry-run mode this gets
 | 
						|
        # executed
 | 
						|
        #
 | 
						|
        $links = 0;
 | 
						|
 | 
						|
        $id = $h1->{id};
 | 
						|
        $notes = $h1->{notes};
 | 
						|
        # $links = find_links($id,$notes,\%assets);
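        # ($ssh and $rempath are not parameters of this sub; they are
        # presumably the script-wide SSH connection and remote path set up in
        # the main body before update_episode() is called.)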
 | 
						|
        $links = find_assets($ssh,$episode,$rempath,\%assets);
 | 
						|
 | 
						|
        $asset_count += $links;
 | 
						|
 | 
						|
    }
 | 
						|
 | 
						|
    if ( $asset_count > 0 ) {
 | 
						|
        _debug( $DEBUG >= 2, "Scanned assets:\n" . Dumper( \%assets ) );
 | 
						|
    }
 | 
						|
    else {
 | 
						|
        _debug( $DEBUG >= 2, "No assets found" );
 | 
						|
    }
 | 
						|
 | 
						|
    #
 | 
						|
    # Add any assets collected from the episode just processed
 | 
						|
    #
 | 
						|
    $assets_added = 0;
 | 
						|
    if ($asset_count > 0) {
 | 
						|
        ( $assets_added, $assets_deleted, $assets_updated ) =
 | 
						|
            process_assets( $dbh2, \%assets, 0, $dry_run, $verbose );
 | 
						|
    }
 | 
						|
 | 
						|
    printf "%-16s %d\n", "Episode checked:", $episode;
 | 
						|
    printf "%-16s %d\n", "Episode updates:", $updates;
 | 
						|
    printf "%-16s %d\n", "Assets found:",    $asset_count;
 | 
						|
    printf "%-16s %d\n", "Assets added:",    coalesce( $assets_added,   0 );
 | 
						|
    printf "%-16s %d\n", "Assets deleted:",  coalesce( $assets_deleted, 0 );
 | 
						|
    printf "%-16s %d\n", "Assets updated:",  coalesce( $assets_updated, 0 );
 | 
						|
 | 
						|
    $log->info("Episode number: $episode");
 | 
						|
    $log->info("Episode updates: $updates");
 | 
						|
    $log->info( 'Assets added,deleted,updated:',
 | 
						|
        coalesce( $assets_added, 0 ), ',',
 | 
						|
        coalesce( $assets_deleted, 0 ), ',',
 | 
						|
        coalesce( $assets_updated, 0 )
 | 
						|
    );
 | 
						|
}
 | 
						|
 | 
						|
#===  FUNCTION  ================================================================
 | 
						|
#         NAME: load_metadata
 | 
						|
#      PURPOSE: Loads values from a JSON file which has been generated by the
 | 
						|
#               'ia metadata' command and stores them in the 'episodes' table.
 | 
						|
#               Also uses the JSON data to determine if the derived files and
 | 
						|
#               the source file have been uploaded and which assets have been
 | 
						|
#               uploaded.
 | 
						|
#   PARAMETERS: $dbh            Handle for the SQLite database
 | 
						|
#               $file           Name of file of JSON data
 | 
						|
#               $dry_run        Boolean dry run setting
 | 
						|
#               $verbose        Boolean verbose setting
 | 
						|
#      RETURNS: Nothing
 | 
						|
#  DESCRIPTION: The input file is expected to consist of many JSON complex
 | 
						|
#               objects, generated by the command 'ia metadata <identifier>'
 | 
						|
#               for a list of HPR episodes (see script 'collect_show_data').
 | 
						|
#               A count is kept of successful updates, and this is reported
 | 
						|
#               once finished.
 | 
						|
#       THROWS: No exceptions
 | 
						|
#     COMMENTS: None
 | 
						|
#     SEE ALSO: N/A
 | 
						|
#===============================================================================
 | 
						|
sub load_metadata {
 | 
						|
    my ( $dbh, $file, $dry_run, $verbose ) = @_;
 | 
						|
 | 
						|
    my ( $filebuffer, @jsonbuffer, %cols, %fieldvals, %where );
 | 
						|
    my ( @deletions, $index );
 | 
						|
    my ( @stash, @assets, $orig_count, $source, $epid, $count, $updates );
 | 
						|
    my ( $sth1, $sth2, $sth3, $rv, $h );
 | 
						|
    my $template = "https://archive.org/details/hpr%04d";
 | 
						|
    my @flds = (
 | 
						|
        'uploaded',          'has_files',
 | 
						|
        'with_files',        'with_derived',
 | 
						|
        'with_source',       'archive_date',
 | 
						|
        'item_last_updated', 'IA_URL',
 | 
						|
    );
 | 
						|
    my $re = qr{^hpr\d{4}\.(flac|mp3|ogg|opus|spx|wav)$};
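    # ($re matches the plain audio files, e.g. 'hpr1234.mp3' or 'hpr1234.flac',
    # but not 'hpr1234_source.flac' or assets such as 'hpr1234.srt'.)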
 | 
						|
 | 
						|
    #
 | 
						|
    # Read the entire JSON file into a buffer
 | 
						|
    #
 | 
						|
    try {
 | 
						|
        $filebuffer = read_text($file);
 | 
						|
    }
 | 
						|
 | 
						|
    catch {
 | 
						|
        die "Failed to read JSON file $file\n";
 | 
						|
    };
 | 
						|
 | 
						|
    #
 | 
						|
    # Parse the JSON from the buffer
 | 
						|
    #
 | 
						|
    my $json = JSON->new;
 | 
						|
    @jsonbuffer = $json->incr_parse($filebuffer);
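    # (The file is a stream of concatenated JSON objects rather than a single
    # JSON array, so incr_parse() in list context should hand back one Perl
    # structure per complete object found in the buffer.)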
 | 
						|
 | 
						|
    #print Dumper(\@jsonbuffer),"\n";
 | 
						|
 | 
						|
    #
 | 
						|
    # Check that this is the type of JSON we need
 | 
						|
    #
 | 
						|
    unless (@jsonbuffer) {
 | 
						|
        warn "Empty JSON?\n";
 | 
						|
        return;
 | 
						|
    }
 | 
						|
 | 
						|
    #
 | 
						|
    # Look at each top-level JSON object and check for missing components.
 | 
						|
    # It's possible for there to be missing objects, but only in very rare
 | 
						|
    # cases when a range has been requested yet there's a gap in that range
 | 
						|
    # (reserved slot perhaps). We accumulate the indices of invalid objects in
    # @deletions and then delete those objects from @jsonbuffer.
 | 
						|
    #
 | 
						|
    $index = 0;
    foreach my $obj (@jsonbuffer) {
        unless ( exists( $obj->{metadata} )
            && exists( $obj->{metadata}->{identifier} )
            && exists( $obj->{files} ))
        {
            warn "Invalid JSON? Skipped object #$index\n";
            push( @deletions, $index );
        }
        $index++;
    }
 | 
						|
 | 
						|
    #
    # Delete any bad objects we found in the previous pass. Work backwards
    # through the indices so that each splice() leaves the earlier (smaller)
    # indices valid.
    #
    for my $ind ( reverse(@deletions) ) {
        splice( @jsonbuffer, $ind, 1 );
        print STDERR "Deleted invalid item with index $ind\n" if $verbose;
    }
 | 
						|
    print "\n" if $verbose;
 | 
						|
 | 
						|
    #
 | 
						|
    # Prepare to get the database row relating to an episode
 | 
						|
    #
 | 
						|
    $sth1 = $dbh->prepare("SELECT * FROM episodes WHERE id = ?");
 | 
						|
 | 
						|
    #
 | 
						|
    # Prepare to get the rows relating to assets for an episode
 | 
						|
    #
 | 
						|
    $sth2 = $dbh->prepare("SELECT * FROM assets WHERE episode_id = ?");
 | 
						|
 | 
						|
    $count = 0;
 | 
						|
    foreach my $obj (@jsonbuffer) {
 | 
						|
        if ( $obj->{metadata}->{identifier} =~ /hpr(\d{4})/ ) {
 | 
						|
            $epid = $1 + 0;
 | 
						|
 | 
						|
            #
 | 
						|
            # Skip the episodes in the range 1-620
 | 
						|
            #
 | 
						|
            #next if ( $epid <= 620 );
 | 
						|
            #
 | 
						|
            # 2021-06-27: We don't skip any more. Shows 1-620 have been
 | 
						|
            # uploaded in blocks, but we want them on the IA in the same
 | 
						|
            # format as all the others. Today we hit 620, working backwards
 | 
						|
            # from the earliest IA upload as a project to upload everything
 | 
						|
            # with show notes and assets. The existing database entries were
 | 
						|
            # changed to make the 'notes' field hold the text 'Block upload'
 | 
						|
            # followed by the block archive date and the URL. Then the rows
 | 
						|
            # between 1-620 are to have the new date and URL and these
 | 
						|
            # columns have been made NULL in preparation. See the Journal for
 | 
						|
            # more information.
 | 
						|
            #
 | 
						|
 | 
						|
            if ($verbose) {
 | 
						|
                print "Processing ",  $obj->{metadata}->{identifier}, "\n";
 | 
						|
                print "Publicdate: ", $obj->{metadata}->{publicdate}, "\n";
 | 
						|
            }
 | 
						|
 | 
						|
            #
 | 
						|
            # Process the files in the metadata
 | 
						|
            #
 | 
						|
            $orig_count = 0;
 | 
						|
            $source     = 0;
 | 
						|
            undef(@stash);
 | 
						|
            foreach my $f ( @{ $obj->{files} } ) {
 | 
						|
                #
 | 
						|
                # Skip anything with a 'Metadata' format, anything with
                # a source of 'derivative' or 'metadata', the newly added
                # '__ia_thumb.jpg', or anything under 'history/files/'
 | 
						|
                #
 | 
						|
                next
 | 
						|
                    if ( $f->{format} eq 'Metadata'
 | 
						|
                    || $f->{source} =~ /^(derivative|metadata)$/
 | 
						|
                    || $f->{name} eq '__ia_thumb.jpg'
 | 
						|
                    || $f->{name} =~ /^history\/files\// );
 | 
						|
 | 
						|
                #
 | 
						|
                # Count audio files which have been "derived" (by us) prior to upload,
 | 
						|
                # and those that look like 'source' files (e.g.
 | 
						|
                # hpr2472_source.flac). Anything else store in @stash because
 | 
						|
                # it's probably an asset file.
 | 
						|
                #
 | 
						|
                if ( $f->{name} =~ $re ) {
 | 
						|
                    $orig_count++;
 | 
						|
                }
 | 
						|
                elsif ( $f->{name} =~ /^hpr\d{4}_source\..+$/ ) {
 | 
						|
                    $source++;
 | 
						|
                }
 | 
						|
                else {
 | 
						|
                    push(@stash,$f->{name});
 | 
						|
                }
 | 
						|
 | 
						|
                printf "%s %s\n", $f->{name}, $f->{source} if $verbose;
 | 
						|
 | 
						|
            }
 | 
						|
 | 
						|
            #
 | 
						|
            # Check the assets before updating the episode
 | 
						|
            #
 | 
						|
            @assets = collect_assets($dbh, $sth2, $epid);
 | 
						|
            _debug( $DEBUG >= 2, '@assets: ' . Dumper(\@assets));
 | 
						|
            _debug( $DEBUG >= 2, '@stash: ' . Dumper(\@stash));
 | 
						|
 | 
						|
            if (@assets || @stash) {
 | 
						|
                @assets = sort(@assets);
 | 
						|
                @stash = sort(@stash);
 | 
						|
 | 
						|
                #
 | 
						|
                # Smart match the @assets and @stash arrays to look for
 | 
						|
                # differences. In @assets we have what the database holds, and
 | 
						|
                # in @stash we have stored what we found in the JSON.
 | 
						|
                # NOTE: Now that Smartmatch ('~~') is deprecated we use the
 | 
						|
                # rather clever 'match::smart' with its strange |M| operator.
 | 
						|
                # It needs arrays being matched to be offered as references
 | 
						|
                # however.
 | 
						|
                #
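                # (Broadly, for two array refs |M| is true only when they
                # have the same length and corresponding elements match, so
                # any difference triggers the report below.)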
 | 
						|
                # unless ( @assets ~~ @stash ) {
 | 
						|
                unless ( \@assets |M| \@stash ) {
 | 
						|
                    print "Difference between DB assets and IA\n";
 | 
						|
                    print "DB assets:\n    ",join("\n    ",@assets),"\n";
 | 
						|
                    print "IA assets:\n    ",join("\n    ",@stash),"\n";
 | 
						|
                }
 | 
						|
 | 
						|
                #
 | 
						|
                # In live mode mark all assets belonging to this episode as
 | 
						|
                # 'uploaded'.
 | 
						|
                # TODO: Is this dangerous?
 | 
						|
                #
 | 
						|
                unless ($dry_run) {
 | 
						|
                    mark_assets_uploaded($dbh,$epid);
 | 
						|
                }
 | 
						|
                else {
 | 
						|
                    print "Update of 'assets' table skipped; dry run\n";
 | 
						|
                }
 | 
						|
            }
 | 
						|
 | 
						|
            #
 | 
						|
            # Prepare to use SQL::Abstract
 | 
						|
            #
 | 
						|
            my $sql = SQL::Abstract->new();
 | 
						|
 | 
						|
            #
 | 
						|
            # Get the current row for this episode for comparison to find what
 | 
						|
            # has been changed.
 | 
						|
            #
 | 
						|
            $rv = $sth1->execute($epid);
 | 
						|
            if ( $dbh->err ) {
 | 
						|
                warn $dbh->errstr;
 | 
						|
            }
 | 
						|
 | 
						|
            $h = $sth1->fetchrow_hashref;
 | 
						|
 | 
						|
            #
 | 
						|
            # Store the changes
 | 
						|
            #
 | 
						|
            $cols{'uploaded'}          = 1;
 | 
						|
            $cols{'has_files'}         = ( scalar(@stash) > 0 ? 1 : 0 );
 | 
						|
            $cols{'with_files'}        = ( scalar(@stash) > 0 ? 1 : 0 );
 | 
						|
            $cols{'with_derived'}      = ( $orig_count >= 2   ? 1 : 0 );
 | 
						|
            $cols{'with_source'}       = ( $source > 0        ? 1 : 0 );
 | 
						|
            $cols{'archive_date'}      = $obj->{metadata}->{publicdate};
 | 
						|
            $cols{'item_last_updated'} = coalesce($obj->{item_last_updated},0);
 | 
						|
            $cols{'IA_URL'}            = sprintf( $template, $epid );
 | 
						|
 | 
						|
            #
 | 
						|
            # Look for differences, storing the updated values in %fieldvals
 | 
						|
            # and counting the changes.
 | 
						|
            #
 | 
						|
            $updates = 0;
 | 
						|
            undef(%fieldvals);
 | 
						|
            foreach my $fld (@flds) {
 | 
						|
                if ( !equal( $h->{$fld}, $cols{$fld} ) ) {
 | 
						|
                    $updates++;
 | 
						|
                    $fieldvals{$fld} = $cols{$fld};
 | 
						|
                }
 | 
						|
            }
 | 
						|
 | 
						|
            if ( $updates > 0 ) {
 | 
						|
                #
 | 
						|
                # Build the update statement containing changes. It will be:
 | 
						|
                # "UPDATE episodes SET a = b, c = d, ... WHERE id = $epid"
 | 
						|
                #
 | 
						|
                $where{id} = $epid;
 | 
						|
                my ( $stmt, @bindvals )
 | 
						|
                    = $sql->update( 'episodes', \%fieldvals, \%where );
 | 
						|
 | 
						|
                #
 | 
						|
                # Perform the updates (unless in dry-run mode)
 | 
						|
                #
 | 
						|
                unless ($dry_run) {
 | 
						|
                    $sth3 = $dbh->prepare($stmt);
 | 
						|
                    $sth3->execute(@bindvals);
 | 
						|
                    if ( $dbh->err ) {
 | 
						|
                        warn "Processing $epid\n", $dbh->errstr;
 | 
						|
                    }
 | 
						|
                    else {
 | 
						|
                        print "Updated\n" if $verbose;
 | 
						|
                        $count++;
 | 
						|
                    }
 | 
						|
                }
 | 
						|
                else {
 | 
						|
                    print "Dry run mode:\n";
 | 
						|
                    print "Statement: $stmt\n";
 | 
						|
                    print "Bindvals: ", join( ", ", @bindvals ), "\n";
 | 
						|
                }
 | 
						|
            }
 | 
						|
 | 
						|
        }
 | 
						|
        else {
 | 
						|
            printf "Unexpected identifier: %s\n", $obj->{metadata}->{identifier};
 | 
						|
        }
 | 
						|
 | 
						|
        print '-' x 80, "\n" if $verbose;
 | 
						|
    }
 | 
						|
 | 
						|
    print "Number of updates: $count\n";
 | 
						|
    $log->info("Number of updates from JSON: $count");
 | 
						|
 | 
						|
}
 | 
						|
 | 
						|
#===  FUNCTION  ================================================================
 | 
						|
#         NAME: collect_assets
 | 
						|
#      PURPOSE: Collects asset names for a given episode
 | 
						|
#   PARAMETERS: $dbh            Handle for the SQLite database
 | 
						|
#               $sth            Statement handle for the prepared query
 | 
						|
#               $epid           Episode id
 | 
						|
#      RETURNS: A list of the asset names
 | 
						|
#  DESCRIPTION: Just prepares a list of asset filenames by querying the SQLite
 | 
						|
#               'assets' table and returns it to the caller
 | 
						|
#       THROWS: No exceptions
 | 
						|
#     COMMENTS: None
 | 
						|
#     SEE ALSO: N/A
 | 
						|
#===============================================================================
 | 
						|
sub collect_assets {
 | 
						|
    my ( $dbh, $sth, $epid ) = @_;
 | 
						|
 | 
						|
    my ( $rv, $h, @result );
 | 
						|
 | 
						|
    #
 | 
						|
    # Query the database
 | 
						|
    #
 | 
						|
    $rv = $sth->execute($epid);
 | 
						|
    if ( $dbh->err ) {
 | 
						|
        warn $dbh->errstr;
 | 
						|
    }
 | 
						|
 | 
						|
    while ($h = $sth->fetchrow_hashref) {
 | 
						|
        push(@result,$h->{filename});
 | 
						|
    }
 | 
						|
 | 
						|
    return @result;
 | 
						|
}
 | 
						|
 | 
						|
#===  FUNCTION  ================================================================
 | 
						|
#         NAME: mark_assets_uploaded
 | 
						|
#      PURPOSE: Marks the assets relating to an episode as 'uploaded'
 | 
						|
#   PARAMETERS: $dbh            Handle for the SQLite database
 | 
						|
#               $epid           Episode id
 | 
						|
#      RETURNS: Number of updates
 | 
						|
#  DESCRIPTION: Mark any assets for a given episode as uploaded, if they are
 | 
						|
#               not so marked. Expected to be used during an update from JSON
 | 
						|
#               data collected from the IA.
 | 
						|
#       THROWS: No exceptions
 | 
						|
#     COMMENTS: None
 | 
						|
#     SEE ALSO: N/A
 | 
						|
#===============================================================================
 | 
						|
sub mark_assets_uploaded {
 | 
						|
    my ( $dbh, $epid ) = @_;
 | 
						|
 | 
						|
    my ($rows);
 | 
						|
 | 
						|
    $rows = $dbh->do(
 | 
						|
        q{UPDATE assets SET uploaded = 1
 | 
						|
          WHERE episode_id = ? AND uploaded = 0},
 | 
						|
        undef,
 | 
						|
        $epid,
 | 
						|
    ) or die $dbh->errstr;
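    #
    # DBI's do() returns the string '0E0' ("zero but true") when no rows were
    # affected, so adding 0 below normalises the result to a plain number.
    #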
 | 
						|
 | 
						|
    $rows += 0;
 | 
						|
 | 
						|
    return $rows;
 | 
						|
}
 | 
						|
 | 
						|
#===  FUNCTION  ================================================================
 | 
						|
#         NAME: scan_dir
 | 
						|
#      PURPOSE: Scans a file from the HPR server generated with a simple
 | 
						|
#               'find' command which lists all the files and directories under
 | 
						|
#               the ~/www/eps/ directory.
 | 
						|
#   PARAMETERS: $dbh            Handle for the SQLite database
 | 
						|
#               $file           Name of file of directory data
 | 
						|
#      RETURNS: Nothing
 | 
						|
#  DESCRIPTION: Under development
 | 
						|
#       THROWS: No exceptions
 | 
						|
#     COMMENTS: None
 | 
						|
#     SEE ALSO: N/A
 | 
						|
#===============================================================================
 | 
						|
sub scan_dir {
 | 
						|
    my ( $dbh, $file ) = @_;
 | 
						|
 | 
						|
    my ( %dirtree, $ptr, %shows, $key, $id );
 | 
						|
 | 
						|
    #
 | 
						|
    # Open the file of paths
 | 
						|
    #
 | 
						|
    open( my $fh, "<:encoding(UTF-8)", $file )
 | 
						|
        or die "Unable to open '$file'\n";
 | 
						|
 | 
						|
    #
 | 
						|
    # Loop through all paths. If it's one of the audio files then skip it
 | 
						|
    #
 | 
						|
    while ( my $line = <$fh> ) {
 | 
						|
        chomp($line);
 | 
						|
 | 
						|
        next if ( $line =~ /(flac|mp3|ogg|opus|spx|wav)$/ );
 | 
						|
 | 
						|
        #
 | 
						|
        # Point into the tree hash and add the necessary nodes from the split
 | 
						|
        # path. The result is a hierarchical hash structure where the 'leaf'
 | 
						|
        # node has an empty anonymous hash as its value.
 | 
						|
        #
 | 
						|
        # %dirtree = (
 | 
						|
        #     'www' => {
 | 
						|
        #        'eps' => {
 | 
						|
        #            'hpr2356' => {
 | 
						|
        #                 'index.html' => {},
 | 
						|
        #                 'fix-ssh-on-pi.bash.txt' => {}
 | 
						|
        #             },
 | 
						|
        #         },
 | 
						|
        #     },
 | 
						|
        # )
 | 
						|
        #
 | 
						|
        $ptr = \%dirtree;
 | 
						|
        $ptr = $ptr->{$_} //= {} for split( /\//, $line );
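        # (The '//=' creates an empty hashref the first time a path component
        # is seen, and $ptr then descends into it, so each '/'-separated
        # component becomes a nested hash key as illustrated above.)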
 | 
						|
 | 
						|
    }
 | 
						|
 | 
						|
    close($fh);
 | 
						|
 | 
						|
    #print Dumper(\%dirtree),"\n";
 | 
						|
 | 
						|
    #
 | 
						|
    # Build a hash indexed by IA identifier containing an array per value with
 | 
						|
    # the files relating to the show therein
 | 
						|
    #
 | 
						|
    $ptr = \%dirtree;
 | 
						|
    # TODO if there are multiple items at a level which one to test
 | 
						|
    $key = ( keys(%$ptr) )[0];
 | 
						|
    #$key = first { defined($_) } keys(%$ptr);
 | 
						|
    until ( $key =~ /^hpr[0-9]{4}/ ) {
 | 
						|
        print "$key\n";
 | 
						|
        $ptr = $ptr->{$key};
 | 
						|
        $key = ( keys(%$ptr) )[0];
 | 
						|
    }
 | 
						|
    #print join(", ",keys(%$ptr));
 | 
						|
 | 
						|
    # TODO Doesn't deal with sub-directories
 | 
						|
    foreach my $k ( keys(%$ptr) ) {
 | 
						|
        if ( ($id) = ( $k =~ /^(hpr[0-9]{4})/ ) ) {
 | 
						|
            unless ( defined( $shows{$id} ) ) {
 | 
						|
                $shows{$id} = [];
 | 
						|
            }
 | 
						|
            push( @{ $shows{$id} }, $k );
 | 
						|
        }
 | 
						|
    }
 | 
						|
 | 
						|
    print Dumper( \%shows ), "\n";
 | 
						|
 | 
						|
}
 | 
						|
 | 
						|
#===  FUNCTION  ================================================================
 | 
						|
#         NAME: find_assets
 | 
						|
#      PURPOSE: An alternative way to look for assets; connect to the server
 | 
						|
#               with SSH and use a 'find' command for matching files and
 | 
						|
#               directories.
 | 
						|
#   PARAMETERS: $ssh            Net::OpenSSH object previously set up to
 | 
						|
#                               connect to the server
 | 
						|
#               $episode        episode number we're dealing with
 | 
						|
#               $rempath        path to search on the server
 | 
						|
#               $rfiles         hashref to receive the files (assets) found
 | 
						|
#      RETURNS: Number of files found
 | 
						|
#  DESCRIPTION: Runs a remote 'find' under $rempath (over the supplied SSH
#               connection) for names containing "hpr<episode>". Each plain
#               file found, other than the top-level mp3/ogg/spx audio, is
#               recorded as an asset: its filename (relative to $rempath) and
#               corresponding URL are pushed onto the list in $rfiles keyed
#               by episode number.
 | 
						|
#       THROWS: No exceptions
 | 
						|
#     COMMENTS: None
 | 
						|
#     SEE ALSO: N/A
 | 
						|
#===============================================================================
 | 
						|
sub find_assets {
 | 
						|
    my ($ssh, $episode, $rempath, $rfiles) = @_;
 | 
						|
 | 
						|
    my $urlbase = 'https://hackerpublicradio.org/eps/';
 | 
						|
    my (@result, @row, $URL, $file, $assetcount);
 | 
						|
 | 
						|
    $rempath .= '/' unless ($rempath =~ /\/$/);
 | 
						|
 | 
						|
    @result = $ssh->capture({ timeout => 10 },
 | 
						|
        "find $rempath -regextype egrep -regex '.*/hpr${episode}.*' -printf '%y,%p,%s\n'");
 | 
						|
    $ssh->error and warn "remote 'find' command failed: " . $ssh->error;
 | 
						|
 | 
						|
#       d,/home/hpr/www/eps/hpr3775,85
 | 
						|
#       f,/home/hpr/www/eps/hpr3775/hpr3775_source.wav,75006546
 | 
						|
#       f,/home/hpr/www/eps/hpr3775/hpr3775.vtt,13127
 | 
						|
#       f,/home/hpr/www/eps/hpr3775/hpr3775.srt,13830
 | 
						|
#       f,/home/hpr/www/eps/hpr3775/hpr3775.txt,11119
 | 
						|
#       f,/home/hpr/www/eps/hpr3775.mp3,7449559
 | 
						|
#       f,/home/hpr/www/eps/hpr3775.ogg,10631998
 | 
						|
#       f,/home/hpr/www/eps/hpr3775.spx,3516736
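#       (With $rempath as '/home/hpr/www/eps/', for example, the loop below
#       turns the '.vtt' line above into an entry such as
#           { filename => 'hpr3775/hpr3775.vtt',
#             URL => 'https://hackerpublicradio.org/eps/hpr3775/hpr3775.vtt' }
#       while the top-level mp3/ogg/spx audio files are skipped.)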
 | 
						|
 | 
						|
    $assetcount = 0;
 | 
						|
    $rfiles->{$episode} = [];
 | 
						|
    for my $file (@result) {
 | 
						|
        @row = split( ',', $file );
 | 
						|
        if ( $row[0] eq 'f' ) {
 | 
						|
            unless ( $row[1] =~ /$episode\.(mp3|ogg|spx)$/ ) {
 | 
						|
                ( $URL  = $row[1] ) =~ s/$rempath/$urlbase/;
 | 
						|
                ( $file = $row[1] ) =~ s/$rempath//;
 | 
						|
 | 
						|
                push(
 | 
						|
                    @{ $rfiles->{$episode} },
 | 
						|
                        { filename => $file, URL => $URL }
 | 
						|
                );
 | 
						|
 | 
						|
                $assetcount++;
 | 
						|
            }
 | 
						|
        }
 | 
						|
    }
 | 
						|
 | 
						|
    _debug( $DEBUG >= 2, "Results from 'find':\n" . Dumper( \@result ) );
 | 
						|
 | 
						|
    #
 | 
						|
    # Return the asset count
 | 
						|
    #
 | 
						|
    return $assetcount;
 | 
						|
 | 
						|
}
 | 
						|
 | 
						|
#===  FUNCTION  ================================================================
 | 
						|
#         NAME: find_links
 | 
						|
#      PURPOSE: Parses the HTML in a string for links so that a list of assets
 | 
						|
#               relating to this show can be made
 | 
						|
#   PARAMETERS: $episode        episode number we're dealing with
 | 
						|
#               $html           string containing HTML
 | 
						|
#               $rlinks         hashref to receive the links found
 | 
						|
#      RETURNS: Number of links found
 | 
						|
#  DESCRIPTION: Given HTML from the main notes or a subsidiary file the
 | 
						|
#               function parses this looking for links in 'a' or 'img' tags.
 | 
						|
#               Links are standardised, making them absolute if relative and
 | 
						|
#               removing any 'fragment'. The links need to be to files on the
 | 
						|
#               HPR website to be of interest. If so the filename part is
 | 
						|
#               extracted. If it follows the format 'hpr9999' then it's
 | 
						|
#               checked to see if it's for the current show. If not it's
 | 
						|
#               ignored. If the filename ends with a '/' then it's assumed
 | 
						|
#               it's shorthand for 'index.html' so this name is appended. Then
 | 
						|
#               the link and filename are stashed in the hash referenced by
 | 
						|
#               $rlinks. If the filename ends with '.html' then we need to
 | 
						|
#               parse it in turn, so we get the contents of the link and
 | 
						|
#               recurse to parse it. We return the number of HPR links found
 | 
						|
#               in the pass through the HTML.
 | 
						|
#       THROWS: No exceptions
 | 
						|
#     COMMENTS: None
 | 
						|
#     SEE ALSO: N/A
 | 
						|
#===============================================================================
 | 
						|
sub find_links {
 | 
						|
    my ( $episode, $html, $rlinks ) = @_;
 | 
						|
 | 
						|
    my ($tree, $epstr, $linkre,    $re2, $filepath,
 | 
						|
        $uri,  $slink, $linkcount, $content
 | 
						|
    );
 | 
						|
 | 
						|
    _debug( $DEBUG >= 3, "find_links enter\n" );
 | 
						|
 | 
						|
    #
 | 
						|
    # Create a tree object
 | 
						|
    #
 | 
						|
    $tree = HTML::TreeBuilder->new;
 | 
						|
    $tree->ignore_unknown(0);
 | 
						|
    $tree->no_expand_entities(1);
 | 
						|
    $tree->p_strict(1);
 | 
						|
    $tree->store_comments(1);
 | 
						|
    $tree->warn(1);
 | 
						|
 | 
						|
    $tree->parse_content($html)
 | 
						|
        or die "HTML::TreeBuilder failed to parse notes: $!\n";
 | 
						|
 | 
						|
    my $baseURL = "https://hackerpublicradio.org";
 | 
						|
 | 
						|
    $epstr = sprintf( "hpr%04d", $episode );
 | 
						|
    $linkre = qr{
 | 
						|
        ^https?://
 | 
						|
        (?:www.)?
 | 
						|
        (?:hacker|hobby)publicradio.org/eps/
 | 
						|
        (.+)$
 | 
						|
    }x;
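    # (For example, 'https://hackerpublicradio.org/eps/hpr2356/fix-ssh-on-pi.bash.txt'
    # matches $linkre, capturing 'hpr2356/fix-ssh-on-pi.bash.txt'.)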
 | 
						|
    #(?:(hpr$epstr/.+)|(hpr$epstr.+)|(.+))$
 | 
						|
 | 
						|
    #
 | 
						|
    # Counting new links found and stashed
 | 
						|
    #
 | 
						|
    $linkcount = 0;
 | 
						|
 | 
						|
    #
 | 
						|
    # Scan for links
 | 
						|
    #
 | 
						|
    for ( @{ $tree->extract_links( 'a', 'img' ) } ) {
 | 
						|
        my ( $link, $element, $attr, $tag ) = @$_;
 | 
						|
 | 
						|
        #
 | 
						|
        # Standardise the link (expands relative URLs, removes any fragment).
 | 
						|
        # Set $URI::ABS_REMOTE_LEADING_DOTS to ensure leading dots in relative
 | 
						|
        # URIs are removed.
 | 
						|
        #
 | 
						|
        local $URI::ABS_REMOTE_LEADING_DOTS = 1;
 | 
						|
        $uri = URI->new_abs( $link, $baseURL );
 | 
						|
        $slink = sprintf( "%s:%s", $uri->scheme, $uri->opaque );
 | 
						|
 | 
						|
        #
 | 
						|
        # Is it an HPR link?
 | 
						|
        #
 | 
						|
        if ( $slink =~ $linkre ) {
 | 
						|
            #
 | 
						|
            # The URL we found might be a link into an HTML file with an
 | 
						|
            # '#anchor' component ("fragment"). Save the last bracketed match,
 | 
						|
            # without any 'fragment' if there is one to get a clean filename
 | 
						|
            # or path.
 | 
						|
            #
 | 
						|
            ( $filepath = "$+" ) =~ s/#.*$//;
 | 
						|
 | 
						|
            _debug( $DEBUG >= 3, "Link:      $slink\n" );
 | 
						|
            _debug( $DEBUG >= 3, "File path: $filepath\n" );
 | 
						|
 | 
						|
            #
 | 
						|
            # Does this file path begin with an 'hpr' prefix? If so is it the
 | 
						|
            # show id? If not we don't want to process it.
 | 
						|
            #
 | 
						|
            if ( $filepath =~ /^(hpr[0-9]{1,4})/ ) {
 | 
						|
                if ( $1 ne $epstr ) {
 | 
						|
                    _debug( $DEBUG >= 3, "Ignored $slink\n" );
 | 
						|
                    next;
 | 
						|
                }
 | 
						|
            }
 | 
						|
 | 
						|
            #
 | 
						|
            # The path and URL might end with a slash which means the URL is
 | 
						|
            # relying on the Web server to fill in the filename as
 | 
						|
            # 'index.html'. We have to make this explicit.
 | 
						|
            #
 | 
						|
            if ( $slink =~ /\/$/ ) {
 | 
						|
                $slink    .= 'index.html';
 | 
						|
                $filepath .= 'index.html';
 | 
						|
            }
 | 
						|
 | 
						|
            #
 | 
						|
            # Initialise this hash element if needed
 | 
						|
            #
 | 
						|
            unless ( exists( $rlinks->{$episode} ) ) {
 | 
						|
                $rlinks->{$episode} = [];
 | 
						|
            }
 | 
						|
 | 
						|
            #
 | 
						|
            # Stash this filename if it's not already stashed, and if it's
 | 
						|
            # HTML get the link and recurse
 | 
						|
            #
 | 
						|
            unless (
 | 
						|
                any { $_->{filename} eq $filepath } @{ $rlinks->{$episode} }
 | 
						|
                )
 | 
						|
            {
 | 
						|
                _debug( $DEBUG >= 3, "Stashed $slink and $filepath\n" );
 | 
						|
 | 
						|
                push(
 | 
						|
                    @{ $rlinks->{$episode} },
 | 
						|
                        { filename => $filepath, URL => $slink }
 | 
						|
                );
 | 
						|
                $linkcount++;
 | 
						|
 | 
						|
                #
 | 
						|
                # An HTML file has to be investigated
 | 
						|
                #
 | 
						|
                if ( $filepath =~ /\.html$/ ) {
 | 
						|
                    $content = get($slink);
 | 
						|
                    unless ( defined($content) ) {
 | 
						|
                        carp "Link $slink returned nothing\n";
 | 
						|
                    }
 | 
						|
                    else {
 | 
						|
                        $linkcount
 | 
						|
                            += find_links( $episode, $content, $rlinks );
 | 
						|
                    }
 | 
						|
                }
 | 
						|
 | 
						|
            }
 | 
						|
 | 
						|
        }
 | 
						|
    }
 | 
						|
 | 
						|
    _debug( $DEBUG >= 3, "find_links exiting with $linkcount links\n" );
 | 
						|
 | 
						|
    #
 | 
						|
    # Return the link count
 | 
						|
    #
 | 
						|
    return $linkcount;
 | 
						|
 | 
						|
}
 | 
						|
 | 
						|
#===  FUNCTION  ================================================================
 | 
						|
#         NAME: count_rows
 | 
						|
#      PURPOSE: Counts the rows in a given SQLite table
 | 
						|
#   PARAMETERS: $dbh            Handle for the SQLite database
 | 
						|
#               $table          Name of table
 | 
						|
#      RETURNS: Number of rows
 | 
						|
#  DESCRIPTION: Builds SQL from a template including the selected $table.
 | 
						|
#               Returns the number of rows once the query has been run.
 | 
						|
#       THROWS: No exceptions
 | 
						|
#     COMMENTS: None
 | 
						|
#     SEE ALSO: N/A
 | 
						|
#===============================================================================
 | 
						|
sub count_rows {
 | 
						|
    my ( $dbh, $table ) = @_;
 | 
						|
 | 
						|
    my ( $sql, $sth, $h, $count );
 | 
						|
 | 
						|
    #
 | 
						|
    # Count rows in the selected SQLite table
 | 
						|
    #
 | 
						|
    $sql = "SELECT count(*) AS rowcount FROM $table";
 | 
						|
    $sth = $dbh->prepare($sql);
 | 
						|
    $sth->execute();
 | 
						|
    if ( $dbh->err ) {
 | 
						|
        croak $dbh->errstr;
 | 
						|
    }
 | 
						|
 | 
						|
    $count = 0;
 | 
						|
    if ($h = $sth->fetchrow_hashref) {
 | 
						|
        $count = $h->{rowcount};
 | 
						|
    }
 | 
						|
 | 
						|
    return $count;
 | 
						|
 | 
						|
}
 | 
						|
 | 
						|
#===  FUNCTION  ================================================================
 | 
						|
#         NAME: coalesce
 | 
						|
#      PURPOSE: To find the first defined argument and return it
 | 
						|
#   PARAMETERS: Arbitrary number of arguments
 | 
						|
#      RETURNS: The first defined argument or undef if there are none
 | 
						|
#  DESCRIPTION: Just a simple way of ensuring an 'undef' value is never
 | 
						|
#               returned when doing so might be a problem.
 | 
						|
#       THROWS: No exceptions
 | 
						|
#     COMMENTS: None
 | 
						|
#     SEE ALSO: N/A
 | 
						|
#===============================================================================
 | 
						|
sub coalesce {
 | 
						|
    foreach (@_) {
 | 
						|
        return $_ if defined($_);
 | 
						|
    }
 | 
						|
    return undef;    ## no critic
 | 
						|
}
 | 
						|
 | 
						|
#===  FUNCTION  ================================================================
 | 
						|
#         NAME: assets_match
 | 
						|
#      PURPOSE: To compare found assets and those in the database
 | 
						|
#   PARAMETERS: $found_obj      an asset object (hashref) found by parsing show
 | 
						|
#                               notes
 | 
						|
#               $db_obj         an asset object (hashref) from the database
 | 
						|
#      RETURNS: Value 1 or 2 if they match, otherwise zero
 | 
						|
#  DESCRIPTION: Compares the 'filename' and 'URL' fields of the two asset
#               objects: 2 means both fields match, 1 means only one does,
#               and 0 means neither does (or an argument was undefined).
 | 
						|
#       THROWS: No exceptions
 | 
						|
#     COMMENTS: None
 | 
						|
#     SEE ALSO: N/A
 | 
						|
#===============================================================================
 | 
						|
sub assets_match {
 | 
						|
    my ( $found_obj, $db_obj ) = @_;
 | 
						|
 | 
						|
    return 0 if ( !defined($found_obj) || !defined($db_obj) );
 | 
						|
 | 
						|
    my ( $t1, $t2 ) = (
 | 
						|
        ( $found_obj->{filename} eq $db_obj->{filename} ),
 | 
						|
        ( $found_obj->{URL} eq $db_obj->{URL} )
 | 
						|
    );
 | 
						|
 | 
						|
    return 2 if ( $t1 && $t2 );
 | 
						|
    return 1 if ( $t1 || $t2 );
    return 0;
 | 
						|
}
 | 
						|
 | 
						|
#===  FUNCTION  ================================================================
 | 
						|
#         NAME: equal
 | 
						|
#      PURPOSE: Compare two strings even if undefined
 | 
						|
#   PARAMETERS: $s1             The first string
 | 
						|
#               $s2             The second string
 | 
						|
#      RETURNS: True if both strings are undefined, false if only one of
#               them is defined, otherwise the result of comparing them.
 | 
						|
#  DESCRIPTION: Works on the principle that two undefined strings are equal,
 | 
						|
#               a defined and an undefined string are not, and otherwise they
 | 
						|
#               are equal if they are equal!
 | 
						|
#       THROWS: No exceptions
 | 
						|
#     COMMENTS: None
 | 
						|
#     SEE ALSO: N/A
 | 
						|
#===============================================================================
 | 
						|
sub equal {
 | 
						|
    my ( $s1, $s2 ) = @_;
 | 
						|
 | 
						|
    return 1 if ( !defined($s1) && !defined($s2) );
 | 
						|
    return 0 if ( !defined($s1) || !defined($s2) );
 | 
						|
 | 
						|
    return ( $s1 eq $s2 );
 | 
						|
}
 | 
						|
 | 
						|
#===  FUNCTION  ================================================================
 | 
						|
#         NAME: _debug
 | 
						|
#      PURPOSE: Prints debug reports
 | 
						|
#   PARAMETERS: $active         Boolean: 1 for print, 0 for no print
 | 
						|
#               $message        Message to print
 | 
						|
#      RETURNS: Nothing
 | 
						|
#  DESCRIPTION: Outputs a message if $active is true. It removes any trailing
 | 
						|
#               newline and then adds one in the 'print' so the caller doesn't
 | 
						|
#               have to bother. Prepends the message with 'D> ' to show it's
 | 
						|
#               a debug message.
 | 
						|
#       THROWS: No exceptions
 | 
						|
#     COMMENTS: None
 | 
						|
#     SEE ALSO: N/A
 | 
						|
#===============================================================================
 | 
						|
sub _debug {
 | 
						|
    my ( $active, $message ) = @_;
 | 
						|
 | 
						|
    chomp($message);
 | 
						|
    print "D> $message\n" if $active;
 | 
						|
}
 | 
						|
 | 
						|
#===  FUNCTION  ================================================================
 | 
						|
#         NAME: Usage
 | 
						|
#      PURPOSE: Displays a usage message and exit
 | 
						|
#   PARAMETERS: None
 | 
						|
#      RETURNS: To command line level with exit value 1
 | 
						|
#  DESCRIPTION: Builds the usage message using global values
 | 
						|
#       THROWS: no exceptions
 | 
						|
#     COMMENTS: none
 | 
						|
#     SEE ALSO: n/a
 | 
						|
#===============================================================================
 | 
						|
#sub Usage {
#    print STDERR <<EOD;
#
#Usage: $PROG [options] directory
#
#$PROG v$VERSION
#
#    -help               Display this information
#    -[no]dry-run        Enable/disable dry run mode (default off)
#    -[no]verbose        Enable/disable verbose mode (default off)
#    -debug=N            Set the debug level
#    -mode=MODE          Selects the mode from the following list:
#                        - initialise: rebuild the SQLite database from MySQL
#                          (deletes any data already uploaded)
#                        - update: updates the SQLite database from updates
#                          found in the MySQL database
#                        - json: load the 'archive_date' field from a JSON
#                          file. Also sets the 'uploaded' flag and populates
#                          the 'IA_URL' field
#                          ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~
#                        - scandir: scans a file captured from the HPR server
#                          by running a 'find' command on the 'eps' directory
#                          where all the show files reside. This file is
#                          scanned to determine which shows have files other
#                          than the audio so we can keep a tally of which shows
#                          have had these uploaded to the IA
#                          ** Not fully implemented **
#                          ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~
#    -episode=N1 [-episode=N2 ...]
#                        In -mode=update defines one or more single episodes to
#                        process. Otherwise, if this option is not given, all
#                        new episodes in the database are processed. The
#                        episodes are expected to have been already added if
#                        the option is used, and the 'title' and 'summary'
#                        fields and the assets (if any) are to be processed.
#    -range=N1 [-range=N2]
#                        An alternative way of defining existing shows to be
#                        processed in -mode=update. The option can only be
#                        repeated twice. The smaller number defines the start
#                        of the range, and the larger one the end of the range.
#                        So -range=834 -range=840 defines the range 834-840. If
#                        one number is given (or two the same) this means the
#                        same as defining one episode. The episodes are
#                        expected to have been already added and the 'title'
#                        and 'summary' fields and the assets (if any) are to be
#                        processed.
#    -json=FILE          Defines the file to use for -mode=json
#    -scandir=FILE       Defines a file to use for -mode=scandir
#    -dbconfig=FILE      Defines an alternative MySQL configuration file
#
#EOD
#    exit(1);
#}

#===  FUNCTION  ================================================================
#         NAME: Options
#      PURPOSE: Processes command-line options
#   PARAMETERS: $optref     Hash reference to hold the options
#      RETURNS: Undef
#  DESCRIPTION: Process the options we want to offer. See the documentation
#               for details
#       THROWS: no exceptions
#     COMMENTS: none
#     SEE ALSO: n/a
#===============================================================================
sub Options {
    my ($optref) = @_;

    my @options = (
        "help",       "documentation|man",
        "debug=i",    "mode=s",
        "json=s",     "scandir=s",
        "episode=i@", "range=i@",
        "dry-run!",   "verbose!",
        "dbconfig=s",
    );

    if ( !GetOptions( $optref, @options ) ) {
        pod2usage(
            -msg     => "$PROG version $VERSION\n",
            -exitval => 1,
            -verbose => 0
        );
    }

    return;
}

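#
# A minimal sketch of the expected calling pattern for 'Options' (the
# '%options' hash name and the defaults shown here are hypothetical; the
# real choices are made in the main body of the script):
#
#   my %options;
#   Options( \%options );
#   my $DEBUG   = $options{debug} // 0;
#   my $mode    = $options{mode};
#   my $dry_run = $options{'dry-run'} // 0;
#
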
__END__

#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
#  Application Documentation
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
#{{{

=head1 NAME

upload_manager - manages a SQLite database of details of shows on archive.org

=head1 VERSION

This documentation refers to upload_manager version 0.2.19

=head1 USAGE

    ./upload_manager [-help] [-documentation] [-[no]dry-run] [-[no]verbose]
        [-debug=N] -mode=MODE [-episode=N1 [-episode=N2 ...]] [-range=START
        [-range=END]] [-json=FILE] [-scandir=FILE] [-dbconfig=FILE]

=head1 OPTIONS

=over 8

=item B<-help>

Display a brief summary of how to use this script.

=item B<-documentation> or B<-man>

Display the entirety of the internal documentation in the form of a manual
page.

=item B<-[no]dry-run>

Enable/disable dry run mode (default off)

=item B<-[no]verbose>

Enable/disable verbose mode (default off)

=item B<-debug=N>

Sets the debug level:

=over 4

=item B<0>

This is the default value, and causes no debug output to be generated.

=item B<1>

TBA

=item B<2>

Produces quite a lot of asset-related output. All of the output from the
lower debug levels is displayed as well as:

=over 4

=item .

Contents of the %assets hash. This is generated when scanning a show in the
MySQL database in order to find any associated assets.

=item .

The episode number being checked and the number of assets found while
processing assets. A dump of the assets from the SQLite database is shown as
well as the count of assets found there.

=item .

If there are assets related to a show and there are already some in the SQLite
database, a comparison is made to see if any updates are needed. These
differences are listed.

=item .

If new assets are being added, these are reported from the hash
%assets_by_file.

=item .

Assets are added using a multiple INSERT statement, and details of what is to
be inserted are shown in the array @multiple_insert.

=back

=item B<3>

Produces the most output. All of the output from the lower debug levels is
displayed as well as:

=over 4

=item .

Contents of the @episodes array.  This is generated by using the B<-episode=N>
option once or many times. It is shown before and after the removal of
duplicates.

=item .

Contents of the @range array.  This is generated by using the B<-range=N>
option once or twice.

=back

=back

=item B<-mode=MODE>

Selects the mode of operation of the script from the following list (see the
example invocations at the end of this OPTIONS list):

=over 4

=item B<initialise>

Rebuilds the SQLite database from the MySQL data. Use with caution, though
the script will not initialise tables which contain data.

=item B<update>

Updates the SQLite database from updates found in the MySQL database. Exactly
how this is done is controlled by other options. See below for details.

=item B<json>

Load the 'archive_date' field from a JSON file. Also sets the 'uploaded' flag
and populates the 'IA_URL' and 'item_last_updated' fields.

=item B<scandir>

Scans a file captured from the HPR server by running a 'find' command on the
'eps' directory where all the show files reside. This file is scanned to
determine which shows have files other than the audio so we can keep a tally
of which shows have had these uploaded to the IA.

** Not fully implemented; may be phased out **

=back

=item B<-episode=N1 [-episode=N2 ...]>

In B<-mode=update> defines one or more single episodes to process. Otherwise,
if this option is not given, all new episodes in the database are processed.
The episodes are expected to have been already added if this option is used,
and the 'title' and 'summary' fields and the assets (if any) are to be
processed.

=item B<-range=N1 [-range=N2]>

An alternative way of defining existing shows to be processed in
B<-mode=update>.  The option can only be repeated twice. The smaller number
defines the start of the range, and the larger one the end of the range.  So
B<-range=834 -range=840> defines the range 834-840 (see the example
invocations below). If one number is given (or two the same) this means the
same as defining one episode. The episodes are expected to have been already
added and the 'title' and 'summary' fields and the assets (if any) are to be
processed.

=item B<-json=FILE>

Defines the file to use for B<-mode=json>. The contents of the file are
expected to be those produced by running B<collect_show_data>.

=item B<-scandir=FILE>

Defines a file to use for B<-mode=scandir>. This feature is not fully
implemented at the moment.

=item B<-dbconfig=FILE>

Defines an alternative MySQL configuration file.

=back

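Some example invocations follow. The episode numbers and the JSON file name
used here are hypothetical, purely for illustration; substitute real values:

    # Update the SQLite database for a contiguous range of shows
    ./upload_manager -mode=update -range=3500 -range=3510

    # Update two specific episodes, with asset-related debug output
    ./upload_manager -mode=update -episode=3512 -episode=3514 -debug=2

    # Load archive dates and IA URLs from a JSON file (dry run first)
    ./upload_manager -dry-run -mode=json -json=show_data.json
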
=head1 DESCRIPTION

B<upload_manager> maintains a SQLite database holding details of HPR shows
and of their copies on archive.org. Depending on the chosen B<-mode> it
rebuilds the database from the MySQL data (B<initialise>), applies updates
found in the MySQL database, including the 'title' and 'summary' fields and
any assets (B<update>), or loads upload details such as the 'archive_date',
'uploaded' and 'IA_URL' fields from a JSON file produced by
B<collect_show_data> (B<json>).

=head1 DIAGNOSTICS

A list of every error and warning message that the application can generate
(even the ones that will "never happen"), with a full explanation of each
problem, one or more likely causes, and any suggested remedies. If the
application generates exit status codes (e.g. under Unix) then list the exit
status associated with each error.

=head1 CONFIGURATION AND ENVIRONMENT

A full explanation of any configuration system(s) used by the application,
including the names and locations of any configuration files, and the
meaning of any environment variables or properties that can be set. These
descriptions must also include details of any configuration language used

=head1 DEPENDENCIES

A list of all the other modules that this module relies upon, including any
restrictions on versions, and an indication whether these required modules are
part of the standard Perl distribution, part of the module's distribution,
or must be installed separately.

=head1 INCOMPATIBILITIES

A list of any modules that this module cannot be used in conjunction with.
This may be due to name conflicts in the interface, or competition for
system or program resources, or due to internal limitations of Perl
(for example, many modules that use source code filters are mutually
incompatible).

=head1 BUGS AND LIMITATIONS

A list of known problems with the module, together with some indication
whether they are likely to be fixed in an upcoming release.

Also a list of restrictions on the features the module does provide:
data types that cannot be handled, performance issues and the circumstances
in which they may arise, practical limitations on the size of data sets,
special cases that are not (yet) handled, etc.

The initial template usually just has:

There are no known bugs in this module.
Please report problems to <Maintainer name(s)>  (<contact address>)
Patches are welcome.

=head1 AUTHOR

<Author name(s)>  (<contact address>)

=head1 LICENCE AND COPYRIGHT

Copyright (c) <year> <copyright holder> (<contact address>). All rights reserved.

Followed by whatever licence you wish to release it under.
For Perl code that is often just:

This module is free software; you can redistribute it and/or
modify it under the same terms as Perl itself. See perldoc perlartistic.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

=cut

#}}}

# [zo to open fold, zc to close]

# vim: syntax=perl:ts=8:sw=4:et:ai:tw=78:fo=tcrqn21:fdm=marker