614 lines
18 KiB
Perl
Executable File
614 lines
18 KiB
Perl
Executable File
#!/usr/bin/env perl
|
|
#===============================================================================
|
|
#
|
|
# FILE: refresh_tags
|
|
#
|
|
# USAGE: ./refresh_tags
|
|
#
|
|
# DESCRIPTION: Parse tags from the eps.tags field and use them to populate
|
|
# the tags table. The eps tag list is definitive (though it's
|
|
# quite limited since it's only 200 characters long), and so the
|
|
# tags table is kept in step by adding and deleting.
|
|
#
|
|
# OPTIONS: ---
|
|
# REQUIREMENTS: ---
|
|
# BUGS: ---
|
|
# NOTES: ---
|
|
# AUTHOR: Dave Morriss (djm), Dave.Morriss@gmail.com
|
|
# VERSION: 0.0.3
|
|
# CREATED: 2016-07-17 15:59:24
|
|
# REVISION: 2017-01-30 17:13:28
|
|
#
|
|
#===============================================================================
|
|
|
|
use 5.010;
|
|
use strict;
|
|
use warnings;
|
|
use utf8;
|
|
|
|
use Carp;
|
|
use Getopt::Long;
|
|
use Config::General;
|
|
use Text::CSV;
|
|
use SQL::Abstract;
|
|
use DBI;
|
|
|
|
use Data::Dumper;
|
|
|
|
#
|
|
# Version number (manually incremented)
|
|
#
|
|
our $VERSION = '0.0.3';
|
|
|
|
#
|
|
# Script and directory names
|
|
#
|
|
( my $PROG = $0 ) =~ s|.*/||mx;
|
|
( my $DIR = $0 ) =~ s|/?[^/]*$||mx;
|
|
$DIR = '.' unless $DIR;
|
|
|
|
#-------------------------------------------------------------------------------
|
|
# Declarations
|
|
#-------------------------------------------------------------------------------
|
|
#
|
|
# Constants and other declarations
|
|
#
|
|
my $basedir = "$ENV{HOME}/HPR/Database";
|
|
my $configfile = "$basedir/.hpr_db.cfg";
|
|
|
|
my ( $dbh, $sth1, $h1 );
|
|
my ( $status, @fields );
|
|
my ( %eps_tags, %tags_tags, %diffs );
|
|
|
|
#
|
|
# Enable Unicode mode
|
|
#
|
|
binmode STDOUT, ":encoding(UTF-8)";
|
|
binmode STDERR, ":encoding(UTF-8)";
|
|
|
|
#
|
|
# Load configuration data
|
|
#
|
|
my $conf = Config::General->new(
|
|
-ConfigFile => $configfile,
|
|
-InterPolateVars => 1,
|
|
-ExtendedAccess => 1,
|
|
);
|
|
my %config = $conf->getall();
|
|
|
|
#-------------------------------------------------------------------------------
|
|
# Options and arguments
|
|
#-------------------------------------------------------------------------------
|
|
#
|
|
# Process options
|
|
#
|
|
my %options;
|
|
Options( \%options );
|
|
|
|
Usage() if ( $options{'help'} );
|
|
|
|
#
|
|
# Collect options
|
|
#
|
|
my $verbose = ( defined( $options{verbose} ) ? $options{verbose} : 0 );
|
|
my $dry_run = ( defined( $options{'dry-run'} ) ? $options{'dry-run'} : 1 );
|
|
|
|
#-------------------------------------------------------------------------------
|
|
# Connect to the database
|
|
#-------------------------------------------------------------------------------
|
|
my $dbhost = $config{database}->{host} // '127.0.0.1';
|
|
my $dbport = $config{database}->{port} // 3306;
|
|
my $dbname = $config{database}->{name};
|
|
my $dbuser = $config{database}->{user};
|
|
my $dbpwd = $config{database}->{password};
|
|
$dbh = DBI->connect( "dbi:mysql:host=$dbhost;port=$dbport;database=$dbname",
|
|
$dbuser, $dbpwd, { AutoCommit => 1 } )
|
|
or croak $DBI::errstr;
|
|
|
|
#
|
|
# Enable client-side UTF8
|
|
#
|
|
$dbh->{mysql_enable_utf8} = 1;
|
|
|
|
my $csv = Text::CSV_XS->new;
|
|
|
|
#-------------------------------------------------------------------------------
|
|
# Collect and process the id numbers and tags from the 'eps' table
|
|
#-------------------------------------------------------------------------------
|
|
%eps_tags = %{ collect_eps_tags( $dbh, $verbose ) };
|
|
|
|
#-------------------------------------------------------------------------------
|
|
# Collect any tags we've already stashed in the database
|
|
#-------------------------------------------------------------------------------
|
|
%tags_tags = %{ collect_db_tags( $dbh, $verbose ) };
|
|
|
|
#-------------------------------------------------------------------------------
|
|
# Now compare the two sources to look for differences
|
|
#-------------------------------------------------------------------------------
|
|
%diffs = %{ find_differences(\%eps_tags,\%tags_tags) };
|
|
|
|
#-------------------------------------------------------------------------------
|
|
# Perform the updates if there are any
|
|
#-------------------------------------------------------------------------------
|
|
if (%diffs) {
|
|
print "Differences found\n";
|
|
unless ($dry_run) {
|
|
#
|
|
# Loop through all of the actions by episode number
|
|
#
|
|
foreach my $id ( sort { $a <=> $b } keys(%diffs) ) {
|
|
|
|
#
|
|
# Do deletions before additions
|
|
#
|
|
if ( exists( $diffs{$id}->{deletions} ) ) {
|
|
do_deletions( $dbh, $verbose, $id, $diffs{$id}->{deletions} );
|
|
}
|
|
|
|
#
|
|
# Do additions after deletions
|
|
#
|
|
if ( exists( $diffs{$id}->{additions} ) ) {
|
|
do_additions( $dbh, $sth1, $verbose, $id,
|
|
$diffs{$id}->{additions} );
|
|
}
|
|
|
|
}
|
|
}
|
|
else {
|
|
print "No changes made - dry run\n";
|
|
}
|
|
}
|
|
else {
|
|
print "No differences found\n";
|
|
}
|
|
|
|
exit;
|
|
|
|
#=== FUNCTION ================================================================
|
|
# NAME: collect_eps_tags
|
|
# PURPOSE: Collects the tags from the eps.tags field
|
|
# PARAMETERS: $dbh Database handle
|
|
# $verbose Verbosity level
|
|
# RETURNS: A reference to the hash created by collecting all the tags
|
|
# DESCRIPTION:
|
|
# THROWS: No exceptions
|
|
# COMMENTS: None
|
|
# SEE ALSO: N/A
|
|
#===============================================================================
|
|
sub collect_eps_tags {
|
|
my ( $dbh, $verbose ) = @_;
|
|
|
|
my ( $status, @fields, %hash );
|
|
my ( $sth, $h );
|
|
|
|
#
|
|
# For parsing the field as CSV
|
|
# NOTE: Unexplained error in [E. E. "Doc" Smith] (show 2462). Works with
|
|
# double replaced by single quote, but doesn't work if quotes escaped (by
|
|
# doubling) whether all tags are quoted or not. With 'auto_diag' enabled
|
|
# get the error:
|
|
# CSV_XS ERROR: 2034 - EIF - Loose unescaped quote @ rec 1632 pos 40 field 3
|
|
#
|
|
# NOTE: Adding 'allow_loose_quotes' avoids the issue
|
|
#
|
|
my $csv = Text::CSV_XS->new(
|
|
{ binary => 1, auto_diag => 1, allow_loose_quotes => 1 } );
|
|
|
|
#
|
|
# Query the eps table for all the id and tags
|
|
#
|
|
$sth = $dbh->prepare(
|
|
q{SELECT id,tags FROM eps
|
|
WHERE length(tags) > 0
|
|
ORDER BY id}
|
|
) or die $DBI::errstr;
|
|
if ( $dbh->err ) {
|
|
warn $dbh->errstr;
|
|
}
|
|
|
|
$sth->execute;
|
|
if ( $dbh->err ) {
|
|
warn $dbh->errstr;
|
|
}
|
|
|
|
#
|
|
# Loop through what we got
|
|
#
|
|
while ( $h = $sth->fetchrow_hashref ) {
|
|
#
|
|
# Parse the tag list
|
|
#
|
|
$status = $csv->parse( $h->{tags} );
|
|
unless ($status) {
|
|
#
|
|
# Report any errors
|
|
#
|
|
print "Parse error on episode ", $h->{id}, "\n";
|
|
print $csv->error_input(), "\n";
|
|
next;
|
|
}
|
|
@fields = $csv->fields();
|
|
|
|
next unless (@fields);
|
|
|
|
#
|
|
# Trim all tags (don't alter $_ when doing it)
|
|
#
|
|
@fields = map {
|
|
my $t = $_;
|
|
$t =~ s/(^\s+|\s+$)//g;
|
|
$t;
|
|
} @fields;
|
|
|
|
#print "$h->{id}: ",join(",",@fields),"\n";
|
|
|
|
#
|
|
# Save the id and its tags, sorted for comparison, with empty elements
|
|
# removed too
|
|
#
|
|
$hash{ $h->{id} } = [ sort grep {!/^$/} @fields ];
|
|
|
|
}
|
|
|
|
#print Dumper(\%hash),"\n";
|
|
|
|
#
|
|
# Dump all id numbers and tags if the verbose level is high enough
|
|
#
|
|
if ( $verbose >= 2 ) {
|
|
print "\nTags collected from the 'eps' table\n\n";
|
|
foreach my $id ( sort { $a <=> $b } keys(%hash) ) {
|
|
printf "%04d: %s\n", $id, join( ",", @{ $hash{$id} } );
|
|
}
|
|
}
|
|
|
|
return \%hash;
|
|
|
|
}
|
|
|
|
#=== FUNCTION ================================================================
|
|
# NAME: collect_db_tags
|
|
# PURPOSE: Collects the tags already stored in the database
|
|
# PARAMETERS: $dbh Database handle
|
|
# $verbose Verbosity level
|
|
# RETURNS: A reference to the hash created by collecting all the tags
|
|
# DESCRIPTION:
|
|
# THROWS: No exceptions
|
|
# COMMENTS: None
|
|
# SEE ALSO: N/A
|
|
#===============================================================================
|
|
sub collect_db_tags {
|
|
my ( $dbh, $verbose ) = @_;
|
|
|
|
my %hash;
|
|
my ( $sth, $h );
|
|
|
|
#
|
|
# Query the database for tag data
|
|
#
|
|
|
|
$sth = $dbh->prepare(q{SELECT * FROM tags ORDER BY id})
|
|
or die $DBI::errstr;
|
|
if ( $dbh->err ) {
|
|
warn $dbh->errstr;
|
|
}
|
|
|
|
$sth->execute;
|
|
if ( $dbh->err ) {
|
|
warn $dbh->errstr;
|
|
}
|
|
|
|
#
|
|
# Loop through what we got building an array of tags per episode number
|
|
#
|
|
while ( $h = $sth->fetchrow_hashref ) {
|
|
if ( defined( $hash{ $h->{id} } ) ) {
|
|
push( @{ $hash{ $h->{id} } }, $h->{tag} );
|
|
}
|
|
else {
|
|
$hash{ $h->{id} } = [ $h->{tag} ];
|
|
}
|
|
}
|
|
|
|
#
|
|
# Sort all the tag arrays for comparison
|
|
#
|
|
foreach my $id ( keys(%hash) ) {
|
|
$hash{$id} = [ sort @{ $hash{$id} } ];
|
|
}
|
|
|
|
#
|
|
# Dump all id numbers and tags if the verbose level is high enough
|
|
#
|
|
if ( $verbose >= 2 ) {
|
|
print "\nTags collected from the 'tags' table\n\n";
|
|
foreach my $id ( sort { $a <=> $b } keys(%hash) ) {
|
|
printf "%04d: %s\n", $id, join( ",", @{ $hash{$id} } );
|
|
}
|
|
print '=-' x 40,"\n";
|
|
}
|
|
|
|
return \%hash;
|
|
|
|
}
|
|
|
|
#=== FUNCTION ================================================================
|
|
# NAME: find_differences
|
|
# PURPOSE: Find the differences between two hashes containing tags
|
|
# PARAMETERS: $master Reference to the master hash
|
|
# $slave Reference to the slave hash
|
|
# RETURNS: A reference to the hash created checking for differences
|
|
# DESCRIPTION:
|
|
# THROWS: No exceptions
|
|
# COMMENTS: None
|
|
# SEE ALSO: N/A
|
|
#===============================================================================
|
|
sub find_differences {
|
|
my ($master,$slave) = @_;
|
|
|
|
my %hash;
|
|
|
|
foreach my $id ( sort { $a <=> $b } keys(%$master) ) {
|
|
my %iddiffs = array_compare( $master->{$id}, $slave->{$id} );
|
|
if (%iddiffs) {
|
|
if ( $verbose >= 1 ) {
|
|
#
|
|
# Report what was found if asked to
|
|
#
|
|
print "Episode: $id\n";
|
|
print "Update:\n\teps: ", join( ",", @{ $master->{$id} } ), "\n";
|
|
print "\ttags: ",
|
|
(
|
|
defined( $slave->{$id} )
|
|
? join( ",", @{ $slave->{$id} } )
|
|
: '--None--' ), "\n";
|
|
print '-' x 80,"\n";
|
|
}
|
|
$hash{$id} = {%iddiffs};
|
|
}
|
|
}
|
|
|
|
#
|
|
# Report differences and actions if the verbose level is high enough
|
|
#
|
|
if ( $verbose >= 2 ) {
|
|
print "\nDifferences and actions\n\n";
|
|
foreach my $id ( sort { $a <=> $b } keys(%hash) ) {
|
|
print "Episode: $id\n";
|
|
if ( exists( $hash{$id}->{deletions} ) ) {
|
|
print "Deletions: ";
|
|
print join( ",", @{ $hash{$id}->{deletions} } ), "\n";
|
|
}
|
|
if ( exists( $hash{$id}->{additions} ) ) {
|
|
print "Additions: ";
|
|
print join( ",", @{ $hash{$id}->{additions} } ), "\n";
|
|
}
|
|
print '-' x 80, "\n";
|
|
}
|
|
}
|
|
|
|
return \%hash;
|
|
}
|
|
|
|
#=== FUNCTION ================================================================
|
|
# NAME: do_deletions
|
|
# PURPOSE: Perform any deletions indicated in an array for a given
|
|
# episode
|
|
# PARAMETERS: $dbh Database handle
|
|
# $verbose Verbosity level
|
|
# $id Episode number
|
|
# $tags Reference to an array of tags for this episode
|
|
# RETURNS: Nothing
|
|
# DESCRIPTION:
|
|
# THROWS: No exceptions
|
|
# COMMENTS: None
|
|
# SEE ALSO: N/A
|
|
#===============================================================================
|
|
sub do_deletions {
|
|
my ( $dbh, $verbose, $id, $tags ) = @_;
|
|
|
|
my ( $stmt, @bind, %data, %where );
|
|
|
|
#
|
|
# We will dynamically build SQL as we go
|
|
#
|
|
my $sql = SQL::Abstract->new;
|
|
|
|
#
|
|
# Process the list of tags we have been given
|
|
#
|
|
for my $i ( 0 .. $#$tags ) {
|
|
#
|
|
# Set up a deletion '... where id = ? and tag = ?'
|
|
#
|
|
%where = ( id => $id, tag => $tags->[$i] );
|
|
|
|
( $stmt, @bind ) = $sql->delete( 'tags', \%where );
|
|
|
|
my $sth = $dbh->prepare($stmt);
|
|
my $rv = $sth->execute(@bind);
|
|
if ( $dbh->err ) {
|
|
warn $dbh->errstr;
|
|
}
|
|
$rv = 0 if ( $rv eq '0E0' );
|
|
|
|
#
|
|
# Report the action
|
|
#
|
|
if ($rv) {
|
|
print "Deleted tag for show $id ($tags->[$i])\n";
|
|
}
|
|
|
|
}
|
|
|
|
print "Deleted ", scalar(@$tags), " row",
|
|
( scalar(@$tags) != 1 ? 's' : '' ), "\n";
|
|
|
|
}
|
|
|
|
#=== FUNCTION ================================================================
|
|
# NAME: do_additions
|
|
# PURPOSE: Perform any additions indicated in an array for a given
|
|
# episode
|
|
# PARAMETERS: $dbh Database handle
|
|
# $sth A prepared database handle with a query to
|
|
# search for the target tag
|
|
# $verbose Verbosity level
|
|
# $id Episode number
|
|
# $tags Reference to an array of tags for this episode
|
|
# RETURNS: Nothing
|
|
# DESCRIPTION: FIXME
|
|
# THROWS: No exceptions
|
|
# COMMENTS: None
|
|
# SEE ALSO: N/A
|
|
#===============================================================================
|
|
sub do_additions {
|
|
my ( $dbh, $sth, $verbose, $id, $tags ) = @_;
|
|
|
|
my ( $sth1, $rv, $h, $tid, $stmt, @bind, %data );
|
|
|
|
#
|
|
# We will dynamically build SQL as we go
|
|
#
|
|
my $sql = SQL::Abstract->new;
|
|
|
|
my @lctags = map { lc($_) } @$tags;
|
|
|
|
for my $i ( 0 .. $#$tags ) {
|
|
#
|
|
# Build the row we're going to add
|
|
#
|
|
%data = (
|
|
id => $id,
|
|
tag => $tags->[$i],
|
|
lctag => $lctags[$i]
|
|
);
|
|
|
|
( $stmt, @bind ) = $sql->insert( 'tags', \%data );
|
|
|
|
my $sth = $dbh->prepare($stmt);
|
|
my $rv = $sth->execute(@bind);
|
|
if ( $dbh->err ) {
|
|
warn $dbh->errstr;
|
|
}
|
|
$rv = 0 if ( $rv eq '0E0' );
|
|
|
|
#
|
|
# Report the action
|
|
#
|
|
if ($rv) {
|
|
print "Added tag for show $id ($tags->[$i])\n";
|
|
}
|
|
}
|
|
|
|
print "Added ", scalar(@$tags), " row",
|
|
( scalar(@$tags) != 1 ? 's' : '' ), "\n";
|
|
|
|
}
|
|
|
|
#=== FUNCTION ================================================================
|
|
# NAME: array_compare
|
|
# PURPOSE: Compares the elements of two arrays to see if an element
|
|
# present in the master is also present in the slave
|
|
# PARAMETERS: $arr1 A reference to the first array; the MASTER
|
|
# $arr2 A reference to the second array; the SLAVE
|
|
# RETURNS: A hash containing arrays of additions and deletions of the
|
|
# elements that are different. The structure is:
|
|
# {
|
|
# additions => [ tag1, tag2 .. tagn ],
|
|
# deletions => [ tag1, tag2 .. tagn ],
|
|
# }
|
|
# The returned hash will be empty if there are no differences.
|
|
# DESCRIPTION: The requirement is to find if there are differences, then to
|
|
# find what they are so that other code can make the slave array
|
|
# match the master. The two arrays come from a database, so
|
|
# we're trying to make a second source (slave) equal the first
|
|
# (master).
|
|
# THROWS: No exceptions
|
|
# COMMENTS: None
|
|
# SEE ALSO: N/A
|
|
#===============================================================================
|
|
sub array_compare {
|
|
my ( $arr1, $arr2 ) = @_;
|
|
|
|
my %res;
|
|
my ( @additions, @deletions );
|
|
my %h1 = map { $_ => 1 } @$arr1;
|
|
my %h2 = map { $_ => 1 } @$arr2;
|
|
|
|
#
|
|
# Find additions
|
|
#
|
|
for my $key ( keys(%h1) ) {
|
|
unless ( exists( $h2{$key} ) ) {
|
|
push( @additions, $key );
|
|
}
|
|
}
|
|
|
|
#
|
|
# Find deletions
|
|
#
|
|
for my $key ( keys(%h2) ) {
|
|
unless ( exists( $h1{$key} ) ) {
|
|
push( @deletions, $key );
|
|
}
|
|
}
|
|
|
|
$res{additions} = [@additions] if @additions;
|
|
$res{deletions} = [@deletions] if @deletions;
|
|
|
|
return %res;
|
|
}
|
|
|
|
#=== FUNCTION ================================================================
|
|
# NAME: Usage
|
|
# PURPOSE: Display a usage message and exit
|
|
# PARAMETERS: None
|
|
# RETURNS: To command line level with exit value 1
|
|
# DESCRIPTION: Builds the usage message using global values
|
|
# THROWS: no exceptions
|
|
# COMMENTS: none
|
|
# SEE ALSO: n/a
|
|
#===============================================================================
|
|
sub Usage {
|
|
print STDERR <<EOD;
|
|
Usage: $PROG [options] project
|
|
|
|
$PROG v$VERSION
|
|
|
|
-help Display this information
|
|
-[no]dry-run Display what would have been done but make no changes.
|
|
Default is -dry-run.
|
|
-verbose A repeatable option which turns up the verbosity from
|
|
0 (silent) to 2 (lots of stuff). Default is 0.
|
|
|
|
EOD
|
|
exit(1);
|
|
}
|
|
|
|
#=== FUNCTION ================================================================
|
|
# NAME: Options
|
|
# PURPOSE: Processes command-line options
|
|
# PARAMETERS: $optref Hash reference to hold the options
|
|
# RETURNS: Undef
|
|
# DESCRIPTION:
|
|
# THROWS: no exceptions
|
|
# COMMENTS: none
|
|
# SEE ALSO: n/a
|
|
#===============================================================================
|
|
sub Options {
|
|
my ($optref) = @_;
|
|
|
|
my @options = ( "help", "verbose+", "dry-run!", );
|
|
|
|
if ( !GetOptions( $optref, @options ) ) {
|
|
Usage();
|
|
}
|
|
|
|
return;
|
|
}
|
|
|
|
# vim: syntax=perl:ts=8:sw=4:et:ai:tw=78:fo=tcrqn21:fdm=marker
|
|
|