426 lines
12 KiB
Plaintext
426 lines
12 KiB
Plaintext
|
#!/usr/bin/env perl
|
||
|
#===============================================================================
|
||
|
#
|
||
|
# FILE: find_double_hosts
|
||
|
#
|
||
|
# USAGE: ./find_double_hosts
|
||
|
#
|
||
|
# DESCRIPTION: Find HPR shows with two hosts (host is "A and B"), find the
|
||
|
# hosts if possible and flag updates to the database to
|
||
|
# represent the dual nature.
|
||
|
#
|
||
|
# OPTIONS: ---
|
||
|
# REQUIREMENTS: ---
|
||
|
# BUGS: ---
|
||
|
# NOTES: ---
|
||
|
# AUTHOR: Dave Morriss (djm), Dave.Morriss@gmail.com
|
||
|
# VERSION: 0.0.2
|
||
|
# CREATED: 2017-10-13 19:17:51
|
||
|
# REVISION: 2017-10-13 19:19:43
|
||
|
#
|
||
|
#===============================================================================
|
||
|
|
||
|
use 5.010;
|
||
|
use strict;
|
||
|
use warnings;
|
||
|
use utf8;
|
||
|
|
||
|
use Config::General;
|
||
|
use DBI;
|
||
|
|
||
|
use Data::Dumper;
|
||
|
|
||
|
#
|
||
|
# Version number (manually incremented)
|
||
|
#
|
||
|
our $VERSION = '0.0.2';
|
||
|
|
||
|
#
|
||
|
# Script and directory names
|
||
|
#
|
||
|
( my $PROG = $0 ) =~ s|.*/||mx;
|
||
|
( my $DIR = $0 ) =~ s|/?[^/]*$||mx;
|
||
|
$DIR = '.' unless $DIR;
|
||
|
|
||
|
#-------------------------------------------------------------------------------
|
||
|
# Declarations
|
||
|
#-------------------------------------------------------------------------------
|
||
|
#
|
||
|
# Constants and other declarations
|
||
|
#
|
||
|
my $basedir = "$ENV{HOME}/HPR/Database";
|
||
|
my $configfile1 = "$basedir/.hpr_db.cfg";
|
||
|
my $configfile2 = "$basedir/.hpr_pg.cfg";
|
||
|
|
||
|
my $email_template = 'host_%s@hackerpublicradio.org';
|
||
|
my $default_licence = 'CC-BY-SA';
|
||
|
|
||
|
my ( $dbh1, $dbh2, $sth1, $h1, $rv1, $sth2, $h2, $rv2, $sth3, $h3, $rv3, $sth4, $h4, $rv4 );
|
||
|
|
||
|
my ( %doubles, @h, %hosts, $unknown, $default_email );
|
||
|
|
||
|
#
|
||
|
# Enable Unicode mode
|
||
|
#
|
||
|
binmode STDOUT, ":encoding(UTF-8)";
|
||
|
binmode STDERR, ":encoding(UTF-8)";
|
||
|
|
||
|
#
|
||
|
# Load database configuration data
|
||
|
#
|
||
|
my $conf1 = Config::General->new(
|
||
|
-ConfigFile => $configfile1,
|
||
|
-InterPolateVars => 1,
|
||
|
-ExtendedAccess => 1
|
||
|
);
|
||
|
my %config1 = $conf1->getall();
|
||
|
|
||
|
my $conf2 = Config::General->new(
|
||
|
-ConfigFile => $configfile2,
|
||
|
-InterPolateVars => 1,
|
||
|
-ExtendedAccess => 1
|
||
|
);
|
||
|
my %config2 = $conf2->getall();
|
||
|
|
||
|
#-------------------------------------------------------------------------------
|
||
|
# Connect to the MariaDB database
|
||
|
#-------------------------------------------------------------------------------
|
||
|
my $dbtype1 = $config1{database}->{type} // 'mysql';
|
||
|
my $dbhost1 = $config1{database}->{host} // '127.0.0.1';
|
||
|
my $dbport1 = $config1{database}->{port} // 3306;
|
||
|
my $dbname1 = $config1{database}->{name};
|
||
|
my $dbuser1 = $config1{database}->{user};
|
||
|
my $dbpwd1 = $config1{database}->{password};
|
||
|
$dbh1
|
||
|
= DBI->connect( "dbi:$dbtype1:host=$dbhost1;port=$dbport1;database=$dbname1",
|
||
|
$dbuser1, $dbpwd1, { AutoCommit => 1 } )
|
||
|
or die $DBI::errstr;
|
||
|
|
||
|
#
|
||
|
# Enable client-side UTF8
|
||
|
#
|
||
|
$dbh1->{mysql_enable_utf8} = 1;
|
||
|
|
||
|
#-------------------------------------------------------------------------------
|
||
|
# Connect to the PostgreSQL database
|
||
|
#-------------------------------------------------------------------------------
|
||
|
my $dbtype2 = $config2{database}->{type} // 'Pg';
|
||
|
my $dbhost2 = $config2{database}->{host} // '127.0.0.1';
|
||
|
my $dbport2 = $config2{database}->{port} // 5432;
|
||
|
my $dbname2 = $config2{database}->{name};
|
||
|
my $dbuser2 = $config2{database}->{user};
|
||
|
my $dbpwd2 = $config2{database}->{password};
|
||
|
$dbh2 = DBI->connect( "dbi:$dbtype2:host=$dbhost2;database=$dbname2;port=$dbport2",
|
||
|
$dbuser2, $dbpwd2, { PrintError => 0, AutoCommit => 1 } )
|
||
|
or die $DBI::errstr;
|
||
|
|
||
|
#
|
||
|
# Enable client-side UTF8
|
||
|
#
|
||
|
$dbh2->{pg_enable_utf8} = 1;
|
||
|
|
||
|
#-------------------------------------------------------------------------------
|
||
|
# Query preparation
|
||
|
#-------------------------------------------------------------------------------
|
||
|
#
|
||
|
# MariaDB query to find double hosts
|
||
|
#
|
||
|
my $sql1 = q{
|
||
|
SELECT hostid, host FROM hosts
|
||
|
WHERE host regexp '[[:<:]]and[[:>:]]'
|
||
|
ORDER BY hostid
|
||
|
};
|
||
|
|
||
|
$sth1 = $dbh1->prepare($sql1) or die $DBI::errstr;
|
||
|
if ( $dbh1->err ) {
|
||
|
warn $dbh1->errstr;
|
||
|
}
|
||
|
|
||
|
#
|
||
|
# MariaDB query to find the host by name
|
||
|
#
|
||
|
$sth2 = $dbh1->prepare(q{SELECT hostid FROM hosts WHERE host REGEXP ?})
|
||
|
or die $DBI::errstr;
|
||
|
if ( $dbh1->err ) {
|
||
|
warn $dbh1->errstr;
|
||
|
}
|
||
|
|
||
|
#
|
||
|
# PostgreSQL query to register an unknown host
|
||
|
#
|
||
|
$sth3
|
||
|
= $dbh2->prepare(q{INSERT INTO hosts (host,email,license) VALUES (?,?,?)})
|
||
|
or die $DBI::errstr;
|
||
|
if ( $dbh2->err ) {
|
||
|
warn $dbh2->errstr;
|
||
|
}
|
||
|
|
||
|
#
|
||
|
# PostgreSQL query to find shows with particular host ids
|
||
|
#
|
||
|
$sth4 = $dbh2->prepare(
|
||
|
q{
|
||
|
SELECT e.id AS eps_id
|
||
|
FROM episodes e
|
||
|
JOIN episodes_hosts_xref eh ON (e.id = eh.episodes_id)
|
||
|
JOIN hosts h ON (h.id = eh.hosts_id)
|
||
|
WHERE h.id = ?
|
||
|
}
|
||
|
) or die $DBI::errstr;
|
||
|
if ( $dbh2->err ) {
|
||
|
warn $dbh2->errstr;
|
||
|
}
|
||
|
|
||
|
#-------------------------------------------------------------------------------
|
||
|
# Find all the "double hosts"
|
||
|
#-------------------------------------------------------------------------------
|
||
|
#
|
||
|
# Query MariaDB for the target hosts
|
||
|
#
|
||
|
$sth1->execute;
|
||
|
if ( $dbh1->err ) {
|
||
|
die $dbh1->errstr;
|
||
|
}
|
||
|
|
||
|
#
|
||
|
# Loop through the list of double hostnames and parse them out. Save the
|
||
|
# originals in the %doubles hash and the parsed names in the %hosts hash.
|
||
|
#
|
||
|
while ( $h1 = $sth1->fetchrow_hashref ) {
|
||
|
#
|
||
|
# Each hash value is a hash containing the original id, and, in a sub-hash
|
||
|
# the replacement ids
|
||
|
#
|
||
|
$doubles{$h1->{host}} = {
|
||
|
double => $h1->{hostid},
|
||
|
singles => {},
|
||
|
};
|
||
|
|
||
|
#
|
||
|
# Parse the double host string
|
||
|
#
|
||
|
@h = ( $h1->{host} =~ /^(.+)\s+and\s+(.+)$/ );
|
||
|
|
||
|
printf "%-4d %s", $h1->{hostid}, $h1->{host};
|
||
|
print " [", join( ",", @h ), "]\n";
|
||
|
|
||
|
#
|
||
|
# Initialise the entries for %doubles and %hosts
|
||
|
#
|
||
|
for my $host (@h) {
|
||
|
$doubles{$h1->{host}}->{singles}->{$host} = undef;
|
||
|
unless ( exists( $hosts{$host} ) ) {
|
||
|
$hosts{$host} = 0;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
print '-' x 80,"\n";
|
||
|
|
||
|
#-------------------------------------------------------------------------------
|
||
|
# Find the single hosts in the 'hosts' table
|
||
|
#-------------------------------------------------------------------------------
|
||
|
#
|
||
|
# Scan the list of individual hosts and find them in the 'hosts' table
|
||
|
#
|
||
|
$unknown = 0;
|
||
|
foreach my $host ( sort(keys(%hosts)) ) {
|
||
|
$rv2 = $sth2->execute("^$host\$");
|
||
|
if ( $dbh1->err ) {
|
||
|
die $dbh1->errstr;
|
||
|
}
|
||
|
$rv2 = 0 if ( $rv2 eq '0E0' );
|
||
|
|
||
|
if ($rv2) {
|
||
|
$h2 = $sth2->fetchrow_hashref;
|
||
|
print "Found id for $host: ", $h2->{hostid}, "\n";
|
||
|
$hosts{$host} = $h2->{hostid};
|
||
|
save_hostid(\%doubles,$host,$h2->{hostid});
|
||
|
}
|
||
|
else {
|
||
|
print "Can't find $host\n";
|
||
|
$unknown++;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
#print Dumper(\%hosts),"\n";
|
||
|
|
||
|
print '-' x 80,"\n";
|
||
|
|
||
|
#-------------------------------------------------------------------------------
|
||
|
# Allocate all unknown hosts a host id in the PostgreSQL database, and give an
|
||
|
# unique email address.
|
||
|
#-------------------------------------------------------------------------------
|
||
|
if ( $unknown > 0 ) {
|
||
|
print "Registering $unknown hosts\n";
|
||
|
|
||
|
foreach my $host ( sort( keys(%hosts) ) ) {
|
||
|
if ( $hosts{$host} == 0 ) {
|
||
|
$rv3 = $sth3->execute( $host, undef, $default_licence );
|
||
|
if ( $dbh2->err ) {
|
||
|
die $dbh2->errstr;
|
||
|
}
|
||
|
|
||
|
#
|
||
|
# Write a row to the 'hosts' table and save the id number
|
||
|
# generated
|
||
|
#
|
||
|
my $newid = $dbh2->last_insert_id( undef, undef, undef, undef,
|
||
|
{ sequence => 'host_seq' } );
|
||
|
$hosts{$host} = $newid;
|
||
|
save_hostid(\%doubles,$host,$newid);
|
||
|
print "Host $host added with id $newid\n";
|
||
|
|
||
|
#
|
||
|
# Give the new host entry a default email address
|
||
|
#
|
||
|
$default_email = sprintf($email_template,$newid);
|
||
|
$rv3 = $dbh2->do( 'UPDATE hosts SET email = ? WHERE id = ?',
|
||
|
undef, $default_email, $newid );
|
||
|
if ( $dbh2->err ) {
|
||
|
warn $dbh2->errstr;
|
||
|
}
|
||
|
$rv3 = 0 if ( $rv3 eq '0E0' );
|
||
|
|
||
|
warn "Failed to set email address $default_email for $host\n"
|
||
|
unless ( defined($rv3) );
|
||
|
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
print '-' x 80,"\n";
|
||
|
|
||
|
#-------------------------------------------------------------------------------
|
||
|
# Now %doubles contains all the original names and host ids and %hosts
|
||
|
# contains the parsed out names and their ids. We can look for shows
|
||
|
# attributed to the first set and re-attribute them to the second set.
|
||
|
#-------------------------------------------------------------------------------
|
||
|
print "Changing host associations for shows with two hosts\n";
|
||
|
foreach my $double ( sort( keys(%doubles) ) ) {
|
||
|
print "Processing $double\n";
|
||
|
my ( $doubleid, @newids ) = (
|
||
|
$doubles{$double}->{double},
|
||
|
values( %{ $doubles{$double}->{singles} } )
|
||
|
);
|
||
|
print " Original id: $doubleid\n";
|
||
|
print " Replacements: ", join( ", ", @newids ), "\n";
|
||
|
|
||
|
#
|
||
|
# Find shows marked as belonging to this double-host
|
||
|
#
|
||
|
$sth4->execute($doubleid);
|
||
|
if ( $dbh2->err ) {
|
||
|
die $dbh2->errstr;
|
||
|
}
|
||
|
|
||
|
#
|
||
|
# Process all the shows
|
||
|
#
|
||
|
while ( $h4 = $sth4->fetchrow_hashref ) {
|
||
|
my $eps_id = $h4->{eps_id};
|
||
|
print " Show $eps_id is ascribed to host $doubleid\n";
|
||
|
|
||
|
$dbh2->begin_work();
|
||
|
|
||
|
#
|
||
|
# Delete the xref link for the double host
|
||
|
#
|
||
|
$rv4
|
||
|
= $dbh2->do(
|
||
|
'DELETE FROM episodes_hosts_xref WHERE episodes_id = ?',
|
||
|
undef, $eps_id );
|
||
|
if ( $dbh2->err ) {
|
||
|
warn $dbh2->errstr;
|
||
|
}
|
||
|
$rv4 = 0 if ( $rv4 eq '0E0' );
|
||
|
|
||
|
if ( defined($rv4) ) {
|
||
|
print " Deleted entry from episodes_hosts_xref for $eps_id\n";
|
||
|
}
|
||
|
else {
|
||
|
warn "Problem deleting from episodes_hosts_xref for $eps_id\n";
|
||
|
}
|
||
|
|
||
|
#
|
||
|
# Add links for the single hosts
|
||
|
#
|
||
|
foreach my $hid (@newids) {
|
||
|
$rv4 = $dbh2->do( 'INSERT INTO episodes_hosts_xref VALUES (?,?)',
|
||
|
undef, $eps_id, $hid );
|
||
|
if ( $dbh2->err ) {
|
||
|
warn $dbh2->errstr;
|
||
|
}
|
||
|
$rv4 = 0 if ( $rv4 eq '0E0' );
|
||
|
|
||
|
if ( defined($rv4) ) {
|
||
|
print " Added entry to episodes_hosts_xref values ",
|
||
|
"$eps_id,$hid\n";
|
||
|
}
|
||
|
else {
|
||
|
warn "Problem adding to episodes_hosts_xref values "
|
||
|
. "$eps_id,$hid\n";
|
||
|
}
|
||
|
|
||
|
}
|
||
|
|
||
|
#
|
||
|
# Commit the delete/inserts above
|
||
|
#
|
||
|
$dbh2->commit();
|
||
|
|
||
|
}
|
||
|
print '~' x 80, "\n";
|
||
|
|
||
|
#
|
||
|
# Delete the double host (NOTE: This will fail due to referential
|
||
|
# integrity if the DELETE above failed, so there is scope for debris to be
|
||
|
# left around)
|
||
|
#
|
||
|
$rv4 = $dbh2->do( 'DELETE FROM hosts WHERE id = ?', undef, $doubleid );
|
||
|
if ( $dbh2->err ) {
|
||
|
warn $dbh2->errstr;
|
||
|
}
|
||
|
$rv4 = 0 if ( $rv4 eq '0E0' );
|
||
|
|
||
|
if ( defined($rv4) ) {
|
||
|
print " Deleted entry from hosts for id $doubleid ($double)\n";
|
||
|
}
|
||
|
else {
|
||
|
warn "Problem deleting from hosts for id $doubleid ($double)\n";
|
||
|
}
|
||
|
|
||
|
}
|
||
|
|
||
|
print '-' x 80,"\n";
|
||
|
|
||
|
exit;
|
||
|
|
||
|
#=== FUNCTION ================================================================
|
||
|
# NAME: save_hostid
|
||
|
# PURPOSE: Saves the host id after searching for the key in the %doubles
|
||
|
# hash
|
||
|
# PARAMETERS: $doubles hashref to %doubles
|
||
|
# $host host key
|
||
|
# $hostid host id number
|
||
|
# RETURNS: Nothing
|
||
|
# DESCRIPTION: Searches the %doubles hash for particular keys in the
|
||
|
# 'singles' sub-hash. If found saves the corresponding host id
|
||
|
# there.
|
||
|
# THROWS: No exceptions
|
||
|
# COMMENTS: None
|
||
|
# SEE ALSO: N/A
|
||
|
#===============================================================================
|
||
|
sub save_hostid {
|
||
|
my ( $doubles, $host, $hostid ) = @_;
|
||
|
|
||
|
foreach my $key ( keys(%$doubles) ) {
|
||
|
if ( exists( $doubles->{$key}->{singles}->{$host} ) ) {
|
||
|
$doubles->{$key}->{singles}->{$host} = $hostid;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
# vim: syntax=perl:ts=8:sw=4:et:ai:tw=78:fo=tcrqn21:fdm=marker
|
||
|
|