1
0
forked from HPR/hpr-tools
hpr-tools/Database/find_double_hosts

426 lines
12 KiB
Plaintext
Raw Permalink Normal View History

#!/usr/bin/env perl
#===============================================================================
#
# FILE: find_double_hosts
#
# USAGE: ./find_double_hosts
#
# DESCRIPTION: Find HPR shows with two hosts (host is "A and B"), find the
# hosts if possible and flag updates to the database to
# represent the dual nature.
#
# OPTIONS: ---
# REQUIREMENTS: ---
# BUGS: ---
# NOTES: ---
# AUTHOR: Dave Morriss (djm), Dave.Morriss@gmail.com
# VERSION: 0.0.2
# CREATED: 2017-10-13 19:17:51
# REVISION: 2017-10-13 19:19:43
#
#===============================================================================
use 5.010;
use strict;
use warnings;
use utf8;
use Config::General;
use DBI;
use Data::Dumper;
#
# Version number (manually incremented)
#
our $VERSION = '0.0.2';
#
# Script and directory names
#
( my $PROG = $0 ) =~ s|.*/||mx;
( my $DIR = $0 ) =~ s|/?[^/]*$||mx;
$DIR = '.' unless $DIR;
#-------------------------------------------------------------------------------
# Declarations
#-------------------------------------------------------------------------------
#
# Constants and other declarations
#
my $basedir = "$ENV{HOME}/HPR/Database";
my $configfile1 = "$basedir/.hpr_db.cfg";
my $configfile2 = "$basedir/.hpr_pg.cfg";
my $email_template = 'host_%s@hackerpublicradio.org';
my $default_licence = 'CC-BY-SA';
my ( $dbh1, $dbh2, $sth1, $h1, $rv1, $sth2, $h2, $rv2, $sth3, $h3, $rv3, $sth4, $h4, $rv4 );
my ( %doubles, @h, %hosts, $unknown, $default_email );
#
# Enable Unicode mode
#
binmode STDOUT, ":encoding(UTF-8)";
binmode STDERR, ":encoding(UTF-8)";
#
# Load database configuration data
#
my $conf1 = Config::General->new(
-ConfigFile => $configfile1,
-InterPolateVars => 1,
-ExtendedAccess => 1
);
my %config1 = $conf1->getall();
my $conf2 = Config::General->new(
-ConfigFile => $configfile2,
-InterPolateVars => 1,
-ExtendedAccess => 1
);
my %config2 = $conf2->getall();
#-------------------------------------------------------------------------------
# Connect to the MariaDB database
#-------------------------------------------------------------------------------
my $dbtype1 = $config1{database}->{type} // 'mysql';
my $dbhost1 = $config1{database}->{host} // '127.0.0.1';
my $dbport1 = $config1{database}->{port} // 3306;
my $dbname1 = $config1{database}->{name};
my $dbuser1 = $config1{database}->{user};
my $dbpwd1 = $config1{database}->{password};
$dbh1
= DBI->connect( "dbi:$dbtype1:host=$dbhost1;port=$dbport1;database=$dbname1",
$dbuser1, $dbpwd1, { AutoCommit => 1 } )
or die $DBI::errstr;
#
# Enable client-side UTF8
#
$dbh1->{mysql_enable_utf8} = 1;
#-------------------------------------------------------------------------------
# Connect to the PostgreSQL database
#-------------------------------------------------------------------------------
my $dbtype2 = $config2{database}->{type} // 'Pg';
my $dbhost2 = $config2{database}->{host} // '127.0.0.1';
my $dbport2 = $config2{database}->{port} // 5432;
my $dbname2 = $config2{database}->{name};
my $dbuser2 = $config2{database}->{user};
my $dbpwd2 = $config2{database}->{password};
$dbh2 = DBI->connect( "dbi:$dbtype2:host=$dbhost2;database=$dbname2;port=$dbport2",
$dbuser2, $dbpwd2, { PrintError => 0, AutoCommit => 1 } )
or die $DBI::errstr;
#
# Enable client-side UTF8
#
$dbh2->{pg_enable_utf8} = 1;
#-------------------------------------------------------------------------------
# Query preparation
#-------------------------------------------------------------------------------
#
# MariaDB query to find double hosts
#
my $sql1 = q{
SELECT hostid, host FROM hosts
WHERE host regexp '[[:<:]]and[[:>:]]'
ORDER BY hostid
};
$sth1 = $dbh1->prepare($sql1) or die $DBI::errstr;
if ( $dbh1->err ) {
warn $dbh1->errstr;
}
#
# MariaDB query to find the host by name
#
$sth2 = $dbh1->prepare(q{SELECT hostid FROM hosts WHERE host REGEXP ?})
or die $DBI::errstr;
if ( $dbh1->err ) {
warn $dbh1->errstr;
}
#
# PostgreSQL query to register an unknown host
#
$sth3
= $dbh2->prepare(q{INSERT INTO hosts (host,email,license) VALUES (?,?,?)})
or die $DBI::errstr;
if ( $dbh2->err ) {
warn $dbh2->errstr;
}
#
# PostgreSQL query to find shows with particular host ids
#
$sth4 = $dbh2->prepare(
q{
SELECT e.id AS eps_id
FROM episodes e
JOIN episodes_hosts_xref eh ON (e.id = eh.episodes_id)
JOIN hosts h ON (h.id = eh.hosts_id)
WHERE h.id = ?
}
) or die $DBI::errstr;
if ( $dbh2->err ) {
warn $dbh2->errstr;
}
#-------------------------------------------------------------------------------
# Find all the "double hosts"
#-------------------------------------------------------------------------------
#
# Query MariaDB for the target hosts
#
$sth1->execute;
if ( $dbh1->err ) {
die $dbh1->errstr;
}
#
# Loop through the list of double hostnames and parse them out. Save the
# originals in the %doubles hash and the parsed names in the %hosts hash.
#
while ( $h1 = $sth1->fetchrow_hashref ) {
#
# Each hash value is a hash containing the original id, and, in a sub-hash
# the replacement ids
#
$doubles{$h1->{host}} = {
double => $h1->{hostid},
singles => {},
};
#
# Parse the double host string
#
@h = ( $h1->{host} =~ /^(.+)\s+and\s+(.+)$/ );
printf "%-4d %s", $h1->{hostid}, $h1->{host};
print " [", join( ",", @h ), "]\n";
#
# Initialise the entries for %doubles and %hosts
#
for my $host (@h) {
$doubles{$h1->{host}}->{singles}->{$host} = undef;
unless ( exists( $hosts{$host} ) ) {
$hosts{$host} = 0;
}
}
}
print '-' x 80,"\n";
#-------------------------------------------------------------------------------
# Find the single hosts in the 'hosts' table
#-------------------------------------------------------------------------------
#
# Scan the list of individual hosts and find them in the 'hosts' table
#
$unknown = 0;
foreach my $host ( sort(keys(%hosts)) ) {
$rv2 = $sth2->execute("^$host\$");
if ( $dbh1->err ) {
die $dbh1->errstr;
}
$rv2 = 0 if ( $rv2 eq '0E0' );
if ($rv2) {
$h2 = $sth2->fetchrow_hashref;
print "Found id for $host: ", $h2->{hostid}, "\n";
$hosts{$host} = $h2->{hostid};
save_hostid(\%doubles,$host,$h2->{hostid});
}
else {
print "Can't find $host\n";
$unknown++;
}
}
#print Dumper(\%hosts),"\n";
print '-' x 80,"\n";
#-------------------------------------------------------------------------------
# Allocate all unknown hosts a host id in the PostgreSQL database, and give an
# unique email address.
#-------------------------------------------------------------------------------
if ( $unknown > 0 ) {
print "Registering $unknown hosts\n";
foreach my $host ( sort( keys(%hosts) ) ) {
if ( $hosts{$host} == 0 ) {
$rv3 = $sth3->execute( $host, undef, $default_licence );
if ( $dbh2->err ) {
die $dbh2->errstr;
}
#
# Write a row to the 'hosts' table and save the id number
# generated
#
my $newid = $dbh2->last_insert_id( undef, undef, undef, undef,
{ sequence => 'host_seq' } );
$hosts{$host} = $newid;
save_hostid(\%doubles,$host,$newid);
print "Host $host added with id $newid\n";
#
# Give the new host entry a default email address
#
$default_email = sprintf($email_template,$newid);
$rv3 = $dbh2->do( 'UPDATE hosts SET email = ? WHERE id = ?',
undef, $default_email, $newid );
if ( $dbh2->err ) {
warn $dbh2->errstr;
}
$rv3 = 0 if ( $rv3 eq '0E0' );
warn "Failed to set email address $default_email for $host\n"
unless ( defined($rv3) );
}
}
}
print '-' x 80,"\n";
#-------------------------------------------------------------------------------
# Now %doubles contains all the original names and host ids and %hosts
# contains the parsed out names and their ids. We can look for shows
# attributed to the first set and re-attribute them to the second set.
#-------------------------------------------------------------------------------
print "Changing host associations for shows with two hosts\n";
foreach my $double ( sort( keys(%doubles) ) ) {
print "Processing $double\n";
my ( $doubleid, @newids ) = (
$doubles{$double}->{double},
values( %{ $doubles{$double}->{singles} } )
);
print " Original id: $doubleid\n";
print " Replacements: ", join( ", ", @newids ), "\n";
#
# Find shows marked as belonging to this double-host
#
$sth4->execute($doubleid);
if ( $dbh2->err ) {
die $dbh2->errstr;
}
#
# Process all the shows
#
while ( $h4 = $sth4->fetchrow_hashref ) {
my $eps_id = $h4->{eps_id};
print " Show $eps_id is ascribed to host $doubleid\n";
$dbh2->begin_work();
#
# Delete the xref link for the double host
#
$rv4
= $dbh2->do(
'DELETE FROM episodes_hosts_xref WHERE episodes_id = ?',
undef, $eps_id );
if ( $dbh2->err ) {
warn $dbh2->errstr;
}
$rv4 = 0 if ( $rv4 eq '0E0' );
if ( defined($rv4) ) {
print " Deleted entry from episodes_hosts_xref for $eps_id\n";
}
else {
warn "Problem deleting from episodes_hosts_xref for $eps_id\n";
}
#
# Add links for the single hosts
#
foreach my $hid (@newids) {
$rv4 = $dbh2->do( 'INSERT INTO episodes_hosts_xref VALUES (?,?)',
undef, $eps_id, $hid );
if ( $dbh2->err ) {
warn $dbh2->errstr;
}
$rv4 = 0 if ( $rv4 eq '0E0' );
if ( defined($rv4) ) {
print " Added entry to episodes_hosts_xref values ",
"$eps_id,$hid\n";
}
else {
warn "Problem adding to episodes_hosts_xref values "
. "$eps_id,$hid\n";
}
}
#
# Commit the delete/inserts above
#
$dbh2->commit();
}
print '~' x 80, "\n";
#
# Delete the double host (NOTE: This will fail due to referential
# integrity if the DELETE above failed, so there is scope for debris to be
# left around)
#
$rv4 = $dbh2->do( 'DELETE FROM hosts WHERE id = ?', undef, $doubleid );
if ( $dbh2->err ) {
warn $dbh2->errstr;
}
$rv4 = 0 if ( $rv4 eq '0E0' );
if ( defined($rv4) ) {
print " Deleted entry from hosts for id $doubleid ($double)\n";
}
else {
warn "Problem deleting from hosts for id $doubleid ($double)\n";
}
}
print '-' x 80,"\n";
exit;
#=== FUNCTION ================================================================
# NAME: save_hostid
# PURPOSE: Saves the host id after searching for the key in the %doubles
# hash
# PARAMETERS: $doubles hashref to %doubles
# $host host key
# $hostid host id number
# RETURNS: Nothing
# DESCRIPTION: Searches the %doubles hash for particular keys in the
# 'singles' sub-hash. If found saves the corresponding host id
# there.
# THROWS: No exceptions
# COMMENTS: None
# SEE ALSO: N/A
#===============================================================================
sub save_hostid {
my ( $doubles, $host, $hostid ) = @_;
foreach my $key ( keys(%$doubles) ) {
if ( exists( $doubles->{$key}->{singles}->{$host} ) ) {
$doubles->{$key}->{singles}->{$host} = $hostid;
}
}
}
# vim: syntax=perl:ts=8:sw=4:et:ai:tw=78:fo=tcrqn21:fdm=marker