#!/usr/bin/env perl #=============================================================================== # # FILE: find_double_hosts # # USAGE: ./find_double_hosts # # DESCRIPTION: Find HPR shows with two hosts (host is "A and B"), find the # hosts if possible and flag updates to the database to # represent the dual nature. # # OPTIONS: --- # REQUIREMENTS: --- # BUGS: --- # NOTES: --- # AUTHOR: Dave Morriss (djm), Dave.Morriss@gmail.com # VERSION: 0.0.2 # CREATED: 2017-10-13 19:17:51 # REVISION: 2017-10-13 19:19:43 # #=============================================================================== use 5.010; use strict; use warnings; use utf8; use Config::General; use DBI; use Data::Dumper; # # Version number (manually incremented) # our $VERSION = '0.0.2'; # # Script and directory names # ( my $PROG = $0 ) =~ s|.*/||mx; ( my $DIR = $0 ) =~ s|/?[^/]*$||mx; $DIR = '.' unless $DIR; #------------------------------------------------------------------------------- # Declarations #------------------------------------------------------------------------------- # # Constants and other declarations # my $basedir = "$ENV{HOME}/HPR/Database"; my $configfile1 = "$basedir/.hpr_db.cfg"; my $configfile2 = "$basedir/.hpr_pg.cfg"; my $email_template = 'host_%s@hackerpublicradio.org'; my $default_licence = 'CC-BY-SA'; my ( $dbh1, $dbh2, $sth1, $h1, $rv1, $sth2, $h2, $rv2, $sth3, $h3, $rv3, $sth4, $h4, $rv4 ); my ( %doubles, @h, %hosts, $unknown, $default_email ); # # Enable Unicode mode # binmode STDOUT, ":encoding(UTF-8)"; binmode STDERR, ":encoding(UTF-8)"; # # Load database configuration data # my $conf1 = Config::General->new( -ConfigFile => $configfile1, -InterPolateVars => 1, -ExtendedAccess => 1 ); my %config1 = $conf1->getall(); my $conf2 = Config::General->new( -ConfigFile => $configfile2, -InterPolateVars => 1, -ExtendedAccess => 1 ); my %config2 = $conf2->getall(); #------------------------------------------------------------------------------- # Connect to the MariaDB database #------------------------------------------------------------------------------- my $dbtype1 = $config1{database}->{type} // 'mysql'; my $dbhost1 = $config1{database}->{host} // '127.0.0.1'; my $dbport1 = $config1{database}->{port} // 3306; my $dbname1 = $config1{database}->{name}; my $dbuser1 = $config1{database}->{user}; my $dbpwd1 = $config1{database}->{password}; $dbh1 = DBI->connect( "dbi:$dbtype1:host=$dbhost1;port=$dbport1;database=$dbname1", $dbuser1, $dbpwd1, { AutoCommit => 1 } ) or die $DBI::errstr; # # Enable client-side UTF8 # $dbh1->{mysql_enable_utf8} = 1; #------------------------------------------------------------------------------- # Connect to the PostgreSQL database #------------------------------------------------------------------------------- my $dbtype2 = $config2{database}->{type} // 'Pg'; my $dbhost2 = $config2{database}->{host} // '127.0.0.1'; my $dbport2 = $config2{database}->{port} // 5432; my $dbname2 = $config2{database}->{name}; my $dbuser2 = $config2{database}->{user}; my $dbpwd2 = $config2{database}->{password}; $dbh2 = DBI->connect( "dbi:$dbtype2:host=$dbhost2;database=$dbname2;port=$dbport2", $dbuser2, $dbpwd2, { PrintError => 0, AutoCommit => 1 } ) or die $DBI::errstr; # # Enable client-side UTF8 # $dbh2->{pg_enable_utf8} = 1; #------------------------------------------------------------------------------- # Query preparation #------------------------------------------------------------------------------- # # MariaDB query to find double hosts # my $sql1 = q{ SELECT hostid, host FROM hosts WHERE host regexp '[[:<:]]and[[:>:]]' ORDER BY hostid }; $sth1 = $dbh1->prepare($sql1) or die $DBI::errstr; if ( $dbh1->err ) { warn $dbh1->errstr; } # # MariaDB query to find the host by name # $sth2 = $dbh1->prepare(q{SELECT hostid FROM hosts WHERE host REGEXP ?}) or die $DBI::errstr; if ( $dbh1->err ) { warn $dbh1->errstr; } # # PostgreSQL query to register an unknown host # $sth3 = $dbh2->prepare(q{INSERT INTO hosts (host,email,license) VALUES (?,?,?)}) or die $DBI::errstr; if ( $dbh2->err ) { warn $dbh2->errstr; } # # PostgreSQL query to find shows with particular host ids # $sth4 = $dbh2->prepare( q{ SELECT e.id AS eps_id FROM episodes e JOIN episodes_hosts_xref eh ON (e.id = eh.episodes_id) JOIN hosts h ON (h.id = eh.hosts_id) WHERE h.id = ? } ) or die $DBI::errstr; if ( $dbh2->err ) { warn $dbh2->errstr; } #------------------------------------------------------------------------------- # Find all the "double hosts" #------------------------------------------------------------------------------- # # Query MariaDB for the target hosts # $sth1->execute; if ( $dbh1->err ) { die $dbh1->errstr; } # # Loop through the list of double hostnames and parse them out. Save the # originals in the %doubles hash and the parsed names in the %hosts hash. # while ( $h1 = $sth1->fetchrow_hashref ) { # # Each hash value is a hash containing the original id, and, in a sub-hash # the replacement ids # $doubles{$h1->{host}} = { double => $h1->{hostid}, singles => {}, }; # # Parse the double host string # @h = ( $h1->{host} =~ /^(.+)\s+and\s+(.+)$/ ); printf "%-4d %s", $h1->{hostid}, $h1->{host}; print " [", join( ",", @h ), "]\n"; # # Initialise the entries for %doubles and %hosts # for my $host (@h) { $doubles{$h1->{host}}->{singles}->{$host} = undef; unless ( exists( $hosts{$host} ) ) { $hosts{$host} = 0; } } } print '-' x 80,"\n"; #------------------------------------------------------------------------------- # Find the single hosts in the 'hosts' table #------------------------------------------------------------------------------- # # Scan the list of individual hosts and find them in the 'hosts' table # $unknown = 0; foreach my $host ( sort(keys(%hosts)) ) { $rv2 = $sth2->execute("^$host\$"); if ( $dbh1->err ) { die $dbh1->errstr; } $rv2 = 0 if ( $rv2 eq '0E0' ); if ($rv2) { $h2 = $sth2->fetchrow_hashref; print "Found id for $host: ", $h2->{hostid}, "\n"; $hosts{$host} = $h2->{hostid}; save_hostid(\%doubles,$host,$h2->{hostid}); } else { print "Can't find $host\n"; $unknown++; } } #print Dumper(\%hosts),"\n"; print '-' x 80,"\n"; #------------------------------------------------------------------------------- # Allocate all unknown hosts a host id in the PostgreSQL database, and give an # unique email address. #------------------------------------------------------------------------------- if ( $unknown > 0 ) { print "Registering $unknown hosts\n"; foreach my $host ( sort( keys(%hosts) ) ) { if ( $hosts{$host} == 0 ) { $rv3 = $sth3->execute( $host, undef, $default_licence ); if ( $dbh2->err ) { die $dbh2->errstr; } # # Write a row to the 'hosts' table and save the id number # generated # my $newid = $dbh2->last_insert_id( undef, undef, undef, undef, { sequence => 'host_seq' } ); $hosts{$host} = $newid; save_hostid(\%doubles,$host,$newid); print "Host $host added with id $newid\n"; # # Give the new host entry a default email address # $default_email = sprintf($email_template,$newid); $rv3 = $dbh2->do( 'UPDATE hosts SET email = ? WHERE id = ?', undef, $default_email, $newid ); if ( $dbh2->err ) { warn $dbh2->errstr; } $rv3 = 0 if ( $rv3 eq '0E0' ); warn "Failed to set email address $default_email for $host\n" unless ( defined($rv3) ); } } } print '-' x 80,"\n"; #------------------------------------------------------------------------------- # Now %doubles contains all the original names and host ids and %hosts # contains the parsed out names and their ids. We can look for shows # attributed to the first set and re-attribute them to the second set. #------------------------------------------------------------------------------- print "Changing host associations for shows with two hosts\n"; foreach my $double ( sort( keys(%doubles) ) ) { print "Processing $double\n"; my ( $doubleid, @newids ) = ( $doubles{$double}->{double}, values( %{ $doubles{$double}->{singles} } ) ); print " Original id: $doubleid\n"; print " Replacements: ", join( ", ", @newids ), "\n"; # # Find shows marked as belonging to this double-host # $sth4->execute($doubleid); if ( $dbh2->err ) { die $dbh2->errstr; } # # Process all the shows # while ( $h4 = $sth4->fetchrow_hashref ) { my $eps_id = $h4->{eps_id}; print " Show $eps_id is ascribed to host $doubleid\n"; $dbh2->begin_work(); # # Delete the xref link for the double host # $rv4 = $dbh2->do( 'DELETE FROM episodes_hosts_xref WHERE episodes_id = ?', undef, $eps_id ); if ( $dbh2->err ) { warn $dbh2->errstr; } $rv4 = 0 if ( $rv4 eq '0E0' ); if ( defined($rv4) ) { print " Deleted entry from episodes_hosts_xref for $eps_id\n"; } else { warn "Problem deleting from episodes_hosts_xref for $eps_id\n"; } # # Add links for the single hosts # foreach my $hid (@newids) { $rv4 = $dbh2->do( 'INSERT INTO episodes_hosts_xref VALUES (?,?)', undef, $eps_id, $hid ); if ( $dbh2->err ) { warn $dbh2->errstr; } $rv4 = 0 if ( $rv4 eq '0E0' ); if ( defined($rv4) ) { print " Added entry to episodes_hosts_xref values ", "$eps_id,$hid\n"; } else { warn "Problem adding to episodes_hosts_xref values " . "$eps_id,$hid\n"; } } # # Commit the delete/inserts above # $dbh2->commit(); } print '~' x 80, "\n"; # # Delete the double host (NOTE: This will fail due to referential # integrity if the DELETE above failed, so there is scope for debris to be # left around) # $rv4 = $dbh2->do( 'DELETE FROM hosts WHERE id = ?', undef, $doubleid ); if ( $dbh2->err ) { warn $dbh2->errstr; } $rv4 = 0 if ( $rv4 eq '0E0' ); if ( defined($rv4) ) { print " Deleted entry from hosts for id $doubleid ($double)\n"; } else { warn "Problem deleting from hosts for id $doubleid ($double)\n"; } } print '-' x 80,"\n"; exit; #=== FUNCTION ================================================================ # NAME: save_hostid # PURPOSE: Saves the host id after searching for the key in the %doubles # hash # PARAMETERS: $doubles hashref to %doubles # $host host key # $hostid host id number # RETURNS: Nothing # DESCRIPTION: Searches the %doubles hash for particular keys in the # 'singles' sub-hash. If found saves the corresponding host id # there. # THROWS: No exceptions # COMMENTS: None # SEE ALSO: N/A #=============================================================================== sub save_hostid { my ( $doubles, $host, $hostid ) = @_; foreach my $key ( keys(%$doubles) ) { if ( exists( $doubles->{$key}->{singles}->{$host} ) ) { $doubles->{$key}->{singles}->{$host} = $hostid; } } } # vim: syntax=perl:ts=8:sw=4:et:ai:tw=78:fo=tcrqn21:fdm=marker