forked from HPR/hpr-tools
		
	
		
			
	
	
		
			426 lines
		
	
	
		
			12 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
		
		
			
		
	
	
			426 lines
		
	
	
		
			12 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
|   | #!/usr/bin/env perl | ||
|  | #=============================================================================== | ||
|  | # | ||
|  | #         FILE: find_double_hosts | ||
|  | # | ||
|  | #        USAGE: ./find_double_hosts | ||
|  | # | ||
|  | #  DESCRIPTION: Find HPR shows with two hosts (host is "A and B"), find the | ||
|  | #               hosts if possible and flag updates to the database to | ||
|  | #               represent the dual nature. | ||
|  | # | ||
|  | #      OPTIONS: --- | ||
|  | # REQUIREMENTS: --- | ||
|  | #         BUGS: --- | ||
|  | #        NOTES: --- | ||
|  | #       AUTHOR: Dave Morriss (djm), Dave.Morriss@gmail.com | ||
|  | #      VERSION: 0.0.2 | ||
|  | #      CREATED: 2017-10-13 19:17:51 | ||
|  | #     REVISION: 2017-10-13 19:19:43 | ||
|  | # | ||
|  | #=============================================================================== | ||
|  | 
 | ||
|  | use 5.010; | ||
|  | use strict; | ||
|  | use warnings; | ||
|  | use utf8; | ||
|  | 
 | ||
|  | use Config::General; | ||
|  | use DBI; | ||
|  | 
 | ||
|  | use Data::Dumper; | ||
|  | 
 | ||
|  | # | ||
|  | # Version number (manually incremented) | ||
|  | # | ||
|  | our $VERSION = '0.0.2'; | ||
|  | 
 | ||
|  | # | ||
|  | # Script and directory names | ||
|  | # | ||
|  | ( my $PROG = $0 ) =~ s|.*/||mx; | ||
|  | ( my $DIR  = $0 ) =~ s|/?[^/]*$||mx; | ||
|  | $DIR = '.' unless $DIR; | ||
|  | 
 | ||
|  | #------------------------------------------------------------------------------- | ||
|  | # Declarations | ||
|  | #------------------------------------------------------------------------------- | ||
|  | # | ||
|  | # Constants and other declarations | ||
|  | # | ||
|  | my $basedir    = "$ENV{HOME}/HPR/Database"; | ||
|  | my $configfile1 = "$basedir/.hpr_db.cfg"; | ||
|  | my $configfile2 = "$basedir/.hpr_pg.cfg"; | ||
|  | 
 | ||
|  | my $email_template  = 'host_%s@hackerpublicradio.org'; | ||
|  | my $default_licence = 'CC-BY-SA'; | ||
|  | 
 | ||
|  | my ( $dbh1, $dbh2, $sth1, $h1, $rv1, $sth2, $h2, $rv2, $sth3, $h3, $rv3, $sth4, $h4, $rv4 ); | ||
|  | 
 | ||
|  | my ( %doubles, @h, %hosts, $unknown, $default_email ); | ||
|  | 
 | ||
|  | # | ||
|  | # Enable Unicode mode | ||
|  | # | ||
|  | binmode STDOUT, ":encoding(UTF-8)"; | ||
|  | binmode STDERR, ":encoding(UTF-8)"; | ||
|  | 
 | ||
|  | # | ||
|  | # Load database configuration data | ||
|  | # | ||
|  | my $conf1 = Config::General->new( | ||
|  |     -ConfigFile      => $configfile1, | ||
|  |     -InterPolateVars => 1, | ||
|  |     -ExtendedAccess  => 1 | ||
|  | ); | ||
|  | my %config1 = $conf1->getall(); | ||
|  | 
 | ||
|  | my $conf2 = Config::General->new( | ||
|  |     -ConfigFile      => $configfile2, | ||
|  |     -InterPolateVars => 1, | ||
|  |     -ExtendedAccess  => 1 | ||
|  | ); | ||
|  | my %config2 = $conf2->getall(); | ||
|  | 
 | ||
|  | #------------------------------------------------------------------------------- | ||
|  | # Connect to the MariaDB database | ||
|  | #------------------------------------------------------------------------------- | ||
|  | my $dbtype1 = $config1{database}->{type} // 'mysql'; | ||
|  | my $dbhost1 = $config1{database}->{host} // '127.0.0.1'; | ||
|  | my $dbport1 = $config1{database}->{port} // 3306; | ||
|  | my $dbname1 = $config1{database}->{name}; | ||
|  | my $dbuser1 = $config1{database}->{user}; | ||
|  | my $dbpwd1  = $config1{database}->{password}; | ||
|  | $dbh1 | ||
|  |     = DBI->connect( "dbi:$dbtype1:host=$dbhost1;port=$dbport1;database=$dbname1", | ||
|  |     $dbuser1, $dbpwd1, { AutoCommit => 1 } ) | ||
|  |     or die $DBI::errstr; | ||
|  | 
 | ||
|  | # | ||
|  | # Enable client-side UTF8 | ||
|  | # | ||
|  | $dbh1->{mysql_enable_utf8} = 1; | ||
|  | 
 | ||
|  | #------------------------------------------------------------------------------- | ||
|  | # Connect to the PostgreSQL database | ||
|  | #------------------------------------------------------------------------------- | ||
|  | my $dbtype2 = $config2{database}->{type} // 'Pg'; | ||
|  | my $dbhost2 = $config2{database}->{host} // '127.0.0.1'; | ||
|  | my $dbport2 = $config2{database}->{port} // 5432; | ||
|  | my $dbname2 = $config2{database}->{name}; | ||
|  | my $dbuser2 = $config2{database}->{user}; | ||
|  | my $dbpwd2  = $config2{database}->{password}; | ||
|  | $dbh2 = DBI->connect( "dbi:$dbtype2:host=$dbhost2;database=$dbname2;port=$dbport2", | ||
|  |     $dbuser2, $dbpwd2, { PrintError => 0, AutoCommit => 1 } ) | ||
|  |     or die $DBI::errstr; | ||
|  | 
 | ||
|  | # | ||
|  | # Enable client-side UTF8 | ||
|  | # | ||
|  | $dbh2->{pg_enable_utf8} = 1; | ||
|  | 
 | ||
|  | #------------------------------------------------------------------------------- | ||
|  | # Query preparation | ||
|  | #------------------------------------------------------------------------------- | ||
|  | # | ||
|  | # MariaDB query to find double hosts | ||
|  | # | ||
|  | my $sql1 = q{ | ||
|  |     SELECT hostid, host FROM hosts | ||
|  |     WHERE host regexp '[[:<:]]and[[:>:]]' | ||
|  |     ORDER BY hostid | ||
|  | }; | ||
|  | 
 | ||
|  | $sth1 = $dbh1->prepare($sql1) or die $DBI::errstr; | ||
|  | if ( $dbh1->err ) { | ||
|  |     warn $dbh1->errstr; | ||
|  | } | ||
|  | 
 | ||
|  | # | ||
|  | # MariaDB query to find the host by name | ||
|  | # | ||
|  | $sth2 = $dbh1->prepare(q{SELECT hostid FROM hosts WHERE host REGEXP ?}) | ||
|  |     or die $DBI::errstr; | ||
|  | if ( $dbh1->err ) { | ||
|  |     warn $dbh1->errstr; | ||
|  | } | ||
|  | 
 | ||
|  | # | ||
|  | # PostgreSQL query to register an unknown host | ||
|  | # | ||
|  | $sth3 | ||
|  |     = $dbh2->prepare(q{INSERT INTO hosts (host,email,license) VALUES (?,?,?)}) | ||
|  |     or die $DBI::errstr; | ||
|  | if ( $dbh2->err ) { | ||
|  |     warn $dbh2->errstr; | ||
|  | } | ||
|  | 
 | ||
|  | # | ||
|  | # PostgreSQL query to find shows with particular host ids | ||
|  | # | ||
|  | $sth4 = $dbh2->prepare( | ||
|  |     q{ | ||
|  |         SELECT e.id AS eps_id | ||
|  |         FROM episodes e | ||
|  |         JOIN episodes_hosts_xref eh ON (e.id = eh.episodes_id) | ||
|  |         JOIN hosts h ON (h.id = eh.hosts_id) | ||
|  |         WHERE h.id = ? | ||
|  |     } | ||
|  | ) or die $DBI::errstr; | ||
|  | if ( $dbh2->err ) { | ||
|  |     warn $dbh2->errstr; | ||
|  | } | ||
|  | 
 | ||
|  | #------------------------------------------------------------------------------- | ||
|  | # Find all the "double hosts" | ||
|  | #------------------------------------------------------------------------------- | ||
|  | # | ||
|  | # Query MariaDB for the target hosts | ||
|  | # | ||
|  | $sth1->execute; | ||
|  | if ( $dbh1->err ) { | ||
|  |     die $dbh1->errstr; | ||
|  | } | ||
|  | 
 | ||
|  | # | ||
|  | # Loop through the list of double hostnames and parse them out. Save the | ||
|  | # originals in the %doubles hash and the parsed names in the %hosts hash. | ||
|  | # | ||
|  | while ( $h1 = $sth1->fetchrow_hashref ) { | ||
|  |     # | ||
|  |     # Each hash value is a hash containing the original id, and, in a sub-hash | ||
|  |     # the replacement ids | ||
|  |     # | ||
|  |     $doubles{$h1->{host}} = { | ||
|  |         double => $h1->{hostid}, | ||
|  |         singles => {}, | ||
|  |     }; | ||
|  | 
 | ||
|  |     # | ||
|  |     # Parse the double host string | ||
|  |     # | ||
|  |     @h = ( $h1->{host} =~ /^(.+)\s+and\s+(.+)$/ ); | ||
|  | 
 | ||
|  |     printf "%-4d %s", $h1->{hostid}, $h1->{host}; | ||
|  |     print " [", join( ",", @h ), "]\n"; | ||
|  | 
 | ||
|  |     # | ||
|  |     # Initialise the entries for %doubles and %hosts | ||
|  |     # | ||
|  |     for my $host (@h) { | ||
|  |         $doubles{$h1->{host}}->{singles}->{$host} = undef; | ||
|  |         unless ( exists( $hosts{$host} ) ) { | ||
|  |             $hosts{$host} = 0; | ||
|  |         } | ||
|  |     } | ||
|  | } | ||
|  | 
 | ||
|  | print '-' x 80,"\n"; | ||
|  | 
 | ||
|  | #------------------------------------------------------------------------------- | ||
|  | # Find the single hosts in the 'hosts' table | ||
|  | #------------------------------------------------------------------------------- | ||
|  | # | ||
|  | # Scan the list of individual hosts and find them in the 'hosts' table | ||
|  | # | ||
|  | $unknown = 0; | ||
|  | foreach my $host ( sort(keys(%hosts)) ) { | ||
|  |     $rv2 = $sth2->execute("^$host\$"); | ||
|  |     if ( $dbh1->err ) { | ||
|  |         die $dbh1->errstr; | ||
|  |     } | ||
|  |     $rv2 = 0 if ( $rv2 eq '0E0' ); | ||
|  | 
 | ||
|  |     if ($rv2) { | ||
|  |         $h2 = $sth2->fetchrow_hashref; | ||
|  |         print "Found id for $host: ", $h2->{hostid}, "\n"; | ||
|  |         $hosts{$host} = $h2->{hostid}; | ||
|  |         save_hostid(\%doubles,$host,$h2->{hostid}); | ||
|  |     } | ||
|  |     else { | ||
|  |         print "Can't find $host\n"; | ||
|  |         $unknown++; | ||
|  |     } | ||
|  | } | ||
|  | 
 | ||
|  | #print Dumper(\%hosts),"\n"; | ||
|  | 
 | ||
|  | print '-' x 80,"\n"; | ||
|  | 
 | ||
|  | #------------------------------------------------------------------------------- | ||
|  | # Allocate all unknown hosts a host id in the PostgreSQL database, and give an | ||
|  | # unique email address. | ||
|  | #------------------------------------------------------------------------------- | ||
|  | if ( $unknown > 0 ) { | ||
|  |     print "Registering $unknown hosts\n"; | ||
|  | 
 | ||
|  |     foreach my $host ( sort( keys(%hosts) ) ) { | ||
|  |         if ( $hosts{$host} == 0 ) { | ||
|  |             $rv3 = $sth3->execute( $host, undef, $default_licence ); | ||
|  |             if ( $dbh2->err ) { | ||
|  |                 die $dbh2->errstr; | ||
|  |             } | ||
|  | 
 | ||
|  |             # | ||
|  |             # Write a row to the 'hosts' table and save the id number | ||
|  |             # generated | ||
|  |             # | ||
|  |             my $newid = $dbh2->last_insert_id( undef, undef, undef, undef, | ||
|  |                 { sequence => 'host_seq' } ); | ||
|  |             $hosts{$host} = $newid; | ||
|  |             save_hostid(\%doubles,$host,$newid); | ||
|  |             print "Host $host added with id $newid\n"; | ||
|  | 
 | ||
|  |             # | ||
|  |             # Give the new host entry a default email address | ||
|  |             # | ||
|  |             $default_email = sprintf($email_template,$newid); | ||
|  |             $rv3 = $dbh2->do( 'UPDATE hosts SET email = ? WHERE id = ?', | ||
|  |                 undef, $default_email, $newid ); | ||
|  |             if ( $dbh2->err ) { | ||
|  |                 warn $dbh2->errstr; | ||
|  |             } | ||
|  |             $rv3 = 0 if ( $rv3 eq '0E0' ); | ||
|  | 
 | ||
|  |             warn "Failed to set email address $default_email for $host\n" | ||
|  |                 unless ( defined($rv3) ); | ||
|  | 
 | ||
|  |         } | ||
|  |     } | ||
|  | } | ||
|  | 
 | ||
|  | print '-' x 80,"\n"; | ||
|  | 
 | ||
|  | #------------------------------------------------------------------------------- | ||
|  | # Now %doubles contains all the original names and host ids and %hosts | ||
|  | # contains the parsed out names and their ids. We can look for shows | ||
|  | # attributed to the first set and re-attribute them to the second set. | ||
|  | #------------------------------------------------------------------------------- | ||
|  | print "Changing host associations for shows with two hosts\n"; | ||
|  | foreach my $double ( sort( keys(%doubles) ) ) { | ||
|  |     print "Processing $double\n"; | ||
|  |     my ( $doubleid, @newids ) = ( | ||
|  |         $doubles{$double}->{double}, | ||
|  |         values( %{ $doubles{$double}->{singles} } ) | ||
|  |     ); | ||
|  |     print "  Original id: $doubleid\n"; | ||
|  |     print "  Replacements: ", join( ", ", @newids ), "\n"; | ||
|  | 
 | ||
|  |     # | ||
|  |     # Find shows marked as belonging to this double-host | ||
|  |     # | ||
|  |     $sth4->execute($doubleid); | ||
|  |     if ( $dbh2->err ) { | ||
|  |         die $dbh2->errstr; | ||
|  |     } | ||
|  | 
 | ||
|  |     # | ||
|  |     # Process all the shows | ||
|  |     # | ||
|  |     while ( $h4 = $sth4->fetchrow_hashref ) { | ||
|  |         my $eps_id = $h4->{eps_id}; | ||
|  |         print "  Show $eps_id is ascribed to host $doubleid\n"; | ||
|  | 
 | ||
|  |         $dbh2->begin_work(); | ||
|  | 
 | ||
|  |         # | ||
|  |         # Delete the xref link for the double host | ||
|  |         # | ||
|  |         $rv4 | ||
|  |             = $dbh2->do( | ||
|  |             'DELETE FROM episodes_hosts_xref WHERE episodes_id = ?', | ||
|  |             undef, $eps_id ); | ||
|  |         if ( $dbh2->err ) { | ||
|  |             warn $dbh2->errstr; | ||
|  |         } | ||
|  |         $rv4 = 0 if ( $rv4 eq '0E0' ); | ||
|  | 
 | ||
|  |         if ( defined($rv4) ) { | ||
|  |             print "  Deleted entry from episodes_hosts_xref for $eps_id\n"; | ||
|  |         } | ||
|  |         else { | ||
|  |             warn "Problem deleting from episodes_hosts_xref for $eps_id\n"; | ||
|  |         } | ||
|  | 
 | ||
|  |         # | ||
|  |         # Add links for the single hosts | ||
|  |         # | ||
|  |         foreach my $hid (@newids) { | ||
|  |             $rv4 = $dbh2->do( 'INSERT INTO episodes_hosts_xref VALUES (?,?)', | ||
|  |                 undef, $eps_id, $hid ); | ||
|  |             if ( $dbh2->err ) { | ||
|  |                 warn $dbh2->errstr; | ||
|  |             } | ||
|  |             $rv4 = 0 if ( $rv4 eq '0E0' ); | ||
|  | 
 | ||
|  |             if ( defined($rv4) ) { | ||
|  |                 print "  Added entry to episodes_hosts_xref values ", | ||
|  |                     "$eps_id,$hid\n"; | ||
|  |             } | ||
|  |             else { | ||
|  |                 warn "Problem adding to episodes_hosts_xref values " | ||
|  |                     . "$eps_id,$hid\n"; | ||
|  |             } | ||
|  | 
 | ||
|  |         } | ||
|  | 
 | ||
|  |         # | ||
|  |         # Commit the delete/inserts above | ||
|  |         # | ||
|  |         $dbh2->commit(); | ||
|  | 
 | ||
|  |     } | ||
|  |     print '~' x 80, "\n"; | ||
|  | 
 | ||
|  |     # | ||
|  |     # Delete the double host (NOTE: This will fail due to referential | ||
|  |     # integrity if the DELETE above failed, so there is scope for debris to be | ||
|  |     # left around) | ||
|  |     # | ||
|  |     $rv4 = $dbh2->do( 'DELETE FROM hosts WHERE id = ?', undef, $doubleid ); | ||
|  |     if ( $dbh2->err ) { | ||
|  |         warn $dbh2->errstr; | ||
|  |     } | ||
|  |     $rv4 = 0 if ( $rv4 eq '0E0' ); | ||
|  | 
 | ||
|  |     if ( defined($rv4) ) { | ||
|  |         print "  Deleted entry from hosts for id $doubleid ($double)\n"; | ||
|  |     } | ||
|  |     else { | ||
|  |         warn "Problem deleting from hosts for id $doubleid ($double)\n"; | ||
|  |     } | ||
|  | 
 | ||
|  | } | ||
|  | 
 | ||
|  | print '-' x 80,"\n"; | ||
|  | 
 | ||
|  | exit; | ||
|  | 
 | ||
|  | #===  FUNCTION  ================================================================ | ||
|  | #         NAME: save_hostid | ||
|  | #      PURPOSE: Saves the host id after searching for the key in the %doubles | ||
|  | #               hash | ||
|  | #   PARAMETERS: $doubles                hashref to %doubles | ||
|  | #               $host                   host key | ||
|  | #               $hostid                 host id number | ||
|  | #      RETURNS: Nothing | ||
|  | #  DESCRIPTION: Searches the %doubles hash for particular keys in the | ||
|  | #               'singles' sub-hash. If found saves the corresponding host id | ||
|  | #               there. | ||
|  | #       THROWS: No exceptions | ||
|  | #     COMMENTS: None | ||
|  | #     SEE ALSO: N/A | ||
|  | #=============================================================================== | ||
|  | sub save_hostid { | ||
|  |     my ( $doubles, $host, $hostid ) = @_; | ||
|  | 
 | ||
|  |     foreach my $key ( keys(%$doubles) ) { | ||
|  |         if ( exists( $doubles->{$key}->{singles}->{$host} ) ) { | ||
|  |             $doubles->{$key}->{singles}->{$host} = $hostid; | ||
|  |         } | ||
|  |     } | ||
|  | } | ||
|  | 
 | ||
|  | # vim: syntax=perl:ts=8:sw=4:et:ai:tw=78:fo=tcrqn21:fdm=marker | ||
|  | 
 |