forked from HPR/hpr-tools
		
	Moved project directories and files to an empty local repo
This commit is contained in:
		
							
								
								
									
										425
									
								
								Database/find_double_hosts
									
									
									
									
									
										Executable file
									
								
							
							
						
						
									
										425
									
								
								Database/find_double_hosts
									
									
									
									
									
										Executable file
									
								
							| @@ -0,0 +1,425 @@ | ||||
| #!/usr/bin/env perl | ||||
| #=============================================================================== | ||||
| # | ||||
| #         FILE: find_double_hosts | ||||
| # | ||||
| #        USAGE: ./find_double_hosts | ||||
| # | ||||
| #  DESCRIPTION: Find HPR shows with two hosts (host is "A and B"), find the | ||||
| #               hosts if possible and flag updates to the database to | ||||
| #               represent the dual nature. | ||||
| # | ||||
| #      OPTIONS: --- | ||||
| # REQUIREMENTS: --- | ||||
| #         BUGS: --- | ||||
| #        NOTES: --- | ||||
| #       AUTHOR: Dave Morriss (djm), Dave.Morriss@gmail.com | ||||
| #      VERSION: 0.0.2 | ||||
| #      CREATED: 2017-10-13 19:17:51 | ||||
| #     REVISION: 2017-10-13 19:19:43 | ||||
| # | ||||
| #=============================================================================== | ||||
|  | ||||
| use 5.010; | ||||
| use strict; | ||||
| use warnings; | ||||
| use utf8; | ||||
|  | ||||
| use Config::General; | ||||
| use DBI; | ||||
|  | ||||
| use Data::Dumper; | ||||
|  | ||||
| # | ||||
| # Version number (manually incremented) | ||||
| # | ||||
| our $VERSION = '0.0.2'; | ||||
|  | ||||
| # | ||||
| # Script and directory names | ||||
| # | ||||
| ( my $PROG = $0 ) =~ s|.*/||mx; | ||||
| ( my $DIR  = $0 ) =~ s|/?[^/]*$||mx; | ||||
| $DIR = '.' unless $DIR; | ||||
|  | ||||
| #------------------------------------------------------------------------------- | ||||
| # Declarations | ||||
| #------------------------------------------------------------------------------- | ||||
| # | ||||
| # Constants and other declarations | ||||
| # | ||||
| my $basedir    = "$ENV{HOME}/HPR/Database"; | ||||
| my $configfile1 = "$basedir/.hpr_db.cfg"; | ||||
| my $configfile2 = "$basedir/.hpr_pg.cfg"; | ||||
|  | ||||
| my $email_template  = 'host_%s@hackerpublicradio.org'; | ||||
| my $default_licence = 'CC-BY-SA'; | ||||
|  | ||||
| my ( $dbh1, $dbh2, $sth1, $h1, $rv1, $sth2, $h2, $rv2, $sth3, $h3, $rv3, $sth4, $h4, $rv4 ); | ||||
|  | ||||
| my ( %doubles, @h, %hosts, $unknown, $default_email ); | ||||
|  | ||||
| # | ||||
| # Enable Unicode mode | ||||
| # | ||||
| binmode STDOUT, ":encoding(UTF-8)"; | ||||
| binmode STDERR, ":encoding(UTF-8)"; | ||||
|  | ||||
| # | ||||
| # Load database configuration data | ||||
| # | ||||
| my $conf1 = Config::General->new( | ||||
|     -ConfigFile      => $configfile1, | ||||
|     -InterPolateVars => 1, | ||||
|     -ExtendedAccess  => 1 | ||||
| ); | ||||
| my %config1 = $conf1->getall(); | ||||
|  | ||||
| my $conf2 = Config::General->new( | ||||
|     -ConfigFile      => $configfile2, | ||||
|     -InterPolateVars => 1, | ||||
|     -ExtendedAccess  => 1 | ||||
| ); | ||||
| my %config2 = $conf2->getall(); | ||||
|  | ||||
| #------------------------------------------------------------------------------- | ||||
| # Connect to the MariaDB database | ||||
| #------------------------------------------------------------------------------- | ||||
| my $dbtype1 = $config1{database}->{type} // 'mysql'; | ||||
| my $dbhost1 = $config1{database}->{host} // '127.0.0.1'; | ||||
| my $dbport1 = $config1{database}->{port} // 3306; | ||||
| my $dbname1 = $config1{database}->{name}; | ||||
| my $dbuser1 = $config1{database}->{user}; | ||||
| my $dbpwd1  = $config1{database}->{password}; | ||||
| $dbh1 | ||||
|     = DBI->connect( "dbi:$dbtype1:host=$dbhost1;port=$dbport1;database=$dbname1", | ||||
|     $dbuser1, $dbpwd1, { AutoCommit => 1 } ) | ||||
|     or die $DBI::errstr; | ||||
|  | ||||
| # | ||||
| # Enable client-side UTF8 | ||||
| # | ||||
| $dbh1->{mysql_enable_utf8} = 1; | ||||
|  | ||||
| #------------------------------------------------------------------------------- | ||||
| # Connect to the PostgreSQL database | ||||
| #------------------------------------------------------------------------------- | ||||
| my $dbtype2 = $config2{database}->{type} // 'Pg'; | ||||
| my $dbhost2 = $config2{database}->{host} // '127.0.0.1'; | ||||
| my $dbport2 = $config2{database}->{port} // 5432; | ||||
| my $dbname2 = $config2{database}->{name}; | ||||
| my $dbuser2 = $config2{database}->{user}; | ||||
| my $dbpwd2  = $config2{database}->{password}; | ||||
| $dbh2 = DBI->connect( "dbi:$dbtype2:host=$dbhost2;database=$dbname2;port=$dbport2", | ||||
|     $dbuser2, $dbpwd2, { PrintError => 0, AutoCommit => 1 } ) | ||||
|     or die $DBI::errstr; | ||||
|  | ||||
| # | ||||
| # Enable client-side UTF8 | ||||
| # | ||||
| $dbh2->{pg_enable_utf8} = 1; | ||||
|  | ||||
| #------------------------------------------------------------------------------- | ||||
| # Query preparation | ||||
| #------------------------------------------------------------------------------- | ||||
| # | ||||
| # MariaDB query to find double hosts | ||||
| # | ||||
| my $sql1 = q{ | ||||
|     SELECT hostid, host FROM hosts | ||||
|     WHERE host regexp '[[:<:]]and[[:>:]]' | ||||
|     ORDER BY hostid | ||||
| }; | ||||
|  | ||||
| $sth1 = $dbh1->prepare($sql1) or die $DBI::errstr; | ||||
| if ( $dbh1->err ) { | ||||
|     warn $dbh1->errstr; | ||||
| } | ||||
|  | ||||
| # | ||||
| # MariaDB query to find the host by name | ||||
| # | ||||
| $sth2 = $dbh1->prepare(q{SELECT hostid FROM hosts WHERE host REGEXP ?}) | ||||
|     or die $DBI::errstr; | ||||
| if ( $dbh1->err ) { | ||||
|     warn $dbh1->errstr; | ||||
| } | ||||
|  | ||||
| # | ||||
| # PostgreSQL query to register an unknown host | ||||
| # | ||||
| $sth3 | ||||
|     = $dbh2->prepare(q{INSERT INTO hosts (host,email,license) VALUES (?,?,?)}) | ||||
|     or die $DBI::errstr; | ||||
| if ( $dbh2->err ) { | ||||
|     warn $dbh2->errstr; | ||||
| } | ||||
|  | ||||
| # | ||||
| # PostgreSQL query to find shows with particular host ids | ||||
| # | ||||
| $sth4 = $dbh2->prepare( | ||||
|     q{ | ||||
|         SELECT e.id AS eps_id | ||||
|         FROM episodes e | ||||
|         JOIN episodes_hosts_xref eh ON (e.id = eh.episodes_id) | ||||
|         JOIN hosts h ON (h.id = eh.hosts_id) | ||||
|         WHERE h.id = ? | ||||
|     } | ||||
| ) or die $DBI::errstr; | ||||
| if ( $dbh2->err ) { | ||||
|     warn $dbh2->errstr; | ||||
| } | ||||
|  | ||||
| #------------------------------------------------------------------------------- | ||||
| # Find all the "double hosts" | ||||
| #------------------------------------------------------------------------------- | ||||
| # | ||||
| # Query MariaDB for the target hosts | ||||
| # | ||||
| $sth1->execute; | ||||
| if ( $dbh1->err ) { | ||||
|     die $dbh1->errstr; | ||||
| } | ||||
|  | ||||
| # | ||||
| # Loop through the list of double hostnames and parse them out. Save the | ||||
| # originals in the %doubles hash and the parsed names in the %hosts hash. | ||||
| # | ||||
| while ( $h1 = $sth1->fetchrow_hashref ) { | ||||
|     # | ||||
|     # Each hash value is a hash containing the original id, and, in a sub-hash | ||||
|     # the replacement ids | ||||
|     # | ||||
|     $doubles{$h1->{host}} = { | ||||
|         double => $h1->{hostid}, | ||||
|         singles => {}, | ||||
|     }; | ||||
|  | ||||
|     # | ||||
|     # Parse the double host string | ||||
|     # | ||||
|     @h = ( $h1->{host} =~ /^(.+)\s+and\s+(.+)$/ ); | ||||
|  | ||||
|     printf "%-4d %s", $h1->{hostid}, $h1->{host}; | ||||
|     print " [", join( ",", @h ), "]\n"; | ||||
|  | ||||
|     # | ||||
|     # Initialise the entries for %doubles and %hosts | ||||
|     # | ||||
|     for my $host (@h) { | ||||
|         $doubles{$h1->{host}}->{singles}->{$host} = undef; | ||||
|         unless ( exists( $hosts{$host} ) ) { | ||||
|             $hosts{$host} = 0; | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| print '-' x 80,"\n"; | ||||
|  | ||||
| #------------------------------------------------------------------------------- | ||||
| # Find the single hosts in the 'hosts' table | ||||
| #------------------------------------------------------------------------------- | ||||
| # | ||||
| # Scan the list of individual hosts and find them in the 'hosts' table | ||||
| # | ||||
| $unknown = 0; | ||||
| foreach my $host ( sort(keys(%hosts)) ) { | ||||
|     $rv2 = $sth2->execute("^$host\$"); | ||||
|     if ( $dbh1->err ) { | ||||
|         die $dbh1->errstr; | ||||
|     } | ||||
|     $rv2 = 0 if ( $rv2 eq '0E0' ); | ||||
|  | ||||
|     if ($rv2) { | ||||
|         $h2 = $sth2->fetchrow_hashref; | ||||
|         print "Found id for $host: ", $h2->{hostid}, "\n"; | ||||
|         $hosts{$host} = $h2->{hostid}; | ||||
|         save_hostid(\%doubles,$host,$h2->{hostid}); | ||||
|     } | ||||
|     else { | ||||
|         print "Can't find $host\n"; | ||||
|         $unknown++; | ||||
|     } | ||||
| } | ||||
|  | ||||
| #print Dumper(\%hosts),"\n"; | ||||
|  | ||||
| print '-' x 80,"\n"; | ||||
|  | ||||
| #------------------------------------------------------------------------------- | ||||
| # Allocate all unknown hosts a host id in the PostgreSQL database, and give an | ||||
| # unique email address. | ||||
| #------------------------------------------------------------------------------- | ||||
| if ( $unknown > 0 ) { | ||||
|     print "Registering $unknown hosts\n"; | ||||
|  | ||||
|     foreach my $host ( sort( keys(%hosts) ) ) { | ||||
|         if ( $hosts{$host} == 0 ) { | ||||
|             $rv3 = $sth3->execute( $host, undef, $default_licence ); | ||||
|             if ( $dbh2->err ) { | ||||
|                 die $dbh2->errstr; | ||||
|             } | ||||
|  | ||||
|             # | ||||
|             # Write a row to the 'hosts' table and save the id number | ||||
|             # generated | ||||
|             # | ||||
|             my $newid = $dbh2->last_insert_id( undef, undef, undef, undef, | ||||
|                 { sequence => 'host_seq' } ); | ||||
|             $hosts{$host} = $newid; | ||||
|             save_hostid(\%doubles,$host,$newid); | ||||
|             print "Host $host added with id $newid\n"; | ||||
|  | ||||
|             # | ||||
|             # Give the new host entry a default email address | ||||
|             # | ||||
|             $default_email = sprintf($email_template,$newid); | ||||
|             $rv3 = $dbh2->do( 'UPDATE hosts SET email = ? WHERE id = ?', | ||||
|                 undef, $default_email, $newid ); | ||||
|             if ( $dbh2->err ) { | ||||
|                 warn $dbh2->errstr; | ||||
|             } | ||||
|             $rv3 = 0 if ( $rv3 eq '0E0' ); | ||||
|  | ||||
|             warn "Failed to set email address $default_email for $host\n" | ||||
|                 unless ( defined($rv3) ); | ||||
|  | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| print '-' x 80,"\n"; | ||||
|  | ||||
| #------------------------------------------------------------------------------- | ||||
| # Now %doubles contains all the original names and host ids and %hosts | ||||
| # contains the parsed out names and their ids. We can look for shows | ||||
| # attributed to the first set and re-attribute them to the second set. | ||||
| #------------------------------------------------------------------------------- | ||||
| print "Changing host associations for shows with two hosts\n"; | ||||
| foreach my $double ( sort( keys(%doubles) ) ) { | ||||
|     print "Processing $double\n"; | ||||
|     my ( $doubleid, @newids ) = ( | ||||
|         $doubles{$double}->{double}, | ||||
|         values( %{ $doubles{$double}->{singles} } ) | ||||
|     ); | ||||
|     print "  Original id: $doubleid\n"; | ||||
|     print "  Replacements: ", join( ", ", @newids ), "\n"; | ||||
|  | ||||
|     # | ||||
|     # Find shows marked as belonging to this double-host | ||||
|     # | ||||
|     $sth4->execute($doubleid); | ||||
|     if ( $dbh2->err ) { | ||||
|         die $dbh2->errstr; | ||||
|     } | ||||
|  | ||||
|     # | ||||
|     # Process all the shows | ||||
|     # | ||||
|     while ( $h4 = $sth4->fetchrow_hashref ) { | ||||
|         my $eps_id = $h4->{eps_id}; | ||||
|         print "  Show $eps_id is ascribed to host $doubleid\n"; | ||||
|  | ||||
|         $dbh2->begin_work(); | ||||
|  | ||||
|         # | ||||
|         # Delete the xref link for the double host | ||||
|         # | ||||
|         $rv4 | ||||
|             = $dbh2->do( | ||||
|             'DELETE FROM episodes_hosts_xref WHERE episodes_id = ?', | ||||
|             undef, $eps_id ); | ||||
|         if ( $dbh2->err ) { | ||||
|             warn $dbh2->errstr; | ||||
|         } | ||||
|         $rv4 = 0 if ( $rv4 eq '0E0' ); | ||||
|  | ||||
|         if ( defined($rv4) ) { | ||||
|             print "  Deleted entry from episodes_hosts_xref for $eps_id\n"; | ||||
|         } | ||||
|         else { | ||||
|             warn "Problem deleting from episodes_hosts_xref for $eps_id\n"; | ||||
|         } | ||||
|  | ||||
|         # | ||||
|         # Add links for the single hosts | ||||
|         # | ||||
|         foreach my $hid (@newids) { | ||||
|             $rv4 = $dbh2->do( 'INSERT INTO episodes_hosts_xref VALUES (?,?)', | ||||
|                 undef, $eps_id, $hid ); | ||||
|             if ( $dbh2->err ) { | ||||
|                 warn $dbh2->errstr; | ||||
|             } | ||||
|             $rv4 = 0 if ( $rv4 eq '0E0' ); | ||||
|  | ||||
|             if ( defined($rv4) ) { | ||||
|                 print "  Added entry to episodes_hosts_xref values ", | ||||
|                     "$eps_id,$hid\n"; | ||||
|             } | ||||
|             else { | ||||
|                 warn "Problem adding to episodes_hosts_xref values " | ||||
|                     . "$eps_id,$hid\n"; | ||||
|             } | ||||
|  | ||||
|         } | ||||
|  | ||||
|         # | ||||
|         # Commit the delete/inserts above | ||||
|         # | ||||
|         $dbh2->commit(); | ||||
|  | ||||
|     } | ||||
|     print '~' x 80, "\n"; | ||||
|  | ||||
|     # | ||||
|     # Delete the double host (NOTE: This will fail due to referential | ||||
|     # integrity if the DELETE above failed, so there is scope for debris to be | ||||
|     # left around) | ||||
|     # | ||||
|     $rv4 = $dbh2->do( 'DELETE FROM hosts WHERE id = ?', undef, $doubleid ); | ||||
|     if ( $dbh2->err ) { | ||||
|         warn $dbh2->errstr; | ||||
|     } | ||||
|     $rv4 = 0 if ( $rv4 eq '0E0' ); | ||||
|  | ||||
|     if ( defined($rv4) ) { | ||||
|         print "  Deleted entry from hosts for id $doubleid ($double)\n"; | ||||
|     } | ||||
|     else { | ||||
|         warn "Problem deleting from hosts for id $doubleid ($double)\n"; | ||||
|     } | ||||
|  | ||||
| } | ||||
|  | ||||
| print '-' x 80,"\n"; | ||||
|  | ||||
| exit; | ||||
|  | ||||
| #===  FUNCTION  ================================================================ | ||||
| #         NAME: save_hostid | ||||
| #      PURPOSE: Saves the host id after searching for the key in the %doubles | ||||
| #               hash | ||||
| #   PARAMETERS: $doubles                hashref to %doubles | ||||
| #               $host                   host key | ||||
| #               $hostid                 host id number | ||||
| #      RETURNS: Nothing | ||||
| #  DESCRIPTION: Searches the %doubles hash for particular keys in the | ||||
| #               'singles' sub-hash. If found saves the corresponding host id | ||||
| #               there. | ||||
| #       THROWS: No exceptions | ||||
| #     COMMENTS: None | ||||
| #     SEE ALSO: N/A | ||||
| #=============================================================================== | ||||
| sub save_hostid { | ||||
|     my ( $doubles, $host, $hostid ) = @_; | ||||
|  | ||||
|     foreach my $key ( keys(%$doubles) ) { | ||||
|         if ( exists( $doubles->{$key}->{singles}->{$host} ) ) { | ||||
|             $doubles->{$key}->{singles}->{$host} = $hostid; | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| # vim: syntax=perl:ts=8:sw=4:et:ai:tw=78:fo=tcrqn21:fdm=marker | ||||
|  | ||||
		Reference in New Issue
	
	Block a user