forked from HPR/hpr-tools
		
	
		
			
	
	
		
			355 lines
		
	
	
		
			10 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
		
		
			
		
	
	
			355 lines
		
	
	
		
			10 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
|   | #!/usr/bin/env perl | ||
|  | #=============================================================================== | ||
|  | # | ||
|  | #         FILE: remodel_db_host_eps | ||
|  | # | ||
|  | #        USAGE: ./remodel_db_host_eps | ||
|  | # | ||
|  | #  DESCRIPTION: Remodel the 'hosts' and 'eps' tables in the HPR database so | ||
|  | #               that a many-to-many relationship between host and episode can | ||
|  | #               be established. | ||
|  | # | ||
|  | #      OPTIONS: --- | ||
|  | # REQUIREMENTS: --- | ||
|  | #         BUGS: --- | ||
|  | #        NOTES: --- | ||
|  | #       AUTHOR: Dave Morriss (djm), Dave.Morriss@gmail.com | ||
|  | #      VERSION: 0.0.2 | ||
|  | #      CREATED: 2014-05-08 10:55:28 | ||
|  | #     REVISION: 2015-06-26 13:33:20 | ||
|  | # | ||
|  | #=============================================================================== | ||
|  | 
 | ||
|  | use 5.010; | ||
|  | use strict; | ||
|  | use warnings; | ||
|  | use utf8; | ||
|  | 
 | ||
|  | use YAML::Syck; | ||
|  | use List::Util qw{max}; | ||
|  | use List::MoreUtils qw{uniq}; | ||
|  | use DBI; | ||
|  | #use Data::Dumper; | ||
|  | 
 | ||
|  | # | ||
|  | # Version number (manually incremented) | ||
|  | # | ||
|  | our $VERSION = '0.0.2'; | ||
|  | 
 | ||
|  | # | ||
|  | # Script name | ||
|  | # | ||
|  | ( my $PROG = $0 ) =~ s|.*/||mx; | ||
|  | ( my $DIR  = $0 ) =~ s|/?[^/]*$||mx; | ||
|  | $DIR = '.' unless $DIR; | ||
|  | 
 | ||
|  | #------------------------------------------------------------------------------- | ||
|  | # Declarations | ||
|  | #------------------------------------------------------------------------------- | ||
|  | # | ||
|  | # Constants and other declarations | ||
|  | # | ||
|  | my $basedir    = "$ENV{HOME}/HPR/Database"; | ||
|  | my $configfile = "$basedir/.hpr_db.yml"; | ||
|  | 
 | ||
|  | my ( $dbh, $sth1, $sth2, $sth3, $sth4, $h1, $h2, $rv ); | ||
|  | my ( %hosts_by_name, %hosts_by_id, %eps, @names, $hostid, $hid, $max_hostid ); | ||
|  | 
 | ||
|  | # | ||
|  | # Names of fields in the 'hosts' table in the appropriate order for the later | ||
|  | # INSERT statement | ||
|  | # | ||
|  | my @host_flds = qw{ | ||
|  |     hostid | ||
|  |     host | ||
|  |     email | ||
|  |     profile | ||
|  |     license | ||
|  |     local_image | ||
|  |     valid | ||
|  | }; | ||
|  | 
 | ||
|  | # | ||
|  | # Names of fields in the 'eps' table in the appropriate order for the later | ||
|  | # INSERT statement. Note that it omits the 'hostid' field. | ||
|  | # | ||
|  | my @eps_flds = qw{ | ||
|  |     id | ||
|  |     date | ||
|  |     title | ||
|  |     summary | ||
|  |     notes | ||
|  |     series | ||
|  |     explicit | ||
|  |     license | ||
|  |     tags | ||
|  |     version | ||
|  |     valid | ||
|  | }; | ||
|  | 
 | ||
|  | # | ||
|  | # Enable Unicode output mode | ||
|  | # | ||
|  | binmode STDOUT, ":encoding(UTF-8)"; | ||
|  | binmode STDERR, ":encoding(UTF-8)"; | ||
|  | 
 | ||
|  | # | ||
|  | # Load configuration data | ||
|  | # | ||
|  | my %config = %{ LoadFile($configfile) }; | ||
|  | 
 | ||
|  | #------------------------------------------------------------------------------- | ||
|  | # Connect to the database | ||
|  | #------------------------------------------------------------------------------- | ||
|  | my $dbname = $config{database}->{name}; | ||
|  | my $dbuser = $config{database}->{user}; | ||
|  | my $dbpwd  = $config{database}->{password}; | ||
|  | $dbh | ||
|  |     = DBI->connect( "dbi:mysql:dbname=$dbname", $dbuser, $dbpwd, | ||
|  |     { AutoCommit => 1 } ) | ||
|  |     or die $DBI::errstr; | ||
|  | 
 | ||
|  | # | ||
|  | # Enable client-side UTF8 | ||
|  | # | ||
|  | $dbh->{mysql_enable_utf8} = 1; | ||
|  | 
 | ||
|  | #=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= | ||
|  | # The expectation is that we have the following original tables: | ||
|  | # hosts         - the details of each host (contains some double entries | ||
|  | #                 "hostA and hostB") | ||
|  | # eps           - the details of all episodes, currently with a host id number | ||
|  | #                 against each one | ||
|  | # | ||
|  | # We also have the following new tables for the transition: | ||
|  | # new_hosts     - an empty copy of the 'hosts' table, InnoDB | ||
|  | # new_eps       - an empty copy of the 'eps' table without the 'hostid' | ||
|  | #                 column, InnoDB | ||
|  | # hosts_eps     - a mapping table for joining together the 'new_hosts' and | ||
|  | #                 'new_eps' tables, InnoDB with foreign keys | ||
|  | # | ||
|  | # See the file 'hosts_eps.sql' for the DDL which creates these tables. | ||
|  | #=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= | ||
|  | 
 | ||
|  | #------------------------------------------------------------------------------- | ||
|  | # Collect the entire 'hosts' table | ||
|  | #------------------------------------------------------------------------------- | ||
|  | $sth1 = $dbh->prepare(q{SELECT * FROM hosts ORDER BY hostid DESC}); | ||
|  | $sth1->execute; | ||
|  | if ( $dbh->err ) { | ||
|  |     die $dbh->errstr; | ||
|  | } | ||
|  | 
 | ||
|  | # | ||
|  | # Grab the data as an arrayref of hashrefs | ||
|  | # | ||
|  | my $hosts = $sth1->fetchall_arrayref( {} ); | ||
|  | 
 | ||
|  | # | ||
|  | # Make hashes keyed on the host name and on the id | ||
|  | # | ||
|  | %hosts_by_name = map { $_->{host}   => $_ } @{$hosts}; | ||
|  | %hosts_by_id   = map { $_->{hostid} => $_ } @{$hosts}; | ||
|  | 
 | ||
|  | $max_hostid = max( map { $_->{hostid} } values(%hosts_by_name) ); | ||
|  | 
 | ||
|  | #------------------------------------------------------------------------------- | ||
|  | # Collect the entire 'eps' table | ||
|  | #------------------------------------------------------------------------------- | ||
|  | $sth1 = $dbh->prepare(q{SELECT * FROM eps ORDER BY id DESC}); | ||
|  | $sth1->execute; | ||
|  | if ( $dbh->err ) { | ||
|  |     die $dbh->errstr; | ||
|  | } | ||
|  | 
 | ||
|  | # | ||
|  | # Grab the data as an arrayref of hashrefs | ||
|  | # | ||
|  | my $eps = $sth1->fetchall_arrayref( {} ); | ||
|  | 
 | ||
|  | # | ||
|  | # Make a hash keyed on the episode number | ||
|  | # | ||
|  | %eps = map { $_->{id} => $_ } @{$eps}; | ||
|  | 
 | ||
|  | #------------------------------------------------------------------------------- | ||
|  | # Walk the hash of hosts by name, finding double host entries. Stash the | ||
|  | # episode numbers against the hosts (but do it messily resulting in duplicates | ||
|  | # as a side effect) | ||
|  | #------------------------------------------------------------------------------- | ||
|  | foreach my $key ( keys(%hosts_by_name) ) { | ||
|  |     $hostid = $hosts_by_name{$key}->{hostid}; | ||
|  | 
 | ||
|  |     # | ||
|  |     # Is this a double ("HostA and HostB") entry? | ||
|  |     # | ||
|  |     if ( @names = ( $key =~ /^([[:print:]]+) and ([[:print:]]+)$/ ) ) { | ||
|  |         printf "%3d: %s\n", $hosts_by_name{$key}->{hostid}, $key; | ||
|  | 
 | ||
|  |         # | ||
|  |         # Process the names picked out of the 'host' field | ||
|  |         # | ||
|  |         foreach my $name (@names) { | ||
|  |             if ( exists( $hosts_by_name{$name} ) ) { | ||
|  |                 # | ||
|  |                 # Known name,  report it | ||
|  |                 # | ||
|  |                 printf "\t%3d: %s\n", $hosts_by_name{$name}->{hostid}, $name; | ||
|  |                 printf "Replace %d with %d\n", | ||
|  |                     $hosts_by_name{$key}->{hostid}, | ||
|  |                     $hosts_by_name{$name}->{hostid}; | ||
|  | 
 | ||
|  |                 # | ||
|  |                 # Collect all episodes relating to the double id ($hostid) and | ||
|  |                 # add them to the known id ($hid) | ||
|  |                 # | ||
|  |                 $hid = $hosts_by_name{$name}->{hostid}; | ||
|  |                 $hosts_by_id{$hid}->{eps} | ||
|  |                     = collect_eps( $hostid, \%eps, | ||
|  |                     $hosts_by_id{$hid}->{eps} ); | ||
|  | 
 | ||
|  |                 # | ||
|  |                 # Mark the double id as not valid | ||
|  |                 # | ||
|  |                 $hosts_by_id{$hostid}->{valid} = 0; | ||
|  |             } | ||
|  |             else { | ||
|  |                 # | ||
|  |                 # Unknown name, make a new host entry | ||
|  |                 # | ||
|  |                 print "\t'$name' not known\n"; | ||
|  |                 $max_hostid++; | ||
|  |                 $hosts_by_id{$max_hostid} = { | ||
|  |                     'profile'     => '', | ||
|  |                     'local_image' => '0', | ||
|  |                     'hostid'      => $max_hostid, | ||
|  |                     'license'     => 'CC-BY-SA', | ||
|  |                     'host'        => $name, | ||
|  |                     'valid'       => '1', | ||
|  |                     'email'       => '' | ||
|  |                 }; | ||
|  | 
 | ||
|  |                 # | ||
|  |                 # Save all episodes for this name | ||
|  |                 # | ||
|  |                 $hid = $hosts_by_name{$key}->{hostid}; | ||
|  |                 $hosts_by_id{$max_hostid}->{eps} | ||
|  |                     = collect_eps( $hid, \%eps, | ||
|  |                     $hosts_by_id{$max_hostid}->{eps} ); | ||
|  |             } | ||
|  |         } | ||
|  |     } | ||
|  |     else { | ||
|  |         # | ||
|  |         # Single host, just collect all their episodes | ||
|  |         # | ||
|  |         $hosts_by_id{$hostid}->{eps} | ||
|  |             = collect_eps( $hostid, \%eps, $hosts_by_id{$hostid}->{eps} ); | ||
|  |     } | ||
|  | } | ||
|  | 
 | ||
|  | #------------------------------------------------------------------------------- | ||
|  | # Report on the structure we built, de-duplicating as we go | ||
|  | #------------------------------------------------------------------------------- | ||
|  | foreach my $hid ( sort { $a <=> $b } keys(%hosts_by_id) ) { | ||
|  |     if ( exists( $hosts_by_id{$hid}->{eps} ) ) { | ||
|  |         # | ||
|  |         # De-duplicate the episode list | ||
|  |         # | ||
|  |         @{ $hosts_by_id{$hid}->{eps} } | ||
|  |             = sort { $a <=> $b } uniq( @{ $hosts_by_id{$hid}->{eps} } ); | ||
|  | 
 | ||
|  |         # | ||
|  |         # Print the host details followed by the episodes | ||
|  |         # | ||
|  |         printf "Hostid: %d [%s,%d] (%d)\n", $hid, | ||
|  |             $hosts_by_id{$hid}->{host}, | ||
|  |             $hosts_by_id{$hid}->{hostid}, | ||
|  |             scalar( @{ $hosts_by_id{$hid}->{eps} } ); | ||
|  |         foreach my $ep ( @{ $hosts_by_id{$hid}->{eps} } ) { | ||
|  |             printf "  Episode: %d\n", $ep; | ||
|  |         } | ||
|  |     } | ||
|  | } | ||
|  | 
 | ||
|  | #------------------------------------------------------------------------------- | ||
|  | # Turn the %hosts_by_id hash into database insert statements | ||
|  | #------------------------------------------------------------------------------- | ||
|  | my $sql1 = sprintf( "INSERT INTO new_hosts VALUES(%s)", | ||
|  |     join( ",", map { '?' } @host_flds ) ); | ||
|  | $sth1 = $dbh->prepare($sql1); | ||
|  | 
 | ||
|  | my $sql2 = sprintf( "INSERT INTO new_eps VALUES(%s)", | ||
|  |     join( ",", map { '?' } @eps_flds ) ); | ||
|  | $sth2 = $dbh->prepare($sql2); | ||
|  | 
 | ||
|  | $sth3 = $dbh->prepare(q{INSERT INTO hosts_eps VALUES(?,?)}); | ||
|  | 
 | ||
|  | # | ||
|  | # The 'new_hosts' table | ||
|  | # | ||
|  | foreach my $hid ( sort { $a <=> $b } keys(%hosts_by_id) ) { | ||
|  |     $sth1->execute( @{ $hosts_by_id{$hid} }{@host_flds} ); | ||
|  |     if ( $dbh->err ) { | ||
|  |         die $dbh->errstr; | ||
|  |     } | ||
|  | } | ||
|  | 
 | ||
|  | # | ||
|  | # The 'new_eps' table | ||
|  | # | ||
|  | foreach my $eid ( sort { $a <=> $b } keys(%eps) ) { | ||
|  |     $sth2->execute( @{ $eps{$eid} }{@eps_flds} ); | ||
|  |     if ( $dbh->err ) { | ||
|  |         die $dbh->errstr; | ||
|  |     } | ||
|  | } | ||
|  | 
 | ||
|  | # | ||
|  | # The 'hosts_eps' table | ||
|  | # | ||
|  | foreach my $hid ( sort { $a <=> $b } keys(%hosts_by_id) ) { | ||
|  |     if ( exists( $hosts_by_id{$hid}->{eps} ) ) { | ||
|  |         foreach my $ep ( @{ $hosts_by_id{$hid}->{eps} } ) { | ||
|  |             $sth3->execute( $hosts_by_id{$hid}->{hostid}, $ep ); | ||
|  |             if ( $dbh->err ) { | ||
|  |                 die $dbh->errstr; | ||
|  |             } | ||
|  |         } | ||
|  |     } | ||
|  | } | ||
|  | 
 | ||
|  | exit; | ||
|  | 
 | ||
|  | #===  FUNCTION  ================================================================ | ||
|  | #         NAME: collect_eps | ||
|  | #      PURPOSE: Collect all the episodes relating to a hostid and return them, | ||
|  | #               along with the contents of $current as an array of hashrefs | ||
|  | #   PARAMETERS: $hostid         the host id we're interested in | ||
|  | #               $eps            hashref containing anonymous hashes keyed by | ||
|  | #                               episode number | ||
|  | #               $current        a reference to any existing array of episodes | ||
|  | #                               for this host id | ||
|  | #      RETURNS: A reference to the resulting array of anonymous hashes | ||
|  | #  DESCRIPTION: | ||
|  | #       THROWS: No exceptions | ||
|  | #     COMMENTS: None | ||
|  | #     SEE ALSO: N/A | ||
|  | #=============================================================================== | ||
|  | sub collect_eps { | ||
|  |     my ( $hostid, $eps, $current ) = @_; | ||
|  |     my @host_eps; | ||
|  | 
 | ||
|  |     @host_eps = @{$current} if $current; | ||
|  | 
 | ||
|  |     foreach my $epsid ( keys(%$eps) ) { | ||
|  |         if ( $eps->{$epsid}->{hostid} == $hostid ) { | ||
|  |             push( @host_eps, $epsid ); | ||
|  |         } | ||
|  |     } | ||
|  | 
 | ||
|  |     return \@host_eps; | ||
|  | } | ||
|  | 
 | ||
|  | # vim: syntax=perl:ts=8:sw=4:et:ai:tw=78:fo=tcrqn21:fdm=marker |