#!/usr/bin/env perl #=============================================================================== # # FILE: remodel_db_host_eps # # USAGE: ./remodel_db_host_eps # # DESCRIPTION: Remodel the 'hosts' and 'eps' tables in the HPR database so # that a many-to-many relationship between host and episode can # be established. # # OPTIONS: --- # REQUIREMENTS: --- # BUGS: --- # NOTES: --- # AUTHOR: Dave Morriss (djm), Dave.Morriss@gmail.com # VERSION: 0.0.2 # CREATED: 2014-05-08 10:55:28 # REVISION: 2015-06-26 13:33:20 # #=============================================================================== use 5.010; use strict; use warnings; use utf8; use YAML::Syck; use List::Util qw{max}; use List::MoreUtils qw{uniq}; use DBI; #use Data::Dumper; # # Version number (manually incremented) # our $VERSION = '0.0.2'; # # Script name # ( my $PROG = $0 ) =~ s|.*/||mx; ( my $DIR = $0 ) =~ s|/?[^/]*$||mx; $DIR = '.' unless $DIR; #------------------------------------------------------------------------------- # Declarations #------------------------------------------------------------------------------- # # Constants and other declarations # my $basedir = "$ENV{HOME}/HPR/Database"; my $configfile = "$basedir/.hpr_db.yml"; my ( $dbh, $sth1, $sth2, $sth3, $sth4, $h1, $h2, $rv ); my ( %hosts_by_name, %hosts_by_id, %eps, @names, $hostid, $hid, $max_hostid ); # # Names of fields in the 'hosts' table in the appropriate order for the later # INSERT statement # my @host_flds = qw{ hostid host email profile license local_image valid }; # # Names of fields in the 'eps' table in the appropriate order for the later # INSERT statement. Note that it omits the 'hostid' field. # my @eps_flds = qw{ id date title summary notes series explicit license tags version valid }; # # Enable Unicode output mode # binmode STDOUT, ":encoding(UTF-8)"; binmode STDERR, ":encoding(UTF-8)"; # # Load configuration data # my %config = %{ LoadFile($configfile) }; #------------------------------------------------------------------------------- # Connect to the database #------------------------------------------------------------------------------- my $dbname = $config{database}->{name}; my $dbuser = $config{database}->{user}; my $dbpwd = $config{database}->{password}; $dbh = DBI->connect( "dbi:mysql:dbname=$dbname", $dbuser, $dbpwd, { AutoCommit => 1 } ) or die $DBI::errstr; # # Enable client-side UTF8 # $dbh->{mysql_enable_utf8} = 1; #=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= # The expectation is that we have the following original tables: # hosts - the details of each host (contains some double entries # "hostA and hostB") # eps - the details of all episodes, currently with a host id number # against each one # # We also have the following new tables for the transition: # new_hosts - an empty copy of the 'hosts' table, InnoDB # new_eps - an empty copy of the 'eps' table without the 'hostid' # column, InnoDB # hosts_eps - a mapping table for joining together the 'new_hosts' and # 'new_eps' tables, InnoDB with foreign keys # # See the file 'hosts_eps.sql' for the DDL which creates these tables. #=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= #------------------------------------------------------------------------------- # Collect the entire 'hosts' table #------------------------------------------------------------------------------- $sth1 = $dbh->prepare(q{SELECT * FROM hosts ORDER BY hostid DESC}); $sth1->execute; if ( $dbh->err ) { die $dbh->errstr; } # # Grab the data as an arrayref of hashrefs # my $hosts = $sth1->fetchall_arrayref( {} ); # # Make hashes keyed on the host name and on the id # %hosts_by_name = map { $_->{host} => $_ } @{$hosts}; %hosts_by_id = map { $_->{hostid} => $_ } @{$hosts}; $max_hostid = max( map { $_->{hostid} } values(%hosts_by_name) ); #------------------------------------------------------------------------------- # Collect the entire 'eps' table #------------------------------------------------------------------------------- $sth1 = $dbh->prepare(q{SELECT * FROM eps ORDER BY id DESC}); $sth1->execute; if ( $dbh->err ) { die $dbh->errstr; } # # Grab the data as an arrayref of hashrefs # my $eps = $sth1->fetchall_arrayref( {} ); # # Make a hash keyed on the episode number # %eps = map { $_->{id} => $_ } @{$eps}; #------------------------------------------------------------------------------- # Walk the hash of hosts by name, finding double host entries. Stash the # episode numbers against the hosts (but do it messily resulting in duplicates # as a side effect) #------------------------------------------------------------------------------- foreach my $key ( keys(%hosts_by_name) ) { $hostid = $hosts_by_name{$key}->{hostid}; # # Is this a double ("HostA and HostB") entry? # if ( @names = ( $key =~ /^([[:print:]]+) and ([[:print:]]+)$/ ) ) { printf "%3d: %s\n", $hosts_by_name{$key}->{hostid}, $key; # # Process the names picked out of the 'host' field # foreach my $name (@names) { if ( exists( $hosts_by_name{$name} ) ) { # # Known name, report it # printf "\t%3d: %s\n", $hosts_by_name{$name}->{hostid}, $name; printf "Replace %d with %d\n", $hosts_by_name{$key}->{hostid}, $hosts_by_name{$name}->{hostid}; # # Collect all episodes relating to the double id ($hostid) and # add them to the known id ($hid) # $hid = $hosts_by_name{$name}->{hostid}; $hosts_by_id{$hid}->{eps} = collect_eps( $hostid, \%eps, $hosts_by_id{$hid}->{eps} ); # # Mark the double id as not valid # $hosts_by_id{$hostid}->{valid} = 0; } else { # # Unknown name, make a new host entry # print "\t'$name' not known\n"; $max_hostid++; $hosts_by_id{$max_hostid} = { 'profile' => '', 'local_image' => '0', 'hostid' => $max_hostid, 'license' => 'CC-BY-SA', 'host' => $name, 'valid' => '1', 'email' => '' }; # # Save all episodes for this name # $hid = $hosts_by_name{$key}->{hostid}; $hosts_by_id{$max_hostid}->{eps} = collect_eps( $hid, \%eps, $hosts_by_id{$max_hostid}->{eps} ); } } } else { # # Single host, just collect all their episodes # $hosts_by_id{$hostid}->{eps} = collect_eps( $hostid, \%eps, $hosts_by_id{$hostid}->{eps} ); } } #------------------------------------------------------------------------------- # Report on the structure we built, de-duplicating as we go #------------------------------------------------------------------------------- foreach my $hid ( sort { $a <=> $b } keys(%hosts_by_id) ) { if ( exists( $hosts_by_id{$hid}->{eps} ) ) { # # De-duplicate the episode list # @{ $hosts_by_id{$hid}->{eps} } = sort { $a <=> $b } uniq( @{ $hosts_by_id{$hid}->{eps} } ); # # Print the host details followed by the episodes # printf "Hostid: %d [%s,%d] (%d)\n", $hid, $hosts_by_id{$hid}->{host}, $hosts_by_id{$hid}->{hostid}, scalar( @{ $hosts_by_id{$hid}->{eps} } ); foreach my $ep ( @{ $hosts_by_id{$hid}->{eps} } ) { printf " Episode: %d\n", $ep; } } } #------------------------------------------------------------------------------- # Turn the %hosts_by_id hash into database insert statements #------------------------------------------------------------------------------- my $sql1 = sprintf( "INSERT INTO new_hosts VALUES(%s)", join( ",", map { '?' } @host_flds ) ); $sth1 = $dbh->prepare($sql1); my $sql2 = sprintf( "INSERT INTO new_eps VALUES(%s)", join( ",", map { '?' } @eps_flds ) ); $sth2 = $dbh->prepare($sql2); $sth3 = $dbh->prepare(q{INSERT INTO hosts_eps VALUES(?,?)}); # # The 'new_hosts' table # foreach my $hid ( sort { $a <=> $b } keys(%hosts_by_id) ) { $sth1->execute( @{ $hosts_by_id{$hid} }{@host_flds} ); if ( $dbh->err ) { die $dbh->errstr; } } # # The 'new_eps' table # foreach my $eid ( sort { $a <=> $b } keys(%eps) ) { $sth2->execute( @{ $eps{$eid} }{@eps_flds} ); if ( $dbh->err ) { die $dbh->errstr; } } # # The 'hosts_eps' table # foreach my $hid ( sort { $a <=> $b } keys(%hosts_by_id) ) { if ( exists( $hosts_by_id{$hid}->{eps} ) ) { foreach my $ep ( @{ $hosts_by_id{$hid}->{eps} } ) { $sth3->execute( $hosts_by_id{$hid}->{hostid}, $ep ); if ( $dbh->err ) { die $dbh->errstr; } } } } exit; #=== FUNCTION ================================================================ # NAME: collect_eps # PURPOSE: Collect all the episodes relating to a hostid and return them, # along with the contents of $current as an array of hashrefs # PARAMETERS: $hostid the host id we're interested in # $eps hashref containing anonymous hashes keyed by # episode number # $current a reference to any existing array of episodes # for this host id # RETURNS: A reference to the resulting array of anonymous hashes # DESCRIPTION: # THROWS: No exceptions # COMMENTS: None # SEE ALSO: N/A #=============================================================================== sub collect_eps { my ( $hostid, $eps, $current ) = @_; my @host_eps; @host_eps = @{$current} if $current; foreach my $epsid ( keys(%$eps) ) { if ( $eps->{$epsid}->{hostid} == $hostid ) { push( @host_eps, $epsid ); } } return \@host_eps; } # vim: syntax=perl:ts=8:sw=4:et:ai:tw=78:fo=tcrqn21:fdm=marker