forked from HPR/hpr-tools
		
	
		
			
				
	
	
		
			355 lines
		
	
	
		
			10 KiB
		
	
	
	
		
			Perl
		
	
	
		
			Executable File
		
	
	
	
	
			
		
		
	
	
			355 lines
		
	
	
		
			10 KiB
		
	
	
	
		
			Perl
		
	
	
		
			Executable File
		
	
	
	
	
| #!/usr/bin/env perl
 | |
| #===============================================================================
 | |
| #
 | |
| #         FILE: remodel_db_host_eps
 | |
| #
 | |
| #        USAGE: ./remodel_db_host_eps
 | |
| #
 | |
| #  DESCRIPTION: Remodel the 'hosts' and 'eps' tables in the HPR database so
 | |
| #               that a many-to-many relationship between host and episode can
 | |
| #               be established.
 | |
| #
 | |
| #      OPTIONS: ---
 | |
| # REQUIREMENTS: ---
 | |
| #         BUGS: ---
 | |
| #        NOTES: ---
 | |
| #       AUTHOR: Dave Morriss (djm), Dave.Morriss@gmail.com
 | |
| #      VERSION: 0.0.2
 | |
| #      CREATED: 2014-05-08 10:55:28
 | |
| #     REVISION: 2015-06-26 13:33:20
 | |
| #
 | |
| #===============================================================================
 | |
| 
 | |
| use 5.010;
 | |
| use strict;
 | |
| use warnings;
 | |
| use utf8;
 | |
| 
 | |
| use YAML::Syck;
 | |
| use List::Util qw{max};
 | |
| use List::MoreUtils qw{uniq};
 | |
| use DBI;
 | |
| #use Data::Dumper;
 | |
| 
 | |
| #
 | |
| # Version number (manually incremented)
 | |
| #
 | |
our $VERSION = '0.0.2';

#
# Script name and location: $PROG is $0 with any leading path removed and
# $DIR is whatever precedes the final path component of $0, falling back to
# '.' when nothing is left
#
my $PROG = $0;
$PROG =~ s{.*/}{}mx;

my $DIR = $0;
$DIR =~ s{/?[^/]*$}{}mx;
$DIR ||= '.';
 | |
| 
 | |
| #-------------------------------------------------------------------------------
 | |
| # Declarations
 | |
| #-------------------------------------------------------------------------------
 | |
| #
 | |
| # Constants and other declarations
 | |
| #
 | |
my $basedir    = "$ENV{HOME}/HPR/Database";
my $configfile = "$basedir/.hpr_db.yml";

#
# Database handle, statement handles and scratch values shared by the
# top-level code below
#
my $dbh;
my ( $sth1, $sth2, $sth3, $sth4 );
my ( $h1, $h2, $rv );

#
# Host rows keyed by name and by id, episode rows keyed by episode number,
# and the working values used while splitting double host entries
#
my %hosts_by_name;
my %hosts_by_id;
my %eps;
my @names;
my ( $hostid, $hid, $max_hostid );

#
# Names of fields in the 'hosts' table in the appropriate order for the later
# INSERT statement
#
my @host_flds = qw(
    hostid host email profile license local_image valid
);

#
# Names of fields in the 'eps' table in the appropriate order for the later
# INSERT statement. Note that it omits the 'hostid' field.
#
my @eps_flds = qw(
    id date title summary notes series
    explicit license tags version valid
);

#
# Enable Unicode output mode on both standard output streams
#
for my $stream ( \*STDOUT, \*STDERR ) {
    binmode $stream, ':encoding(UTF-8)';
}
 | |
| 
 | |
| #
 | |
| # Load configuration data
 | |
| #
 | |
#
# The YAML file must hold a 'database' mapping; its 'name', 'user' and
# 'password' entries are read below. Stop here with a clear message rather
# than attempting a connection with undefined values.
#
my $loaded = LoadFile($configfile);
die "Configuration file $configfile has no 'database' section\n"
    unless ref($loaded) eq 'HASH' && ref( $loaded->{database} ) eq 'HASH';
my %config = %{$loaded};

#-------------------------------------------------------------------------------
# Connect to the database
#-------------------------------------------------------------------------------
my $dbname = $config{database}->{name};
my $dbuser = $config{database}->{user};
my $dbpwd  = $config{database}->{password};

#
# Enable client-side UTF8 as part of the connect() call. According to the
# DBD::mysql documentation, switching 'mysql_enable_utf8' on after connecting
# does not tell the server that incoming data is UTF-8 (a separate
# 'SET NAMES utf8' would be needed), so the attribute is passed here instead.
#
$dbh = DBI->connect(
    "dbi:mysql:dbname=$dbname",
    $dbuser, $dbpwd,
    { AutoCommit => 1, mysql_enable_utf8 => 1 }
) or die $DBI::errstr;
 | |
| 
 | |
| #=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
 | |
| # The expectation is that we have the following original tables:
 | |
| # hosts         - the details of each host (contains some double entries
 | |
| #                 "hostA and hostB")
 | |
| # eps           - the details of all episodes, currently with a host id number
 | |
| #                 against each one
 | |
| #
 | |
| # We also have the following new tables for the transition:
 | |
| # new_hosts     - an empty copy of the 'hosts' table, InnoDB
 | |
| # new_eps       - an empty copy of the 'eps' table without the 'hostid'
 | |
| #                 column, InnoDB
 | |
| # hosts_eps     - a mapping table for joining together the 'new_hosts' and
 | |
| #                 'new_eps' tables, InnoDB with foreign keys
 | |
| #
 | |
| # See the file 'hosts_eps.sql' for the DDL which creates these tables.
 | |
| #=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
 | |
| 
 | |
| #-------------------------------------------------------------------------------
 | |
| # Collect the entire 'hosts' table
 | |
| #-------------------------------------------------------------------------------
 | |
$sth1 = $dbh->prepare(q{SELECT * FROM hosts ORDER BY hostid DESC})
    or die $dbh->errstr;
$sth1->execute;
if ( $dbh->err ) {
    die $dbh->errstr;
}

#
# Grab the data as an arrayref of hashrefs (one hashref per row, keyed by
# column name)
#
my $hosts = $sth1->fetchall_arrayref( {} );

#
# Make hashes keyed on the host name and on the id
#
%hosts_by_name = map { $_->{host}   => $_ } @{$hosts};
%hosts_by_id   = map { $_->{hostid} => $_ } @{$hosts};

#
# Highest host id in use; new hosts are numbered upwards from here. This is
# taken from every fetched row rather than from %hosts_by_name, because that
# hash keeps only one row per distinct name and could therefore hide the
# largest id. An empty table gives 0.
#
$max_hostid = max( map { $_->{hostid} } @{$hosts} ) // 0;

#-------------------------------------------------------------------------------
# Collect the entire 'eps' table
#-------------------------------------------------------------------------------
$sth1 = $dbh->prepare(q{SELECT * FROM eps ORDER BY id DESC})
    or die $dbh->errstr;
$sth1->execute;
if ( $dbh->err ) {
    die $dbh->errstr;
}

#
# Grab the data as an arrayref of hashrefs
#
my $eps = $sth1->fetchall_arrayref( {} );

#
# Make a hash keyed on the episode number
#
%eps = map { $_->{id} => $_ } @{$eps};
 | |
| 
 | |
| #-------------------------------------------------------------------------------
 | |
| # Walk the hash of hosts by name, finding double host entries. Stash the
 | |
| # episode numbers against the hosts (but do it messily resulting in duplicates
 | |
| # as a side effect)
 | |
| #-------------------------------------------------------------------------------
 | |
#
# Ids of the hosts created during this walk, keyed by name. A name that has
# no row of its own may occur in more than one double entry; remembering the
# id given to it the first time means it gets exactly one new host row instead
# of one per occurrence.
#
my %new_hostid_for;

foreach my $key ( keys(%hosts_by_name) ) {
    $hostid = $hosts_by_name{$key}->{hostid};

    #
    # Is this a double ("HostA and HostB") entry?
    #
    if ( @names = ( $key =~ /^([[:print:]]+) and ([[:print:]]+)$/ ) ) {
        printf "%3d: %s\n", $hostid, $key;

        #
        # Process the names picked out of the 'host' field
        #
        foreach my $name (@names) {
            if ( exists( $hosts_by_name{$name} ) ) {
                #
                # Known name, report it
                #
                $hid = $hosts_by_name{$name}->{hostid};
                printf "\t%3d: %s\n", $hid, $name;
                printf "Replace %d with %d\n", $hostid, $hid;

                #
                # Collect all episodes relating to the double id ($hostid) and
                # add them to the known id ($hid)
                #
                $hosts_by_id{$hid}->{eps}
                    = collect_eps( $hostid, \%eps,
                    $hosts_by_id{$hid}->{eps} );

                #
                # Mark the double id as not valid
                #
                $hosts_by_id{$hostid}->{valid} = 0;
            }
            else {
                if ( exists( $new_hostid_for{$name} ) ) {
                    #
                    # Unknown in the original table, but already given a new
                    # host entry by an earlier double entry; reuse it
                    #
                    $hid = $new_hostid_for{$name};
                    printf "\t'%s' not known, already added as %d\n",
                        $name, $hid;
                }
                else {
                    #
                    # Unknown name, make a new host entry
                    #
                    print "\t'$name' not known\n";
                    $max_hostid++;
                    $hid = $new_hostid_for{$name} = $max_hostid;
                    $hosts_by_id{$hid} = {
                        'profile'     => '',
                        'local_image' => '0',
                        'hostid'      => $hid,
                        'license'     => 'CC-BY-SA',
                        'host'        => $name,
                        'valid'       => '1',
                        'email'       => ''
                    };
                }

                #
                # Save all episodes of the double id ($hostid) against the
                # new host
                #
                $hosts_by_id{$hid}->{eps}
                    = collect_eps( $hostid, \%eps,
                    $hosts_by_id{$hid}->{eps} );
            }
        }
    }
    else {
        #
        # Single host, just collect all their episodes
        #
        $hosts_by_id{$hostid}->{eps}
            = collect_eps( $hostid, \%eps, $hosts_by_id{$hostid}->{eps} );
    }
}
 | |
| 
 | |
| #-------------------------------------------------------------------------------
 | |
| # Report on the structure we built, de-duplicating as we go
 | |
| #-------------------------------------------------------------------------------
 | |
for my $id ( sort { $a <=> $b } keys %hosts_by_id ) {
    my $host = $hosts_by_id{$id};

    # Hosts that never had episodes collected against them are not reported
    next unless exists $host->{eps};

    #
    # De-duplicate the episode list in place and put it in numeric order
    #
    my $episodes = $host->{eps};
    @{$episodes} = sort { $a <=> $b } uniq( @{$episodes} );

    #
    # Print the host details followed by the episodes
    #
    printf "Hostid: %d [%s,%d] (%d)\n",
        $id, $host->{host}, $host->{hostid}, scalar( @{$episodes} );
    printf "  Episode: %d\n", $_ for @{$episodes};
}
 | |
| 
 | |
| #-------------------------------------------------------------------------------
 | |
| # Turn the %hosts_by_id hash into database insert statements
 | |
| #-------------------------------------------------------------------------------
 | |
#
# One placeholder per field; the values are supplied positionally in the
# order given by @host_flds and @eps_flds
#
my $sql1 = sprintf( "INSERT INTO new_hosts VALUES(%s)",
    join( ",", map { '?' } @host_flds ) );
$sth1 = $dbh->prepare($sql1) or die $dbh->errstr;

my $sql2 = sprintf( "INSERT INTO new_eps VALUES(%s)",
    join( ",", map { '?' } @eps_flds ) );
$sth2 = $dbh->prepare($sql2) or die $dbh->errstr;

$sth3 = $dbh->prepare(q{INSERT INTO hosts_eps VALUES(?,?)})
    or die $dbh->errstr;

#
# Load the three tables inside a single transaction so that a failure part
# way through does not leave them half populated (the notes earlier in this
# file describe all three as InnoDB). begin_work suspends AutoCommit until
# the commit or rollback.
#
$dbh->begin_work or die $dbh->errstr;

my $loaded_ok = eval {

    #
    # The 'new_hosts' table. The hash slice picks out only the table columns,
    # leaving out the 'eps' list attached to some hosts.
    #
    foreach my $hid ( sort { $a <=> $b } keys(%hosts_by_id) ) {
        $sth1->execute( @{ $hosts_by_id{$hid} }{@host_flds} )
            or die $sth1->errstr . "\n";
    }

    #
    # The 'new_eps' table
    #
    foreach my $eid ( sort { $a <=> $b } keys(%eps) ) {
        $sth2->execute( @{ $eps{$eid} }{@eps_flds} )
            or die $sth2->errstr . "\n";
    }

    #
    # The 'hosts_eps' table: one row per (host, episode) pair
    #
    foreach my $hid ( sort { $a <=> $b } keys(%hosts_by_id) ) {
        next unless exists( $hosts_by_id{$hid}->{eps} );
        foreach my $ep ( @{ $hosts_by_id{$hid}->{eps} } ) {
            $sth3->execute( $hosts_by_id{$hid}->{hostid}, $ep )
                or die $sth3->errstr . "\n";
        }
    }

    $dbh->commit or die $dbh->errstr . "\n";
    1;
};

unless ($loaded_ok) {
    # Save the message first; rollback may itself reset $@
    my $error = $@ || "unknown error\n";
    $dbh->rollback;
    die "Load failed, nothing was committed: $error";
}

exit;
 | |
| 
 | |
| #===  FUNCTION  ================================================================
 | |
| #         NAME: collect_eps
 | |
| #      PURPOSE: Collect all the episodes relating to a hostid and return them,
 | |
| #               along with the contents of $current, as an array of episode numbers
 | |
| #   PARAMETERS: $hostid         the host id we're interested in
 | |
| #               $eps            hashref containing anonymous hashes keyed by
 | |
| #                               episode number
 | |
| #               $current        a reference to any existing array of episodes
 | |
| #                               for this host id
 | |
| #      RETURNS: A reference to the resulting array of episode numbers
 | |
| #  DESCRIPTION:
 | |
| #       THROWS: No exceptions
 | |
| #     COMMENTS: None
 | |
| #     SEE ALSO: N/A
 | |
| #===============================================================================
 | |
sub collect_eps {
    my ( $hostid, $eps, $current ) = @_;

    # Start from a copy of any list passed in, so the caller's array is
    # never modified
    my @host_eps = $current ? @{$current} : ();

    # Append the key (episode number) of every episode whose 'hostid' field
    # equals the requested host id; duplicates are not removed here
    push @host_eps, grep { $eps->{$_}->{hostid} == $hostid } keys %{$eps};

    return \@host_eps;
}
 | |
| 
 | |
| # vim: syntax=perl:ts=8:sw=4:et:ai:tw=78:fo=tcrqn21:fdm=marker
 |