355 lines
10 KiB
Plaintext
355 lines
10 KiB
Plaintext
|
#!/usr/bin/env perl
|
||
|
#===============================================================================
|
||
|
#
|
||
|
# FILE: remodel_db_host_eps
|
||
|
#
|
||
|
# USAGE: ./remodel_db_host_eps
|
||
|
#
|
||
|
# DESCRIPTION: Remodel the 'hosts' and 'eps' tables in the HPR database so
|
||
|
# that a many-to-many relationship between host and episode can
|
||
|
# be established.
|
||
|
#
|
||
|
# OPTIONS: ---
|
||
|
# REQUIREMENTS: ---
|
||
|
# BUGS: ---
|
||
|
# NOTES: ---
|
||
|
# AUTHOR: Dave Morriss (djm), Dave.Morriss@gmail.com
|
||
|
# VERSION: 0.0.2
|
||
|
# CREATED: 2014-05-08 10:55:28
|
||
|
# REVISION: 2015-06-26 13:33:20
|
||
|
#
|
||
|
#===============================================================================
|
||
|
|
||
|
use 5.010;
|
||
|
use strict;
|
||
|
use warnings;
|
||
|
use utf8;
|
||
|
|
||
|
use YAML::Syck;
|
||
|
use List::Util qw{max};
|
||
|
use List::MoreUtils qw{uniq};
|
||
|
use DBI;
|
||
|
#use Data::Dumper;
|
||
|
|
||
|
#
|
||
|
# Version number (manually incremented)
|
||
|
#
|
||
|
our $VERSION = '0.0.2';
|
||
|
|
||
|
#
|
||
|
# Script name
|
||
|
#
|
||
|
( my $PROG = $0 ) =~ s|.*/||mx;
|
||
|
( my $DIR = $0 ) =~ s|/?[^/]*$||mx;
|
||
|
$DIR = '.' unless $DIR;
|
||
|
|
||
|
#-------------------------------------------------------------------------------
|
||
|
# Declarations
|
||
|
#-------------------------------------------------------------------------------
|
||
|
#
|
||
|
# Constants and other declarations
|
||
|
#
|
||
|
my $basedir = "$ENV{HOME}/HPR/Database";
|
||
|
my $configfile = "$basedir/.hpr_db.yml";
|
||
|
|
||
|
my ( $dbh, $sth1, $sth2, $sth3, $sth4, $h1, $h2, $rv );
|
||
|
my ( %hosts_by_name, %hosts_by_id, %eps, @names, $hostid, $hid, $max_hostid );
|
||
|
|
||
|
#
|
||
|
# Names of fields in the 'hosts' table in the appropriate order for the later
|
||
|
# INSERT statement
|
||
|
#
|
||
|
my @host_flds = qw{
|
||
|
hostid
|
||
|
host
|
||
|
email
|
||
|
profile
|
||
|
license
|
||
|
local_image
|
||
|
valid
|
||
|
};
|
||
|
|
||
|
#
|
||
|
# Names of fields in the 'eps' table in the appropriate order for the later
|
||
|
# INSERT statement. Note that it omits the 'hostid' field.
|
||
|
#
|
||
|
my @eps_flds = qw{
|
||
|
id
|
||
|
date
|
||
|
title
|
||
|
summary
|
||
|
notes
|
||
|
series
|
||
|
explicit
|
||
|
license
|
||
|
tags
|
||
|
version
|
||
|
valid
|
||
|
};
|
||
|
|
||
|
#
|
||
|
# Enable Unicode output mode
|
||
|
#
|
||
|
binmode STDOUT, ":encoding(UTF-8)";
|
||
|
binmode STDERR, ":encoding(UTF-8)";
|
||
|
|
||
|
#
|
||
|
# Load configuration data
|
||
|
#
|
||
|
my %config = %{ LoadFile($configfile) };
|
||
|
|
||
|
#-------------------------------------------------------------------------------
|
||
|
# Connect to the database
|
||
|
#-------------------------------------------------------------------------------
|
||
|
my $dbname = $config{database}->{name};
|
||
|
my $dbuser = $config{database}->{user};
|
||
|
my $dbpwd = $config{database}->{password};
|
||
|
$dbh
|
||
|
= DBI->connect( "dbi:mysql:dbname=$dbname", $dbuser, $dbpwd,
|
||
|
{ AutoCommit => 1 } )
|
||
|
or die $DBI::errstr;
|
||
|
|
||
|
#
|
||
|
# Enable client-side UTF8
|
||
|
#
|
||
|
$dbh->{mysql_enable_utf8} = 1;
|
||
|
|
||
|
#=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
|
||
|
# The expectation is that we have the following original tables:
|
||
|
# hosts - the details of each host (contains some double entries
|
||
|
# "hostA and hostB")
|
||
|
# eps - the details of all episodes, currently with a host id number
|
||
|
# against each one
|
||
|
#
|
||
|
# We also have the following new tables for the transition:
|
||
|
# new_hosts - an empty copy of the 'hosts' table, InnoDB
|
||
|
# new_eps - an empty copy of the 'eps' table without the 'hostid'
|
||
|
# column, InnoDB
|
||
|
# hosts_eps - a mapping table for joining together the 'new_hosts' and
|
||
|
# 'new_eps' tables, InnoDB with foreign keys
|
||
|
#
|
||
|
# See the file 'hosts_eps.sql' for the DDL which creates these tables.
|
||
|
#=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
|
||
|
|
||
|
#-------------------------------------------------------------------------------
|
||
|
# Collect the entire 'hosts' table
|
||
|
#-------------------------------------------------------------------------------
|
||
|
$sth1 = $dbh->prepare(q{SELECT * FROM hosts ORDER BY hostid DESC});
|
||
|
$sth1->execute;
|
||
|
if ( $dbh->err ) {
|
||
|
die $dbh->errstr;
|
||
|
}
|
||
|
|
||
|
#
|
||
|
# Grab the data as an arrayref of hashrefs
|
||
|
#
|
||
|
my $hosts = $sth1->fetchall_arrayref( {} );
|
||
|
|
||
|
#
|
||
|
# Make hashes keyed on the host name and on the id
|
||
|
#
|
||
|
%hosts_by_name = map { $_->{host} => $_ } @{$hosts};
|
||
|
%hosts_by_id = map { $_->{hostid} => $_ } @{$hosts};
|
||
|
|
||
|
$max_hostid = max( map { $_->{hostid} } values(%hosts_by_name) );
|
||
|
|
||
|
#-------------------------------------------------------------------------------
|
||
|
# Collect the entire 'eps' table
|
||
|
#-------------------------------------------------------------------------------
|
||
|
$sth1 = $dbh->prepare(q{SELECT * FROM eps ORDER BY id DESC});
|
||
|
$sth1->execute;
|
||
|
if ( $dbh->err ) {
|
||
|
die $dbh->errstr;
|
||
|
}
|
||
|
|
||
|
#
|
||
|
# Grab the data as an arrayref of hashrefs
|
||
|
#
|
||
|
my $eps = $sth1->fetchall_arrayref( {} );
|
||
|
|
||
|
#
|
||
|
# Make a hash keyed on the episode number
|
||
|
#
|
||
|
%eps = map { $_->{id} => $_ } @{$eps};
|
||
|
|
||
|
#-------------------------------------------------------------------------------
|
||
|
# Walk the hash of hosts by name, finding double host entries. Stash the
|
||
|
# episode numbers against the hosts (but do it messily resulting in duplicates
|
||
|
# as a side effect)
|
||
|
#-------------------------------------------------------------------------------
|
||
|
foreach my $key ( keys(%hosts_by_name) ) {
|
||
|
$hostid = $hosts_by_name{$key}->{hostid};
|
||
|
|
||
|
#
|
||
|
# Is this a double ("HostA and HostB") entry?
|
||
|
#
|
||
|
if ( @names = ( $key =~ /^([[:print:]]+) and ([[:print:]]+)$/ ) ) {
|
||
|
printf "%3d: %s\n", $hosts_by_name{$key}->{hostid}, $key;
|
||
|
|
||
|
#
|
||
|
# Process the names picked out of the 'host' field
|
||
|
#
|
||
|
foreach my $name (@names) {
|
||
|
if ( exists( $hosts_by_name{$name} ) ) {
|
||
|
#
|
||
|
# Known name, report it
|
||
|
#
|
||
|
printf "\t%3d: %s\n", $hosts_by_name{$name}->{hostid}, $name;
|
||
|
printf "Replace %d with %d\n",
|
||
|
$hosts_by_name{$key}->{hostid},
|
||
|
$hosts_by_name{$name}->{hostid};
|
||
|
|
||
|
#
|
||
|
# Collect all episodes relating to the double id ($hostid) and
|
||
|
# add them to the known id ($hid)
|
||
|
#
|
||
|
$hid = $hosts_by_name{$name}->{hostid};
|
||
|
$hosts_by_id{$hid}->{eps}
|
||
|
= collect_eps( $hostid, \%eps,
|
||
|
$hosts_by_id{$hid}->{eps} );
|
||
|
|
||
|
#
|
||
|
# Mark the double id as not valid
|
||
|
#
|
||
|
$hosts_by_id{$hostid}->{valid} = 0;
|
||
|
}
|
||
|
else {
|
||
|
#
|
||
|
# Unknown name, make a new host entry
|
||
|
#
|
||
|
print "\t'$name' not known\n";
|
||
|
$max_hostid++;
|
||
|
$hosts_by_id{$max_hostid} = {
|
||
|
'profile' => '',
|
||
|
'local_image' => '0',
|
||
|
'hostid' => $max_hostid,
|
||
|
'license' => 'CC-BY-SA',
|
||
|
'host' => $name,
|
||
|
'valid' => '1',
|
||
|
'email' => ''
|
||
|
};
|
||
|
|
||
|
#
|
||
|
# Save all episodes for this name
|
||
|
#
|
||
|
$hid = $hosts_by_name{$key}->{hostid};
|
||
|
$hosts_by_id{$max_hostid}->{eps}
|
||
|
= collect_eps( $hid, \%eps,
|
||
|
$hosts_by_id{$max_hostid}->{eps} );
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
else {
|
||
|
#
|
||
|
# Single host, just collect all their episodes
|
||
|
#
|
||
|
$hosts_by_id{$hostid}->{eps}
|
||
|
= collect_eps( $hostid, \%eps, $hosts_by_id{$hostid}->{eps} );
|
||
|
}
|
||
|
}
|
||
|
|
||
|
#-------------------------------------------------------------------------------
|
||
|
# Report on the structure we built, de-duplicating as we go
|
||
|
#-------------------------------------------------------------------------------
|
||
|
foreach my $hid ( sort { $a <=> $b } keys(%hosts_by_id) ) {
|
||
|
if ( exists( $hosts_by_id{$hid}->{eps} ) ) {
|
||
|
#
|
||
|
# De-duplicate the episode list
|
||
|
#
|
||
|
@{ $hosts_by_id{$hid}->{eps} }
|
||
|
= sort { $a <=> $b } uniq( @{ $hosts_by_id{$hid}->{eps} } );
|
||
|
|
||
|
#
|
||
|
# Print the host details followed by the episodes
|
||
|
#
|
||
|
printf "Hostid: %d [%s,%d] (%d)\n", $hid,
|
||
|
$hosts_by_id{$hid}->{host},
|
||
|
$hosts_by_id{$hid}->{hostid},
|
||
|
scalar( @{ $hosts_by_id{$hid}->{eps} } );
|
||
|
foreach my $ep ( @{ $hosts_by_id{$hid}->{eps} } ) {
|
||
|
printf " Episode: %d\n", $ep;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
#-------------------------------------------------------------------------------
|
||
|
# Turn the %hosts_by_id hash into database insert statements
|
||
|
#-------------------------------------------------------------------------------
|
||
|
my $sql1 = sprintf( "INSERT INTO new_hosts VALUES(%s)",
|
||
|
join( ",", map { '?' } @host_flds ) );
|
||
|
$sth1 = $dbh->prepare($sql1);
|
||
|
|
||
|
my $sql2 = sprintf( "INSERT INTO new_eps VALUES(%s)",
|
||
|
join( ",", map { '?' } @eps_flds ) );
|
||
|
$sth2 = $dbh->prepare($sql2);
|
||
|
|
||
|
$sth3 = $dbh->prepare(q{INSERT INTO hosts_eps VALUES(?,?)});
|
||
|
|
||
|
#
|
||
|
# The 'new_hosts' table
|
||
|
#
|
||
|
foreach my $hid ( sort { $a <=> $b } keys(%hosts_by_id) ) {
|
||
|
$sth1->execute( @{ $hosts_by_id{$hid} }{@host_flds} );
|
||
|
if ( $dbh->err ) {
|
||
|
die $dbh->errstr;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
#
|
||
|
# The 'new_eps' table
|
||
|
#
|
||
|
foreach my $eid ( sort { $a <=> $b } keys(%eps) ) {
|
||
|
$sth2->execute( @{ $eps{$eid} }{@eps_flds} );
|
||
|
if ( $dbh->err ) {
|
||
|
die $dbh->errstr;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
#
|
||
|
# The 'hosts_eps' table
|
||
|
#
|
||
|
foreach my $hid ( sort { $a <=> $b } keys(%hosts_by_id) ) {
|
||
|
if ( exists( $hosts_by_id{$hid}->{eps} ) ) {
|
||
|
foreach my $ep ( @{ $hosts_by_id{$hid}->{eps} } ) {
|
||
|
$sth3->execute( $hosts_by_id{$hid}->{hostid}, $ep );
|
||
|
if ( $dbh->err ) {
|
||
|
die $dbh->errstr;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
exit;
|
||
|
|
||
|
#=== FUNCTION ================================================================
|
||
|
# NAME: collect_eps
|
||
|
# PURPOSE: Collect all the episodes relating to a hostid and return them,
|
||
|
# along with the contents of $current as an array of hashrefs
|
||
|
# PARAMETERS: $hostid the host id we're interested in
|
||
|
# $eps hashref containing anonymous hashes keyed by
|
||
|
# episode number
|
||
|
# $current a reference to any existing array of episodes
|
||
|
# for this host id
|
||
|
# RETURNS: A reference to the resulting array of anonymous hashes
|
||
|
# DESCRIPTION:
|
||
|
# THROWS: No exceptions
|
||
|
# COMMENTS: None
|
||
|
# SEE ALSO: N/A
|
||
|
#===============================================================================
|
||
|
sub collect_eps {
|
||
|
my ( $hostid, $eps, $current ) = @_;
|
||
|
my @host_eps;
|
||
|
|
||
|
@host_eps = @{$current} if $current;
|
||
|
|
||
|
foreach my $epsid ( keys(%$eps) ) {
|
||
|
if ( $eps->{$epsid}->{hostid} == $hostid ) {
|
||
|
push( @host_eps, $epsid );
|
||
|
}
|
||
|
}
|
||
|
|
||
|
return \@host_eps;
|
||
|
}
|
||
|
|
||
|
# vim: syntax=perl:ts=8:sw=4:et:ai:tw=78:fo=tcrqn21:fdm=marker
|