#!/usr/bin/env perl
#===============================================================================
#
#         FILE: remodel_db_host_eps
#
#        USAGE: ./remodel_db_host_eps
#
#  DESCRIPTION: Remodel the 'hosts' and 'eps' tables in the HPR database so
#               that a many-to-many relationship between host and episode can
#               be established.
#
#      OPTIONS: ---
# REQUIREMENTS: ---
#         BUGS: ---
#        NOTES: ---
#       AUTHOR: Dave Morriss (djm), Dave.Morriss@gmail.com
#      VERSION: 0.0.2
#      CREATED: 2014-05-08 10:55:28
#     REVISION: 2015-06-26 13:33:20
#
#===============================================================================

use 5.010;
use strict;
use warnings;
use utf8;

use YAML::Syck;
use List::Util qw{max};
use List::MoreUtils qw{uniq};
use DBI;
#use Data::Dumper;

#
# Version number (manually incremented)
#
our $VERSION = '0.0.2';

#
# Script name
#
# $PROG is the script's file name with any leading directory path removed
my $PROG = $0;
$PROG =~ s|.*/||mx;

# $DIR is the directory part of the invocation path; when the script was
# started without a path component this is empty, so fall back to '.'
my $DIR = $0;
$DIR =~ s|/?[^/]*$||mx;
$DIR ||= '.';

#-------------------------------------------------------------------------------
# Declarations
#-------------------------------------------------------------------------------
#
# Constants and other declarations
#
# Location of the database work area and of the YAML file holding the
# connection settings
my $basedir    = $ENV{HOME} . '/HPR/Database';
my $configfile = $basedir . '/.hpr_db.yml';

# Database and statement handles (plus scratch variables)
my ( $dbh, $sth1, $sth2, $sth3, $sth4 );
my ( $h1, $h2, $rv );

# Working data shared by the sections of the script below
my ( %hosts_by_name, %hosts_by_id, %eps );
my ( @names, $hostid, $hid, $max_hostid );

#
# Columns of the 'hosts' table. The order matters: it is the order in which
# values are bound to the placeholders of the later INSERT statement.
#
my @host_flds = qw{ hostid host email profile license local_image valid };

#
# Columns of the 'eps' table, again in INSERT order. 'hostid' is deliberately
# left out of this list.
#
my @eps_flds = qw{
    id date title summary notes series
    explicit license tags version valid
};

#
# Enable Unicode output mode
#
foreach my $stream ( \*STDOUT, \*STDERR ) {
    binmode $stream, ':encoding(UTF-8)';
}

#
# Load configuration data (the YAML file is expected to hold a mapping)
#
my $config_ref = LoadFile($configfile);
my %config     = %{$config_ref};

#-------------------------------------------------------------------------------
# Connect to the database
#-------------------------------------------------------------------------------
# Connection settings come from the 'database' section of the configuration
my $dbname = $config{database}->{name};
my $dbuser = $config{database}->{user};
my $dbpwd  = $config{database}->{password};

#
# Enable client-side UTF8 as part of the connect call. DBD::mysql only tells
# the server that incoming data is UTF-8 when 'mysql_enable_utf8' is given at
# connect time; setting it on the handle afterwards affects decoding of
# fetched data only, so non-ASCII values written later could be mangled.
#
$dbh = DBI->connect(
    "dbi:mysql:dbname=$dbname",
    $dbuser, $dbpwd,
    {   AutoCommit        => 1,
        mysql_enable_utf8 => 1,
    }
) or die $DBI::errstr;

#=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
# The expectation is that we have the following original tables:
# hosts         - the details of each host (contains some double entries
#                 "hostA and hostB")
# eps           - the details of all episodes, currently with a host id number
#                 against each one
#
# We also have the following new tables for the transition:
# new_hosts     - an empty copy of the 'hosts' table, InnoDB
# new_eps       - an empty copy of the 'eps' table without the 'hostid'
#                 column, InnoDB
# hosts_eps     - a mapping table for joining together the 'new_hosts' and
#                 'new_eps' tables, InnoDB with foreign keys
#
# See the file 'hosts_eps.sql' for the DDL which creates these tables.
#=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=

#-------------------------------------------------------------------------------
# Collect the entire 'hosts' table
#-------------------------------------------------------------------------------
$sth1 = $dbh->prepare(q{SELECT * FROM hosts ORDER BY hostid DESC})
    or die $dbh->errstr;
$sth1->execute;
if ( $dbh->err ) {
    die $dbh->errstr;
}

#
# Grab the data as an arrayref of hashrefs (one hashref per row, keyed by
# column name)
#
my $hosts = $sth1->fetchall_arrayref( {} );

#
# Make hashes keyed on the host name and on the id. Both hashes point at the
# same row hashrefs, so a change made through one is visible through the other.
#
%hosts_by_name = map { $_->{host}   => $_ } @{$hosts};
%hosts_by_id   = map { $_->{hostid} => $_ } @{$hosts};

#
# Highest host id in use; ids for newly created hosts are allocated above it.
# This is computed from every fetched row rather than from %hosts_by_name: if
# two rows share a host name only one survives in that hash, and a maximum
# taken from it could be too low, letting a new host overwrite an existing id.
# An empty table yields 0 so that the first new id is 1.
#
$max_hostid = max( map { $_->{hostid} } @{$hosts} ) // 0;

#-------------------------------------------------------------------------------
# Collect the entire 'eps' table
#-------------------------------------------------------------------------------
$sth1 = $dbh->prepare(q{SELECT * FROM eps ORDER BY id DESC});
$sth1->execute;
die $dbh->errstr if $dbh->err;

#
# All rows at once: an arrayref holding one hashref per episode, keyed by
# column name
#
my $eps = $sth1->fetchall_arrayref( {} );

#
# Index the episode rows by episode number
#
%eps = ();
foreach my $episode ( @{$eps} ) {
    $eps{ $episode->{id} } = $episode;
}

#-------------------------------------------------------------------------------
# Walk the hash of hosts by name, finding double host entries. Stash the
# episode numbers against the hosts (but do it messily resulting in duplicates
# as a side effect)
#-------------------------------------------------------------------------------
#
# The keys are visited in sorted order so that ids for newly created hosts are
# allocated in the same sequence on every run (plain hash order varies from
# run to run). The key list is built before the loop starts, so names added to
# %hosts_by_name inside the loop are not themselves visited.
#
foreach my $key ( sort keys(%hosts_by_name) ) {
    my $hostid = $hosts_by_name{$key}->{hostid};

    #
    # Is this a double ("HostA and HostB") entry? If so @names receives the
    # two captured names, otherwise it is empty.
    #
    my @names = ( $key =~ /^([[:print:]]+) and ([[:print:]]+)$/ );

    if ( !@names ) {
        #
        # Single host, just collect all their episodes
        #
        $hosts_by_id{$hostid}->{eps}
            = collect_eps( $hostid, \%eps, $hosts_by_id{$hostid}->{eps} );
        next;
    }

    printf "%3d: %s\n", $hostid, $key;

    #
    # Process the names picked out of the 'host' field
    #
    foreach my $name (@names) {
        if ( exists( $hosts_by_name{$name} ) ) {
            #
            # Known name, report it
            #
            my $hid = $hosts_by_name{$name}->{hostid};
            printf "\t%3d: %s\n", $hid, $name;
            printf "Replace %d with %d\n", $hostid, $hid;

            #
            # Collect all episodes relating to the double id ($hostid) and
            # add them to the known id ($hid)
            #
            $hosts_by_id{$hid}->{eps}
                = collect_eps( $hostid, \%eps, $hosts_by_id{$hid}->{eps} );

            #
            # Mark the double id as not valid
            #
            $hosts_by_id{$hostid}->{valid} = 0;
        }
        else {
            #
            # Unknown name, make a new host entry holding all the episodes
            # of the double entry
            #
            # NOTE(review): the double entry is only marked invalid in the
            # 'known name' branch, so a double whose names are both unknown
            # keeps valid=1 - confirm whether that is intended.
            #
            print "\t'$name' not known\n";
            $max_hostid++;
            my %new_host = (
                'profile'     => '',
                'local_image' => '0',
                'hostid'      => $max_hostid,
                'license'     => 'CC-BY-SA',
                'host'        => $name,
                'valid'       => '1',
                'email'       => '',
                'eps'         => collect_eps( $hostid, \%eps, undef ),
            );
            $hosts_by_id{$max_hostid} = \%new_host;

            #
            # Register the new host by name as well. Without this, the same
            # unknown name appearing in another double entry would create
            # a second host row for the same person.
            #
            $hosts_by_name{$name} = \%new_host;
        }
    }
}

#-------------------------------------------------------------------------------
# Report on the structure we built, de-duplicating as we go
#-------------------------------------------------------------------------------
foreach my $id ( sort { $a <=> $b } keys(%hosts_by_id) ) {
    my $host = $hosts_by_id{$id};

    # Hosts that never had an episode list attached are not reported
    next unless exists( $host->{eps} );

    #
    # Replace the episode list (in place) with a de-duplicated copy in
    # ascending numerical order
    #
    my @episodes = sort { $a <=> $b } uniq( @{ $host->{eps} } );
    @{ $host->{eps} } = @episodes;

    #
    # One header line per host (id, name, stored id, episode count), then
    # one line per episode
    #
    printf "Hostid: %d [%s,%d] (%d)\n", $id,
        $host->{host},
        $host->{hostid},
        scalar(@episodes);
    printf "  Episode: %d\n", $_ for @episodes;
}

#-------------------------------------------------------------------------------
# Turn the %hosts_by_id hash into database insert statements
#-------------------------------------------------------------------------------
#
# Build the three INSERT statements. The number of placeholders matches the
# number of names in the respective field list, and the values are later bound
# in that same order.
#
my $sql1 = sprintf( "INSERT INTO new_hosts VALUES(%s)",
    join( ",", map { '?' } @host_flds ) );
$sth1 = $dbh->prepare($sql1) or die $dbh->errstr;

my $sql2 = sprintf( "INSERT INTO new_eps VALUES(%s)",
    join( ",", map { '?' } @eps_flds ) );
$sth2 = $dbh->prepare($sql2) or die $dbh->errstr;

$sth3 = $dbh->prepare(q{INSERT INTO hosts_eps VALUES(?,?)})
    or die $dbh->errstr;

#
# Load all three tables inside a single transaction so that a failure part-way
# through does not leave them half populated. 'begin_work' suspends AutoCommit
# until the following commit or rollback.
#
$dbh->begin_work or die $dbh->errstr;

my $loaded = eval {
    #
    # The 'new_hosts' table: one row per host, values taken as a hash slice
    # in field-list order
    #
    foreach my $hid ( sort { $a <=> $b } keys(%hosts_by_id) ) {
        $sth1->execute( @{ $hosts_by_id{$hid} }{@host_flds} )
            or die "new_hosts, host $hid: " . $sth1->errstr . "\n";
    }

    #
    # The 'new_eps' table: one row per episode
    #
    foreach my $eid ( sort { $a <=> $b } keys(%eps) ) {
        $sth2->execute( @{ $eps{$eid} }{@eps_flds} )
            or die "new_eps, episode $eid: " . $sth2->errstr . "\n";
    }

    #
    # The 'hosts_eps' table: one row per (host, episode) pair. Hosts without
    # an episode list contribute nothing.
    #
    foreach my $hid ( sort { $a <=> $b } keys(%hosts_by_id) ) {
        next unless exists( $hosts_by_id{$hid}->{eps} );
        foreach my $ep ( @{ $hosts_by_id{$hid}->{eps} } ) {
            $sth3->execute( $hosts_by_id{$hid}->{hostid}, $ep )
                or die "hosts_eps, host $hid, episode $ep: "
                . $sth3->errstr . "\n";
        }
    }

    $dbh->commit or die "commit: " . $dbh->errstr . "\n";
    1;
};

if ( !$loaded ) {
    # Save the error before the rollback attempt can overwrite $@
    my $error = $@ || "unknown error\n";
    eval { $dbh->rollback or die $dbh->errstr . "\n"; 1 }
        or warn "Rollback failed: $@";
    die "Database update failed: $error";
}

exit;

#===  FUNCTION  ================================================================
#         NAME: collect_eps
#      PURPOSE: Find the episode numbers belonging to a host id and return
#               them appended to any episode numbers already collected
#   PARAMETERS: $hostid         the host id we're interested in
#               $eps            hashref keyed by episode number; each value is
#                               a hashref with (at least) a 'hostid' element
#               $current        optional reference to an array of episode
#                               numbers collected earlier (may be undef)
#      RETURNS: A reference to a new array of episode numbers: the contents
#               of $current followed by every key of $eps whose 'hostid' is
#               numerically equal to $hostid
#  DESCRIPTION: The array behind $current is copied, never modified. The
#               matching episode numbers are added in hash order and no
#               duplicates are removed; sorting and de-duplication are left to
#               the caller.
#       THROWS: No exceptions
#     COMMENTS: None
#     SEE ALSO: N/A
#===============================================================================
sub collect_eps {
    my ( $hostid, $eps, $current ) = @_;

    # Start from a copy of whatever was gathered before
    my @collected = $current ? @{$current} : ();

    # Append the episode numbers recorded against this host
    push( @collected,
        grep { $eps->{$_}->{hostid} == $hostid } keys( %{$eps} ) );

    return \@collected;
}

# vim: syntax=perl:ts=8:sw=4:et:ai:tw=78:fo=tcrqn21:fdm=marker