#!/usr/bin/env perl
#===============================================================================
#
#         FILE: find_double_hosts
#
#        USAGE: ./find_double_hosts
#
#  DESCRIPTION: Find HPR shows with two hosts (host is "A and B"), find the
#               hosts if possible and flag updates to the database to
#               represent the dual nature.
#
#      OPTIONS: ---
# REQUIREMENTS: ---
#         BUGS: ---
#        NOTES: NOTE: Don't run this. Its functionality is now part of
#               copy_mysql_pg_2 and is being applied in the 'epilogue' phase.
#       AUTHOR: Dave Morriss (djm), Dave.Morriss@gmail.com
#      VERSION: 0.0.2
#      CREATED: 2017-10-13 19:17:51
#     REVISION: 2017-10-13 19:19:43
#
#===============================================================================

use 5.010;
use strict;
use warnings;
use utf8;

use Config::General;
use DBI;

use Data::Dumper;

#
# Version number (manually incremented)
#
our $VERSION = '0.0.2';

#
# Script and directory names
#
( my $PROG = $0 ) =~ s|.*/||mx;
( my $DIR  = $0 ) =~ s|/?[^/]*$||mx;
$DIR = '.' unless $DIR;

#-------------------------------------------------------------------------------
# Declarations
#-------------------------------------------------------------------------------
#
# Constants and other declarations
#
my $basedir    = "$ENV{HOME}/HPR/Database";
my $configfile1 = "$basedir/.hpr_db.cfg";
my $configfile2 = "$basedir/.hpr_pg.cfg";

my $email_template  = 'host_%s@hackerpublicradio.org';
my $default_licence = 'CC-BY-SA';

my ( $dbh1, $dbh2, $sth1, $h1, $rv1, $sth2, $h2, $rv2, $sth3, $h3, $rv3, $sth4, $h4, $rv4 );

my ( %doubles, @h, %hosts, $unknown, $default_email );

#
# Enable Unicode mode
#
binmode STDOUT, ":encoding(UTF-8)";
binmode STDERR, ":encoding(UTF-8)";

#
# Load database configuration data
#
my $conf1 = Config::General->new(
    -ConfigFile      => $configfile1,
    -InterPolateVars => 1,
    -ExtendedAccess  => 1
);
my %config1 = $conf1->getall();

my $conf2 = Config::General->new(
    -ConfigFile      => $configfile2,
    -InterPolateVars => 1,
    -ExtendedAccess  => 1
);
my %config2 = $conf2->getall();

#-------------------------------------------------------------------------------
# Connect to the MariaDB database
#-------------------------------------------------------------------------------
my $dbtype1 = $config1{database}->{type} // 'mysql';
my $dbhost1 = $config1{database}->{host} // '127.0.0.1';
my $dbport1 = $config1{database}->{port} // 3306;
my $dbname1 = $config1{database}->{name};
my $dbuser1 = $config1{database}->{user};
my $dbpwd1  = $config1{database}->{password};
$dbh1
    = DBI->connect( "dbi:$dbtype1:host=$dbhost1;port=$dbport1;database=$dbname1",
    $dbuser1, $dbpwd1, { AutoCommit => 1 } )
    or die $DBI::errstr;

#
# Enable client-side UTF8
#
$dbh1->{mysql_enable_utf8} = 1;

#-------------------------------------------------------------------------------
# Connect to the PostgreSQL database
#-------------------------------------------------------------------------------
my $dbtype2 = $config2{database}->{type} // 'Pg';
my $dbhost2 = $config2{database}->{host} // '127.0.0.1';
my $dbport2 = $config2{database}->{port} // 5432;
my $dbname2 = $config2{database}->{name};
my $dbuser2 = $config2{database}->{user};
my $dbpwd2  = $config2{database}->{password};
$dbh2 = DBI->connect( "dbi:$dbtype2:host=$dbhost2;database=$dbname2;port=$dbport2",
    $dbuser2, $dbpwd2, { PrintError => 0, AutoCommit => 1 } )
    or die $DBI::errstr;

#
# Enable client-side UTF8
#
$dbh2->{pg_enable_utf8} = 1;

#-------------------------------------------------------------------------------
# Query preparation
#-------------------------------------------------------------------------------
#
# MariaDB query to find double hosts
#
my $sql1 = q{
    SELECT hostid, host FROM hosts
    WHERE host regexp '[[:<:]]and[[:>:]]'
    ORDER BY hostid
};

$sth1 = $dbh1->prepare($sql1) or die $DBI::errstr;
if ( $dbh1->err ) {
    warn $dbh1->errstr;
}

#
# MariaDB query to find the host by name
#
$sth2 = $dbh1->prepare(q{SELECT hostid FROM hosts WHERE host REGEXP ?})
    or die $DBI::errstr;
if ( $dbh1->err ) {
    warn $dbh1->errstr;
}

#
# PostgreSQL query to register an unknown host
#
$sth3
    = $dbh2->prepare(q{INSERT INTO hosts (host,email,license) VALUES (?,?,?)})
    or die $DBI::errstr;
if ( $dbh2->err ) {
    warn $dbh2->errstr;
}

#
# PostgreSQL query to find shows with particular host ids
#
$sth4 = $dbh2->prepare(
    q{
        SELECT e.id AS eps_id
        FROM episodes e
        JOIN episodes_hosts_xref eh ON (e.id = eh.episodes_id)
        JOIN hosts h ON (h.id = eh.hosts_id)
        WHERE h.id = ?
    }
) or die $DBI::errstr;
if ( $dbh2->err ) {
    warn $dbh2->errstr;
}

#-------------------------------------------------------------------------------
# Find all the "double hosts"
#-------------------------------------------------------------------------------
#
# Query MariaDB for the target hosts
#
$sth1->execute;
if ( $dbh1->err ) {
    die $dbh1->errstr;
}

#
# Loop through the list of double hostnames and parse them out. Save the
# originals in the %doubles hash and the parsed names in the %hosts hash.
#
while ( $h1 = $sth1->fetchrow_hashref ) {
    #
    # Each hash value is a hash containing the original id, and, in a sub-hash
    # the replacement ids
    #
    $doubles{$h1->{host}} = {
        double => $h1->{hostid},
        singles => {},
    };

    #
    # Parse the double host string
    #
    @h = ( $h1->{host} =~ /^(.+)\s+and\s+(.+)$/ );

    printf "%-4d %s", $h1->{hostid}, $h1->{host};
    print " [", join( ",", @h ), "]\n";

    #
    # Initialise the entries for %doubles and %hosts
    #
    for my $host (@h) {
        $doubles{$h1->{host}}->{singles}->{$host} = undef;
        unless ( exists( $hosts{$host} ) ) {
            $hosts{$host} = 0;
        }
    }
}

print '-' x 80,"\n";

#-------------------------------------------------------------------------------
# Find the single hosts in the 'hosts' table
#-------------------------------------------------------------------------------
#
# Scan the list of individual hosts and find them in the 'hosts' table
#
$unknown = 0;
foreach my $host ( sort(keys(%hosts)) ) {
    $rv2 = $sth2->execute("^$host\$");
    if ( $dbh1->err ) {
        die $dbh1->errstr;
    }
    $rv2 = 0 if ( $rv2 eq '0E0' );

    if ($rv2) {
        $h2 = $sth2->fetchrow_hashref;
        print "Found id for $host: ", $h2->{hostid}, "\n";
        $hosts{$host} = $h2->{hostid};
        save_hostid(\%doubles,$host,$h2->{hostid});
    }
    else {
        print "Can't find $host\n";
        $unknown++;
    }
}

#print Dumper(\%hosts),"\n";

print '-' x 80,"\n";

#-------------------------------------------------------------------------------
# Allocate all unknown hosts a host id in the PostgreSQL database, and give an
# unique email address.
#-------------------------------------------------------------------------------
if ( $unknown > 0 ) {
    print "Registering $unknown hosts\n";

    foreach my $host ( sort( keys(%hosts) ) ) {
        if ( $hosts{$host} == 0 ) {
            $rv3 = $sth3->execute( $host, undef, $default_licence );
            if ( $dbh2->err ) {
                die $dbh2->errstr;
            }

            #
            # Write a row to the 'hosts' table and save the id number
            # generated
            #
            my $newid = $dbh2->last_insert_id( undef, undef, undef, undef,
                { sequence => 'host_seq' } );
            $hosts{$host} = $newid;
            save_hostid(\%doubles,$host,$newid);
            print "Host $host added with id $newid\n";

            #
            # Give the new host entry a default email address
            #
            $default_email = sprintf($email_template,$newid);
            $rv3 = $dbh2->do( 'UPDATE hosts SET email = ? WHERE id = ?',
                undef, $default_email, $newid );
            if ( $dbh2->err ) {
                warn $dbh2->errstr;
            }
            $rv3 = 0 if ( $rv3 eq '0E0' );

            warn "Failed to set email address $default_email for $host\n"
                unless ( defined($rv3) );

        }
    }
}

print '-' x 80,"\n";

#-------------------------------------------------------------------------------
# Now %doubles contains all the original names and host ids and %hosts
# contains the parsed out names and their ids. We can look for shows
# attributed to the first set and re-attribute them to the second set.
#-------------------------------------------------------------------------------
print "Changing host associations for shows with two hosts\n";
foreach my $double ( sort( keys(%doubles) ) ) {
    print "Processing $double\n";
    my ( $doubleid, @newids ) = (
        $doubles{$double}->{double},
        values( %{ $doubles{$double}->{singles} } )
    );
    print "  Original id: $doubleid\n";
    print "  Replacements: ", join( ", ", @newids ), "\n";

    #
    # Find shows marked as belonging to this double-host
    #
    $sth4->execute($doubleid);
    if ( $dbh2->err ) {
        die $dbh2->errstr;
    }

    #
    # Process all the shows
    #
    while ( $h4 = $sth4->fetchrow_hashref ) {
        my $eps_id = $h4->{eps_id};
        print "  Show $eps_id is ascribed to host $doubleid\n";

        $dbh2->begin_work();

        #
        # Delete the xref link for the double host
        #
        $rv4
            = $dbh2->do(
            'DELETE FROM episodes_hosts_xref WHERE episodes_id = ?',
            undef, $eps_id );
        if ( $dbh2->err ) {
            warn $dbh2->errstr;
        }
        $rv4 = 0 if ( $rv4 eq '0E0' );

        if ( defined($rv4) ) {
            print "  Deleted entry from episodes_hosts_xref for $eps_id\n";
        }
        else {
            warn "Problem deleting from episodes_hosts_xref for $eps_id\n";
        }

        #
        # Add links for the single hosts
        #
        foreach my $hid (@newids) {
            $rv4 = $dbh2->do( 'INSERT INTO episodes_hosts_xref VALUES (?,?)',
                undef, $eps_id, $hid );
            if ( $dbh2->err ) {
                warn $dbh2->errstr;
            }
            $rv4 = 0 if ( $rv4 eq '0E0' );

            if ( defined($rv4) ) {
                print "  Added entry to episodes_hosts_xref values ",
                    "$eps_id,$hid\n";
            }
            else {
                warn "Problem adding to episodes_hosts_xref values "
                    . "$eps_id,$hid\n";
            }

        }

        #
        # Commit the delete/inserts above
        #
        $dbh2->commit();

    }
    print '~' x 80, "\n";

    #
    # Delete the double host (NOTE: This will fail due to referential
    # integrity if the DELETE above failed, so there is scope for debris to be
    # left around)
    #
    $rv4 = $dbh2->do( 'DELETE FROM hosts WHERE id = ?', undef, $doubleid );
    if ( $dbh2->err ) {
        warn $dbh2->errstr;
    }
    $rv4 = 0 if ( $rv4 eq '0E0' );

    if ( defined($rv4) ) {
        print "  Deleted entry from hosts for id $doubleid ($double)\n";
    }
    else {
        warn "Problem deleting from hosts for id $doubleid ($double)\n";
    }

}

print '-' x 80,"\n";

exit;

#===  FUNCTION  ================================================================
#         NAME: save_hostid
#      PURPOSE: Saves the host id after searching for the key in the %doubles
#               hash
#   PARAMETERS: $doubles                hashref to %doubles
#               $host                   host key
#               $hostid                 host id number
#      RETURNS: Nothing
#  DESCRIPTION: Searches the %doubles hash for particular keys in the
#               'singles' sub-hash. If found saves the corresponding host id
#               there.
#       THROWS: No exceptions
#     COMMENTS: None
#     SEE ALSO: N/A
#===============================================================================
sub save_hostid {
    my ( $doubles, $host, $hostid ) = @_;

    foreach my $key ( keys(%$doubles) ) {
        if ( exists( $doubles->{$key}->{singles}->{$host} ) ) {
            $doubles->{$key}->{singles}->{$host} = $hostid;
        }
    }
}

# vim: syntax=perl:ts=8:sw=4:et:ai:tw=78:fo=tcrqn21:fdm=marker