hpr-tools/PostgreSQL_Database/copy_mysql_pg_2

2288 lines
69 KiB
Plaintext
Raw Permalink Normal View History

#!/usr/bin/env perl
#===============================================================================
#
# FILE: copy_mysql_pg_2
#
# USAGE: ./copy_mysql_pg_2
#
# DESCRIPTION: Copies HPR show data from the MariaDB database to an
# experimental PostgreSQL database (second version "HPR2")
#
# OPTIONS: ---
# REQUIREMENTS: ---
# BUGS: ---
# NOTES: ---
# AUTHOR: Dave Morriss (djm), Dave.Morriss@gmail.com
# VERSION: 0.1.5
# CREATED: 2017-10-23 19:11:48
# REVISION: 2019-06-01 16:10:23
#
#===============================================================================
use 5.010;
use strict;
use warnings;
use utf8;
use Getopt::Long;
use Pod::Usage;
use Config::General;
use List::MoreUtils qw{uniq apply};
use Text::CSV;
use DBI;
use SQL::Abstract;
use Data::Dumper;
#
# Version number (manually incremented)
#
our $VERSION = '0.1.5';
#
# Script and directory names
#
( my $PROG = $0 ) =~ s|.*/||mx;
( my $DIR = $0 ) =~ s|/?[^/]*$||mx;
$DIR = '.' unless $DIR;
#-------------------------------------------------------------------------------
# Declarations
#-------------------------------------------------------------------------------
#
# Constants and other declarations
#
my $basedir = "$ENV{HOME}/HPR/PostgreSQL_Database";
my $configfile1 = "$basedir/.hpr_db.cfg";
my $configfile2 = "$basedir/.hpr_pg2.cfg";
my $database3 = "$basedir/ia.db"; # soft link
my $email_template = 'host_%s@hackerpublicradio.org';
my $default_licence = 'CC-BY-SA';
my ( $dbh1, $sth1, $h1, $rv1 );
my ( $dbh2, $sth2, $h2, $rv2 );
my ( $dbh3, $sth3, $h3, $rv3 );
my (@phase_choices);
my ( %eps_tags, %data );
my @phases = (
'episodes', 'hosts', 'eh_xref', 'series', 'es_xref', 'tags',
'comments', 'archived', 'assets', 'epilogue'
);
#
# Enable Unicode mode
#
binmode STDOUT, ":encoding(UTF-8)";
binmode STDERR, ":encoding(UTF-8)";
#-------------------------------------------------------------------------------
# Options and arguments
#-------------------------------------------------------------------------------
my $DEF_DEBUG = 0;
#
# Process options
#
my %options;
Options( \%options );
#
# Default help
#
pod2usage( -msg => "$PROG version $VERSION\n", -exitval => 1 )
if ( $options{'help'} );
#
# Collect options
#
my $DEBUG = ( defined( $options{debug} ) ? $options{debug} : $DEF_DEBUG );
my $cfgfile1
= ( defined( $options{config} ) ? $options{config} : $configfile1 );
my $verbose = ( defined( $options{verbose} ) ? $options{verbose} : 0 );
my $dry_run = ( defined( $options{'dry-run'} ) ? $options{'dry-run'} : 0 );
#
# This option is a list, provided as a CSV
#
my $phase_choices = $options{phases};
if ( defined($phase_choices) ) {
#
# We have a list which we'll parse, validate, sort, make unique and filter
#
my $lcsv = Text::CSV_XS->new( { binary => 1, } );
if ( $lcsv->parse($phase_choices) ) {
# Sort fields
@phase_choices = uniq( sort { $a cmp $b } $lcsv->fields() );
# Trim leading and trailing spaces
@phase_choices = apply { $_ =~ s/(^\s*|\s*$)// } @phase_choices;
# Make a list of invalid keywords
my %tmp = map { $_ => 1 } @phases;
my @bad = grep { not exists $tmp{$_} } @phase_choices;
# Deal with all errors
die "Invalid list; no elements\n" if scalar(@phase_choices) == 0;
die "Invalid list; too many elements\n"
if scalar(@phase_choices) > scalar(@phases);
die "Invalid list elements: ", join( ",", @bad ) . "\n"
if scalar(@bad) > 0;
}
else {
die "Failed to parse -list='$phase_choices'\n"
. $lcsv->error_diag() . "\n";
}
}
else {
#
# By default we do all phases
#
@phase_choices = @phases;
}
#-------------------------------------------------------------------------------
# Configuration file for MySQL/MariaDB - load data
#-------------------------------------------------------------------------------
my $conf1 = Config::General->new(
-ConfigFile => $cfgfile1,
-InterPolateVars => 1,
-ExtendedAccess => 1
);
my %config1 = $conf1->getall();
my $conf2 = Config::General->new(
-ConfigFile => $configfile2,
-InterPolateVars => 1,
-ExtendedAccess => 1
);
my %config2 = $conf2->getall();
#-------------------------------------------------------------------------------
# Connect to the MariaDB database
#-------------------------------------------------------------------------------
my $dbtype1 = $config1{database}->{type} // 'mysql';
my $dbhost1 = $config1{database}->{host} // '127.0.0.1';
my $dbport1 = $config1{database}->{port} // 3306;
my $dbname1 = $config1{database}->{name};
my $dbuser1 = $config1{database}->{user};
my $dbpwd1 = $config1{database}->{password};
$dbh1
= DBI->connect( "dbi:$dbtype1:host=$dbhost1;port=$dbport1;database=$dbname1",
$dbuser1, $dbpwd1, { AutoCommit => 1 } )
or die $DBI::errstr;
#
# Enable client-side UTF8
#
$dbh1->{mysql_enable_utf8} = 1;
#-------------------------------------------------------------------------------
# Connect to the PostgreSQL database
#-------------------------------------------------------------------------------
my $dbtype2 = $config2{database}->{type} // 'Pg';
my $dbhost2 = $config2{database}->{host} // '127.0.0.1';
my $dbport2 = $config2{database}->{port} // 5432;
my $dbname2 = $config2{database}->{name};
my $dbuser2 = $config2{database}->{user};
my $dbpwd2 = $config2{database}->{password};
$dbh2 = DBI->connect( "dbi:$dbtype2:host=$dbhost2;database=$dbname2;port=$dbport2",
$dbuser2, $dbpwd2, { PrintError => 0, AutoCommit => 1 } )
or die $DBI::errstr;
#
# Enable client-side UTF8
#
$dbh2->{pg_enable_utf8} = 1;
#-------------------------------------------------------------------------------
# Connect to the SQLite database
#-------------------------------------------------------------------------------
$dbh3 = DBI->connect( "dbi:SQLite:dbname=$database3", "", "" );
my %choices = map { $_ => 1 } @phase_choices;
#
# Perform phases in order, omitting those that are not in the list
#
for my $phase (@phases) {
#---------------------------------------------------------------------------
# Copy the 'eps' table to 'episodes'
#---------------------------------------------------------------------------
if ( $phase eq 'episodes' && exists( $choices{$phase} ) ) {
print "Build episodes table\n" if ( $verbose > 0 );
if ( check_table( $dbh2, 'episodes' ) ) {
build_episodes_table( $dbh1, $dbh2, $verbose ) unless $dry_run;
}
else {
print "** Table 'episodes' is not empty\n";
}
}
#---------------------------------------------------------------------------
# Copy the 'hosts' table to 'hosts'
#---------------------------------------------------------------------------
elsif ( $phase eq 'hosts' && exists( $choices{$phase} ) ) {
print "Build hosts table\n" if ( $verbose > 0 );
if ( check_table( $dbh2, 'hosts' ) ) {
build_hosts_table( $dbh1, $dbh2, $email_template, $verbose )
unless $dry_run;
}
else {
print "** Table 'hosts' is not empty\n";
}
}
#---------------------------------------------------------------------------
# Generate the 'episodes_hosts_xref' table
#---------------------------------------------------------------------------
elsif ( $phase eq 'eh_xref' && exists( $choices{$phase} ) ) {
print "Build episodes_hosts_xref table\n" if ( $verbose > 0 );
if ( check_table( $dbh2, 'episodes_hosts_xref' ) ) {
build_episodes_hosts_xref_table( $dbh1, $dbh2, $verbose )
unless $dry_run;
}
else {
print "** Table 'episodes_hosts_xref' is not empty\n";
}
}
#---------------------------------------------------------------------------
# Copy the 'miniseries' table to 'series'
#---------------------------------------------------------------------------
elsif ( $phase eq 'series' && exists( $choices{$phase} ) ) {
print "Build series table\n" if ( $verbose > 0 );
if ( check_table( $dbh2, 'series' ) ) {
build_series_table( $dbh1, $dbh2, $verbose ) unless $dry_run;
}
else {
print "** Table 'series' is not empty\n";
}
}
#---------------------------------------------------------------------------
# Generate the 'episodes_series_xref' table
#---------------------------------------------------------------------------
elsif ( $phase eq 'es_xref' && exists( $choices{$phase} ) ) {
print "Build episodes_series_xref table\n" if ( $verbose > 0 );
if ( check_table( $dbh2, 'episodes_series_xref' ) ) {
build_episodes_series_xref_table( $dbh1, $dbh2, $verbose )
unless $dry_run;
}
else {
print "** Table 'episodes_series_xref' is not empty\n";
}
}
#---------------------------------------------------------------------------
# Collect and store the id numbers and tags from the MySQL 'eps' table,
# then add them to the PostgreSQL tables.
#---------------------------------------------------------------------------
elsif ( $phase eq 'tags' && exists( $choices{$phase} ) ) {
print "Build tags and episodes_tags_xref tables\n" if ( $verbose > 0 );
if ( check_table( $dbh2, 'tags' )
&& check_table( $dbh2, 'episodes_tags_xref' ) )
{
unless ($dry_run) {
%eps_tags = %{ collect_eps_tags( $dbh1, $verbose ) };
if (%eps_tags) {
build_tags_table( $dbh2, $verbose, \%eps_tags );
}
}
}
else {
print "** Tables 'tags' and/or 'episodes_tags_xref' are not empty\n";
}
}
#---------------------------------------------------------------------------
# Copy the 'comments' table to 'comments'
#---------------------------------------------------------------------------
elsif ( $phase eq 'comments' && exists( $choices{$phase} ) ) {
print "Build comments table\n" if ( $verbose > 0 );
if (check_table($dbh2,'comments')) {
build_comments_table( $dbh1, $dbh2, $verbose ) unless $dry_run;
}
else {
print "** Table 'comments' is not empty\n";
}
}
#---------------------------------------------------------------------------
# Fill in archive-related fields the 'episodes' table from 'ia.db'
#---------------------------------------------------------------------------
elsif ( $phase eq 'archived' && exists( $choices{$phase} ) ) {
print "Copy archive-related fields to episodes table\n" if ( $verbose > 0 );
update_episodes_table( $dbh3, $dbh2, $verbose ) unless $dry_run;
}
#---------------------------------------------------------------------------
# Copy the 'assets' table in 'ia.db' to 'assets'
#---------------------------------------------------------------------------
elsif ( $phase eq 'assets' && exists( $choices{$phase} ) ) {
print "Build assets table\n" if ( $verbose > 0 );
if (check_table($dbh2,'assets')) {
build_assets_table( $dbh3, $dbh2, $verbose ) unless $dry_run;
}
else {
print "** Table 'assets' is not empty\n";
}
}
#---------------------------------------------------------------------------
# Perform the 'epilogue' actions
#---------------------------------------------------------------------------
elsif ( $phase eq 'epilogue' && exists( $choices{$phase} ) ) {
print "Perform epilogue actions\n" if ( $verbose > 0 );
#
# Resolve the "double host" problems. We have *not* copied these
# across from the MySQL 'hosts' table, so can resolve them now.
#
resolve_double_hosts( $dbh1, $dbh2, $email_template, $default_licence,
$verbose ) unless $dry_run;
#
# Determine the first show date per host, assuming that's when they
# were added to the database
#
compute_host_date_added($dbh2) unless $dry_run;
}
}
exit;
#=== FUNCTION ================================================================
# NAME: build_episodes_table
# PURPOSE: Copy the data from the MariaDB 'eps' table to the Pg
# 'episodes' table
# PARAMETERS: $dbh1 Handle for the MariaDB database
# $dbh2 Handle for the Pg database
# $verbose Verbosity level
# RETURNS: Nothing
# DESCRIPTION:
# THROWS: No exceptions
# COMMENTS: None
# SEE ALSO: N/A
#===============================================================================
sub build_episodes_table {
my ( $dbh1, $dbh2, $verbose ) = @_;
my $count = 0;
$count += load_twat_episodes_table( $dbh1, $dbh2, 'twat', $verbose );
$count += load_eps_table( $dbh1, $dbh2, 'hpr', $verbose );
print "Added $count records to the 'episodes' table\n"
if ( $verbose > 0 );
#
# Set the sequence to the correct value
#
alter_seq( $dbh2, 'episodes', 'episode_id', 'episode_seq' );
}
#=== FUNCTION ================================================================
# NAME: load_twat_episodes_table
# PURPOSE: Load the 'twat_episodes' table from the MariaDB database into
# the PostgreSQL 'episodes' table.
# PARAMETERS: $dbh1 Handle for the MariaDB database
# $dbh2 Handle for the Pg database
# $key_prefix String to prefix the 'episode_key' field with
# $verbose Verbosity level
# RETURNS: Number of rows added
# DESCRIPTION:
# THROWS: No exceptions
# COMMENTS: None
# SEE ALSO: N/A
#===============================================================================
sub load_twat_episodes_table {
my ( $dbh1, $dbh2, $key_prefix, $verbose ) = @_;
my ( $sth1, $h1, $sth2, $count );
#
# Query to collect the entire 'twat_episodes' table
#
$sth1 = $dbh1->prepare('SELECT * FROM twat_episodes ORDER BY ep_num')
or die $DBI::errstr;
if ( $dbh1->err ) {
warn $dbh1->errstr;
}
#
# Query to insert into the PostgreSQL 'episodes' table
#
$sth2 = $dbh2->prepare(
q{INSERT INTO episodes
(episode_key, release_date, title, notes, explicit, license)
VALUES (?,date(to_timestamp(?)),?,?,TRUE,
id_in_licenses('CC-BY-SA'))}
) or die $DBI::errstr;
if ( $dbh2->err ) {
warn $dbh2->errstr;
}
#
# Query MariaDB for the entire 'twat_episodes' table
#
$sth1->execute;
if ( $dbh1->err ) {
die $dbh1->errstr;
}
#
# Loop though 'twat_episodes' table rows writing them to the PostgreSQL
# 'episodes' table
#
$count = 0;
while ( $h1 = $sth1->fetchrow_hashref ) {
$count++;
$sth2->execute(
sprintf( "%s%04d", $key_prefix, $h1->{ep_num} ),
$h1->{date},
coalesce( $h1->{topic}, '[undefined]' ),
coalesce( $h1->{writeup}, '<p>No notes</p>' ),
);
if ( $dbh2->err ) {
die $dbh2->errstr;
}
}
print "Copied $count records from 'twat_episodes'\n" if ( $verbose > 0 );
return $count;
}
#=== FUNCTION ================================================================
# NAME: load_eps_table
# PURPOSE: Load the 'eps' table from the MariaDB database into the
# PostgreSQL 'episodes' table.
# PARAMETERS: $dbh1 Handle for the MariaDB database
# $dbh2 Handle for the Pg database
# $key_prefix String to prefix the 'episode_key' field with
# $verbose Verbosity level
# RETURNS: Number of rows added
# DESCRIPTION:
# THROWS: No exceptions
# COMMENTS: None
# SEE ALSO: N/A
#===============================================================================
sub load_eps_table {
my ( $dbh1, $dbh2, $key_prefix, $verbose ) = @_;
my ( $sth1, $h1, $sth2, $count );
#
# Query to collect rows from the old dfatabase
#
$sth1 = $dbh1->prepare('SELECT * FROM eps ORDER BY id')
or die $DBI::errstr;
if ( $dbh1->err ) {
warn $dbh1->errstr;
}
#
# Query to insert rows into the new database
#
$sth2 = $dbh2->prepare(
q{INSERT INTO episodes
(episode_key, release_date, title, summary, notes, explicit,
license, duration, downloads)
VALUES (?,?,?,?,?,?,id_in_licenses(?),? * '1 second'::interval,?)}
) or die $DBI::errstr;
if ( $dbh2->err ) {
warn $dbh2->errstr;
}
#
# Query MariaDB for the entire 'eps' table
#
$sth1->execute;
if ( $dbh1->err ) {
die $dbh1->errstr;
}
#
# Loop though 'eps' table rows writing them to the PostgreSQL 'episodes'
# table
#
$count = 0;
while ( $h1 = $sth1->fetchrow_hashref ) {
$count++;
$sth2->execute(
sprintf( "%s%04d", $key_prefix, $h1->{id} ),
$h1->{date},
$h1->{title},
nullif( $h1->{summary}, '^\s*$' ),
$h1->{notes},
$h1->{explicit},
$h1->{license},
$h1->{duration},
$h1->{downloads},
);
if ( $dbh2->err ) {
die $dbh2->errstr;
}
}
print "Copied $count records from 'eps'\n" if ( $verbose > 0 );
return $count;
}
#=== FUNCTION ================================================================
# NAME: build_hosts_table
# PURPOSE: Copy the data from the Mariadb 'hosts' table to the Pg 'hosts'
# table
# PARAMETERS: $dbh1 Handle for the MariaDB table
# $dbh2 Handle for the Pg database
# $template Template for building the default email
# $verbose Verbosity level
# RETURNS: Nothing
# DESCRIPTION: Copies records from one table into the other. However, some
# transformations are made along the way: blank emails and
# 'admin@hackerpublicradio.org' are converted to 'host_NNN@hpr',
# empty profiles and GPG strings are turned into NULL values,
# and licence settings are converted to the id of the licence
# in the 'licenses' table (in the database).
# The double host problem is an issue though. Since the email
# address field is unique we get errors since the double hosts
# have duplicate addresses.
# THROWS: No exceptions
# COMMENTS: None
# SEE ALSO: N/A
#===============================================================================
sub build_hosts_table {
my ( $dbh1, $dbh2, $template, $verbose ) = @_;
my ( $sth1, $h1, $sth2, $rv, $count );
$sth1 = $dbh1->prepare('SELECT * FROM hosts') or die $DBI::errstr;
if ( $dbh1->err ) {
warn $dbh1->errstr;
}
$sth2 = $dbh2->prepare(
q{INSERT INTO hosts
(host_id, host, email, profile, license, local_image, gpg, espeak_name)
VALUES (?,?,?,?,id_in_licenses(?),?,?,?)}
) or die $DBI::errstr;
if ( $dbh2->err ) {
warn $dbh2->errstr;
}
$sth1->execute;
if ( $dbh1->err ) {
die $dbh1->errstr;
}
$count = 0;
while ( $h1 = $sth1->fetchrow_hashref ) {
#
# Ignore "double host" entries
#
if ( $h1->{host} !~ /^(.+)\s+and\s+(.+)$/ ) {
$count++;
$sth2->execute(
$h1->{hostid},
$h1->{host},
default_email(
$h1->{email}, '^(\s*|admin@hackerpublicradio.org)$',
$template, $h1->{hostid}
),
nullif( $h1->{profile}, '^\s*$' ),
$h1->{license},
$h1->{local_image},
nullif( $h1->{gpg}, '^\s*$' ),
$h1->{espeak_name},
);
if ( $dbh2->err ) {
die $dbh2->errstr;
}
}
}
print "Copied $count records\n" if ( $verbose > 0 );
#
# Set the sequence to the correct value
#
alter_seq( $dbh2, 'hosts', 'host_id', 'host_seq' );
}
#=== FUNCTION ================================================================
# NAME: build_episodes_hosts_xref_table
# PURPOSE: Generates the cross reference table by examining the 'eps' and
# 'hosts' tables in the MariaDB database.
# PARAMETERS: $dbh1 Handle for the MariaDB table
# $dbh2 Handle for the Pg database
# $verbose Verbosity level
# RETURNS: Nothing
# DESCRIPTION:
# THROWS: No exceptions
# COMMENTS: None
# SEE ALSO: N/A
#===============================================================================
sub build_episodes_hosts_xref_table {
my ( $dbh1, $dbh2, $verbose ) = @_;
my $count = 0;
$count += link_twat_episodes( $dbh1, $dbh2, $verbose );
$count += link_hpr_episodes( $dbh1, $dbh2, $verbose );
print "Added a total of $count links\n" if ( $verbose > 0 );
}
#=== FUNCTION ================================================================
# NAME: link_twat_episodes
# PURPOSE: Generate cross reference links between TwaT episodes and hosts
# PARAMETERS: $dbh1 Handle for the MariaDB database
# $dbh2 Handle for the Pg database
# $verbose Verbosity level
# RETURNS: The count of links
# DESCRIPTION:
# THROWS: No exceptions
# COMMENTS: None
# SEE ALSO: N/A
#===============================================================================
sub link_twat_episodes {
my ( $dbh1, $dbh2, $verbose ) = @_;
my ( $sth1, $h1, $sth2, $count, $episode_key );
#
# Query to find the TwaT episode and hostid from the old database. Use
# LEFT JOIN so we see when there's no host match.
#
$sth1 = $dbh1->prepare(
# q{SELECT t.ep_num,h.hostid
# FROM twat_episodes t, hosts h
# WHERE t.host = h.host}
q{SELECT t.ep_num, h.hostid
FROM twat_episodes t
LEFT JOIN hosts h ON (t.host = h.host)}
) or die $DBI::errstr;
if ( $dbh1->err ) {
warn $dbh1->errstr;
}
#
# Query to add a cross reference link to the new database
#
$sth2 = $dbh2->prepare(
q{INSERT INTO episodes_hosts_xref
SELECT e.episode_id, h.host_id
FROM episodes e, hosts h
WHERE e.episode_key = ? AND h.host_id = ?}
) or die $DBI::errstr;
if ( $dbh2->err ) {
warn $dbh2->errstr;
}
#
# Collect all the TwaT episodes and host id numbers
#
$sth1->execute;
if ( $dbh1->err ) {
die $dbh1->errstr;
}
#
# Loop through the result of the query, adding links to the new database.
# If the returned hostid is NULL then we don't have this host in the
# 'hosts' table and need to warn about it.
#
$count = 0;
while ( $h1 = $sth1->fetchrow_hashref ) {
$episode_key = sprintf( "twat%04d", $h1->{ep_num} );
if (defined($h1->{hostid})) {
$count++;
$sth2->execute( $episode_key, $h1->{hostid}, );
if ( $dbh2->err ) {
die $dbh2->errstr;
}
}
else {
warn "Unable to link $episode_key to a host\n";
}
}
print "Added $count links to TwaT episodes\n" if ( $verbose > 0 );
return $count;
}
#=== FUNCTION ================================================================
# NAME: link_hpr_episodes
# PURPOSE: Generate cross reference links between HPR episodes and hosts
# PARAMETERS: $dbh1 Handle for the MariaDB database
# $dbh2 Handle for the Pg database
# $verbose Verbosity level
# RETURNS: The count of links
# DESCRIPTION:
# THROWS: No exceptions
# COMMENTS: None
# SEE ALSO: N/A
#===============================================================================
sub link_hpr_episodes {
my ( $dbh1, $dbh2, $verbose ) = @_;
my ( $sth1, $h1, $sth2, $count, $episode_key );
#
# Query to find the HPR episode and hostid from the old database
#
$sth1 = $dbh1->prepare(
q{SELECT e.id,h.hostid
FROM eps e, hosts h
WHERE e.hostid = h.hostid}
) or die $DBI::errstr;
if ( $dbh1->err ) {
warn $dbh1->errstr;
}
#
# Query to add a cross reference link to the new database
#
$sth2 = $dbh2->prepare(
q{INSERT INTO episodes_hosts_xref
SELECT e.episode_id, h.host_id
FROM episodes e, hosts h
WHERE e.episode_key = ? AND h.host_id = ?}
) or die $DBI::errstr;
if ( $dbh2->err ) {
warn $dbh2->errstr;
}
#
# Collect all the HPR episodes and host id numbers
#
$sth1->execute;
if ( $dbh1->err ) {
die $dbh1->errstr;
}
#
# Loop through the result of the query, adding links to the new database
#
$count = 0;
while ( $h1 = $sth1->fetchrow_hashref ) {
$count++;
$episode_key = sprintf( "hpr%04d", $h1->{id} );
$sth2->execute( $episode_key, $h1->{hostid}, );
if ( $dbh2->err ) {
die $dbh2->errstr;
}
}
print "Added $count links to HPR episodes\n" if ( $verbose > 0 );
return $count;
}
#=== FUNCTION ================================================================
# NAME: build_series_table
# PURPOSE: Copy the data from the Mariadb 'miniseries' table to the Pg
# 'series' table
# PARAMETERS: $dbh1 Handle for the MariaDB database
# $dbh2 Handle for the Pg database
# $verbose Verbosity level
# RETURNS: Nothing
# DESCRIPTION:
# THROWS: No exceptions
# COMMENTS: None
# SEE ALSO: N/A
#===============================================================================
sub build_series_table {
my ( $dbh1, $dbh2, $verbose ) = @_;
my ( $sth1, $h1, $sth2, $count );
$sth1 = $dbh1->prepare('SELECT * FROM miniseries') or die $DBI::errstr;
if ( $dbh1->err ) {
warn $dbh1->errstr;
}
$sth2 = $dbh2->prepare('INSERT INTO series VALUES (?,?,?,?,?)')
or die $DBI::errstr;
if ( $dbh2->err ) {
warn $dbh2->errstr;
}
#
# Query MariaDB for the entire 'miniseries' table
#
$sth1->execute;
if ( $dbh1->err ) {
die $dbh1->errstr;
}
#
# Loop though 'miniseries' table rows writing them to the PostgreSQL
# 'series' table
#
$count = 0;
while ( $h1 = $sth1->fetchrow_hashref ) {
$count++;
$sth2->execute(
$h1->{id}, $h1->{name},
$h1->{description}, $h1->{private},
nullif( $h1->{image}, '^\s*$' ),
# $h1->{valid},
);
if ( $dbh2->err ) {
die $dbh2->errstr;
}
}
print "Copied $count records\n" if ($verbose > 0);
#
# Set the sequence to the correct value
#
alter_seq( $dbh2, 'series', 'series_id', 'series_seq' );
}
#=== FUNCTION ================================================================
# NAME: build_episodes_series_xref_table
# PURPOSE: Generates the cross reference table by examining the 'eps' and
# 'series' tables in the MariaDB database.
# PARAMETERS: $dbh1 Handle for the MariaDB table
# $dbh2 Handle for the Pg database
# $verbose Verbosity level
# RETURNS: Nothing
# DESCRIPTION:
# THROWS: No exceptions
# COMMENTS: None
# SEE ALSO: N/A
#===============================================================================
sub build_episodes_series_xref_table {
my ( $dbh1, $dbh2, $verbose ) = @_;
my ( $sth1, $h1, $sth2, $count, $episode_key );
#
# Query to find the HPR episode and series id from the old database
#
$sth1 = $dbh1->prepare(
q{SELECT e.id AS epid, m.id AS msid
FROM eps e, miniseries m
WHERE e.series = m.id}
) or die $DBI::errstr;
if ( $dbh1->err ) {
warn $dbh1->errstr;
}
#
# Query to add a cross reference link to the new database
#
$sth2 = $dbh2->prepare(
q{INSERT INTO episodes_series_xref
SELECT e.episode_id, s.series_id
FROM episodes e, series s
WHERE e.episode_key = ? AND s.series_id = ?}
) or die $DBI::errstr;
if ( $dbh2->err ) {
warn $dbh2->errstr;
}
#
# Collect all the HPR episodes and host id numbers (no TwaT shows are in
# series)
#
$sth1->execute;
if ( $dbh1->err ) {
die $dbh1->errstr;
}
#
# Loop through the result of the query, adding links to the new database
#
$count = 0;
while ( $h1 = $sth1->fetchrow_hashref ) {
$count++;
$episode_key = sprintf( "hpr%04d", $h1->{epid} );
$sth2->execute( $episode_key, $h1->{msid} );
if ( $dbh2->err ) {
die $dbh2->errstr;
}
}
print "Added a total of $count series links\n" if ( $verbose > 0 );
}
#=== FUNCTION ================================================================
# NAME: build_tags_table
# PURPOSE: Using the data structure built from the MariaDB database
# populate the many-to-many table in the Pg database
# PARAMETERS: $dbh Handle for the Pg database
# $verbose Verbosity level
# $tag_hash Reference to a hash of episode ids and tags
# for each episode
# RETURNS: Nothing
# DESCRIPTION: Before being called the tags in the MariaDB database are
# gathered into a hash which is passed to this function as an
# argument. The hash is keyed by episode number and each value
# consists of an arrayref containing the tag strings. These tags
# are then processed to add to the PostgreSQL database.
# For each episode (in sorted order) the tag array is processed.
# If a tag already exists in the 'tags' table the tag id is
# stored for later, otherwise the tag is added to the 'tags'
# table. The tag id from either source is used to add to the
# cross reference table 'episodes_tags_xref'.
# Adding a link requires the provision of the episode key, which
# in this case will be 'hprNNNN' where 'NNNN' is a 4 digit zero
# padded number. This is converted to the episode id by
# a PostgreSQL function.
# THROWS: No exceptions
# COMMENTS: None
# SEE ALSO: N/A
#===============================================================================
sub build_tags_table {
my ( $dbh, $verbose, $tag_hash ) = @_;
my ( $sth1, $h1, $sth2, $sth3, $rv);
my ( $tags, $tid, $episode_key, $count1, $count2 );
#
# Query to find if a tag already exists in the PostgreSQL database
#
$sth1 = $dbh->prepare(q{SELECT * FROM tags WHERE tag = ?});
#
# Query to add a new tag
#
$sth2 = $dbh->prepare(q{INSERT INTO tags (tag) VALUES(?)});
#
# Query to add a new joining row
#
$sth3 = $dbh->prepare(
q{INSERT INTO episodes_tags_xref
VALUES(id_in_episodes(?),?)}
);
$count1 = $count2 = 0;
foreach my $id ( sort { $a <=> $b } keys( %{$tag_hash} ) ) {
#
# The episode key is now more than a number
#
$episode_key = sprintf("hpr%04d", $id);
#
# Get the array of tags for this episode id
#
$tags = $tag_hash->{$id};
#
# Loop through the array of tags (using an integer so we can index the
# current tag)
#
for my $i ( 0 .. $#$tags ) {
#
# Look to see if this tag exists
#
$sth1->execute( $tags->[$i] );
if ( $dbh->err ) {
warn $dbh->errstr;
}
#
# If it's already in the table just store the id for later
# otherwise add a new entry
#
if ( $h1 = $sth1->fetchrow_hashref ) {
$tid = $h1->{tag_id};
}
else {
#
# Add the tag to 'tags'
#
$count1++;
$rv = $sth2->execute( $tags->[$i] );
if ( $dbh->err ) {
warn $dbh->errstr;
}
$rv = 0 if ( $rv eq '0E0' );
#
# Get the id number of the inserted tag
#
$tid = $dbh->last_insert_id( undef, undef, undef, undef,
{ sequence => 'tag_seq' } );
}
$count2++;
$rv = $sth3->execute( $episode_key, $tid );
if ( $dbh->err ) {
warn $dbh->errstr;
}
$rv = 0 if ( $rv eq '0E0' );
}
}
if ($verbose > 0) {
print "Added $count1 tags\n";
print "Added $count2 cross references\n";
}
}
#=== FUNCTION ================================================================
# NAME: collect_eps_tags
# PURPOSE: Collects the tags from the eps.tags field
# PARAMETERS: $dbh Database handle
# $verbose Verbosity level
# RETURNS: A reference to the hash created by collecting all the tags
# DESCRIPTION: Queries the MariaDB 'eps' table for all of the rows containing
# tags, returning the comma-separated list with the id number.
# Each CSV list is then parsed and the result turned into a hash
# keyed on the id number and containing a sorted array of tags.
# If the level of verbosity is greater than 2 the tags hash is
# dumped (ironically, as a CSV list!).
# THROWS: No exceptions
# COMMENTS: None
# SEE ALSO: N/A
#===============================================================================
sub collect_eps_tags {
my ( $dbh, $verbose ) = @_;
my ( $status, @fields, %hash );
my ( $sth, $h );
#
# For parsing the field as CSV
#
my $csv = Text::CSV_XS->new;
#
# Query the MariaDB 'eps' table for all the id and tags
#
$sth = $dbh->prepare(
q{SELECT id,tags FROM eps
WHERE length(tags) > 0
ORDER BY id}
) or die $DBI::errstr;
if ( $dbh->err ) {
warn $dbh->errstr;
}
$sth->execute;
if ( $dbh->err ) {
warn $dbh->errstr;
}
#
# Loop through what we got
#
while ( $h = $sth->fetchrow_hashref ) {
#
# Parse the tag list
#
$status = $csv->parse( $h->{tags} );
unless ($status) {
#
# Report any errors
#
print "Parse error on episode ", $h->{id}, "\n";
print $csv->error_input(), "\n";
next;
}
@fields = $csv->fields();
next unless (@fields);
#
# Trim all tags (don't alter $_ when doing it)
#
@fields = map {
my $t = $_;
$t =~ s/(^\s+|\s+$)//g;
$t;
} @fields;
#print "$h->{id}: ",join(",",@fields),"\n";
#
# Save the id and its tags, sorted for comparison
#
$hash{ $h->{id} } = [ sort @fields ];
}
#
# Dump all id numbers and tags if the verbose level is high enough
#
if ( $verbose >= 2 ) {
print "\nTags collected from the 'eps' table\n\n";
foreach my $id ( sort { $a <=> $b } keys(%hash) ) {
printf "%04d: %s\n", $id, join( ",", @{ $hash{$id} } );
}
}
return \%hash;
}
#=== FUNCTION ================================================================
# NAME: build_comments_table
# PURPOSE: Copy the data from the Mariadb 'comments' table to the Pg
# 'comments' table
# PARAMETERS: $dbh1 Handle for the MariaDB database
# $dbh2 Handle for the Pg database
# $verbose Verbosity level
# RETURNS: Nothing
# DESCRIPTION:
# THROWS: No exceptions
# COMMENTS: None
# SEE ALSO: N/A
#===============================================================================
sub build_comments_table {
my ( $dbh1, $dbh2, $verbose ) = @_;
my ( $sth1, $h1, $sth2, $count );
#
# Query to collect comments from the old database
#
$sth1 = $dbh1->prepare('SELECT * FROM comments') or die $DBI::errstr;
if ( $dbh1->err ) {
warn $dbh1->errstr;
}
#
# Query to insert comment data into the new database
#
$sth2 = $dbh2->prepare(
q{INSERT INTO comments VALUES (?,id_in_episodes(?),?,?,?,?,?)}
) or die $DBI::errstr;
if ( $dbh2->err ) {
warn $dbh2->errstr;
}
#
# Query MariaDB for the entire 'comments' table
#
$sth1->execute;
if ( $dbh1->err ) {
die $dbh1->errstr;
}
#
# Loop though 'comments' table rows writing them to the PostgreSQL
# 'comments' table
#
$count = 0;
while ( $h1 = $sth1->fetchrow_hashref ) {
$count++;
$sth2->execute(
$h1->{id},
sprintf("hpr%04d",$h1->{eps_id}),
$h1->{comment_timestamp},
nullif( $h1->{comment_author_name}, '^\s*$' ),
nullif( $h1->{comment_title}, '^\s*$' ),
$h1->{comment_text},
$h1->{last_changed},
);
if ( $dbh2->err ) {
die $dbh2->errstr;
}
}
print "Copied $count comments\n" if ($verbose > 0);
#
# Set the sequence to the correct value
#
alter_seq( $dbh2, 'comments', 'comment_id', 'comment_seq' );
}
#=== FUNCTION ================================================================
# NAME: update_episodes_table
# PURPOSE: Initialise archiv-related fields in the Pg 'episodes' table
# from the SQLite 'episodes' table
# PARAMETERS: $dbh1 Handle for the SQLite database
# $dbh2 Handle for the Pg database
# $verbose Verbosity level
# RETURNS: Nothing
# DESCRIPTION:
# THROWS: No exceptions
# COMMENTS: None
# SEE ALSO: N/A
#===============================================================================
sub update_episodes_table {
my ( $dbh1, $dbh2, $verbose ) = @_;
my ( $sth1, $h1, $sth2, $count );
#
# Query to collect episodes from the SQLite database
#
$sth1 = $dbh1->prepare('SELECT * FROM episodes') or die $DBI::errstr;
if ( $dbh1->err ) {
warn $dbh1->errstr;
}
#
# Query to insert fields into the Pg database 'episodes' table. We just
# write whatever comes in, no check for existing data.
#
$sth2 = $dbh2->prepare(
q{UPDATE episodes SET
archived = ?,
archive_date = ?,
IA_URL = ?
WHERE episode_key = ?}
) or die $DBI::errstr;
if ( $dbh2->err ) {
warn $dbh2->errstr;
}
#
# Query SQLite for the entire 'episodes' table
#
$sth1->execute;
if ( $dbh1->err ) {
die $dbh1->errstr;
}
#
# Loop though 'episodes' table rows writing selected fields to the
# PostgreSQL 'episodes' table
#
$count = 0;
while ( $h1 = $sth1->fetchrow_hashref ) {
$count++;
$sth2->execute(
$h1->{uploaded},
$h1->{archive_date},
$h1->{IA_URL},
sprintf("hpr%04d",$h1->{id}),
);
if ( $dbh2->err ) {
die $dbh2->errstr;
}
}
print "Updated $count rows in the episodes table\n" if ($verbose > 0);
}
#=== FUNCTION ================================================================
# NAME: build_assets_table
# PURPOSE: Copy the data from the SQLite 'assets' table to the Pg
# 'assets' table
# PARAMETERS: $dbh1 Handle for the SQLite database
# $dbh2 Handle for the Pg database
# $verbose Verbosity level
# RETURNS: Nothing
# DESCRIPTION:
# THROWS: No exceptions
# COMMENTS: None
# SEE ALSO: N/A
#===============================================================================
sub build_assets_table {
my ( $dbh1, $dbh2, $verbose ) = @_;
my ( $sth1, $h1, $sth2, $count );
#
# Query to collect assets from the SQLite database
#
$sth1 = $dbh1->prepare('SELECT * FROM assets') or die $DBI::errstr;
if ( $dbh1->err ) {
warn $dbh1->errstr;
}
#
# Query to insert asset data into the new database
#
$sth2 = $dbh2->prepare(
q{INSERT INTO assets VALUES (DEFAULT,id_in_episodes(?),?,?,?)}
) or die $DBI::errstr;
if ( $dbh2->err ) {
warn $dbh2->errstr;
}
#
# Query SQLite for the entire 'assets' table
#
$sth1->execute;
if ( $dbh1->err ) {
die $dbh1->errstr;
}
#
# Loop though 'assets' table rows writing them to the PostgreSQL
# 'assets' table
#
$count = 0;
while ( $h1 = $sth1->fetchrow_hashref ) {
$count++;
$sth2->execute(
sprintf("hpr%04d",$h1->{episode_id}),
$h1->{URL},
$h1->{filename},
$h1->{uploaded},
);
if ( $dbh2->err ) {
die $dbh2->errstr;
}
}
print "Copied $count assets\n" if ($verbose > 0);
#
# Set the sequence to the correct value
#
alter_seq( $dbh2, 'assets', 'asset_id', 'asset_seq' );
}
#=== FUNCTION ================================================================
# NAME: resolve_double_hosts
# PURPOSE: Turn any double host entries in the 'hosts' table into
# "singletons"
# PARAMETERS: $dbh1 Handle for the MariaDB database
# $dbh2 Handle for the Pg database
# $template Template for building the default email
# $licence string version of the short CC licence name
# $verbose Verbosity level
# RETURNS: Nothing
# DESCRIPTION:
# THROWS: No exceptions
# COMMENTS: None
# SEE ALSO: N/A
#===============================================================================
sub resolve_double_hosts {
my ( $dbh1, $dbh2, $template, $licence, $verbose ) = @_;
my ( $sth1, $h1, $rv1, $sth2, $h2, $rv2, $sth3, $h3, $rv3, $sth4, $h4, $rv4 );
my ( %doubles, @h, %hosts, $count, $unknown );
#
# Search the 'hosts' table in the old database for "host1 and host2"
# strings
#
$count = find_double_hosts($dbh1, \%doubles, \%hosts);
#
# If no doubles there's nothing to do
#
if ($count == 0) {
print "No doubles found\n" if ($verbose > 0);
return;
}
#
# Scan the list of individual hosts we stored earlier and find them in
# the 'hosts' table (assuming that the hostnames we have extracted match
# exactly)
#
$unknown = find_hosts($dbh1, \%doubles, \%hosts);
#
# Allocate all unknown hosts a host id in the PostgreSQL database, and give an
# unique email address.
#
if ( $unknown > 0 ) {
register_unknown($dbh2, \%doubles, \%hosts, $template,
$licence);
}
#
# Now %doubles contains all the original names and host ids and %hosts
# contains the parsed out names and their ids. We can look for shows
# attributed to the double hosts and re-attribute them to the single
# hosts.
#
link_double_twat_episodes($dbh1,$dbh2,\%doubles,$verbose);
link_double_hpr_episodes($dbh1,$dbh2,\%doubles,$verbose);
return;
}
#=== FUNCTION ================================================================
# NAME: find_double_hosts
# PURPOSE: Search the 'hosts' table in the old database for "host1 and
# host2" strings.
# PARAMETERS: $dbh handle for the MariaDB database
# $doubles hashref where details will be stored
# $hosts hashref to contain single host details
# RETURNS: The number of double hosts found
# DESCRIPTION:
# THROWS: No exceptions
# COMMENTS: None
# SEE ALSO: N/A
#===============================================================================
sub find_double_hosts {
my ($dbh, $doubles, $hosts) = @_;
my ($sth, $h, @h, $count);
#
# Find the double hosts in the old database
#
$sth = $dbh->prepare(
q{ SELECT hostid, host FROM hosts
WHERE host regexp '[[:<:]]and[[:>:]]'
ORDER BY hostid
}
) or die $DBI::errstr;
if ( $dbh->err ) {
warn $dbh->errstr;
return 0;
}
$sth->execute;
if ( $dbh->err ) {
die $dbh->errstr;
}
#
# Loop through the doubles we found, counting them as we go
#
$count = 0;
while ( $h = $sth->fetchrow_hashref ) {
#
# Each hash value is a hash containing the original id, and, in a sub-hash
# the replacement ids
#
$doubles->{$h->{host}} = {
double => $h->{hostid},
singles => {},
};
#
# Parse the double host string into an array
#
@h = ( $h->{host} =~ /^(.+)\s+and\s+(.+)$/ );
#
# Initialise the entries for %doubles and %hosts
#
for my $host (@h) {
$doubles->{$h->{host}}->{singles}->{$host} = undef;
unless ( exists( $hosts->{$host} ) ) {
$hosts->{$host} = 0;
}
}
$count++;
}
return $count;
}
#=== FUNCTION ================================================================
# NAME: find_hosts
# PURPOSE: Find individual hosts in the old MariaDB database using the
# names we found as double hosts
# PARAMETERS: $dbh handle for the MariaDB database
# $doubles hashref where details will be stored
# $hosts hashref to contain single host details
# RETURNS: The number of unknown hosts
# DESCRIPTION:
# THROWS: No exceptions
# COMMENTS: None
# SEE ALSO: N/A
#===============================================================================
sub find_hosts {
my ($dbh, $doubles, $hosts) = @_;
my ($sth, $h, $rv, @h, $unknown);
#
# Query to find the host by name in the old database
#
$sth = $dbh->prepare(q{SELECT hostid FROM hosts WHERE host = ?})
or die $DBI::errstr;
if ( $dbh->err ) {
warn $dbh->errstr;
}
#
# Scan the list of individual hosts we stored earlier and find them in
# the 'hosts' table (assuming that the hostnames we have extracted match
# exactly)
#
$unknown = 0;
foreach my $host ( sort(keys(%{$hosts})) ) {
$rv = $sth->execute($host);
if ( $dbh->err ) {
die $dbh->errstr;
}
$rv = 0 if ( $rv eq '0E0' );
if ($rv) {
#
# Found id for host. Save in the %doubles hash
#
$h = $sth->fetchrow_hashref;
$hosts->{$host} = $h->{hostid};
save_hostid($doubles,$host,$h->{hostid});
}
else {
#
# Can't find this host
#
$unknown++;
}
}
return $unknown;
}
#=== FUNCTION ================================================================
# NAME: register_unknown
# PURPOSE: Create an entry in the 'hosts' table of the new database
# wherever there's a host that's unknown (has a zero id in the
# %hosts hash).
# PARAMETERS: $dbh handle for the MariaDB database
# $doubles hashref where details will be stored
# $hosts hashref to contain single host details
# $email_template template for making an email address
# $licence string version of the short CC licence name
# RETURNS: Nothing
# DESCRIPTION:
# THROWS: No exceptions
# COMMENTS: None
# SEE ALSO: N/A
#===============================================================================
sub register_unknown {
my ( $dbh, $doubles, $hosts, $email_template, $licence ) = @_;
my ( $sth, $h, $rv, $new_email );
#
# PostgreSQL query to register an unknown host
#
$sth = $dbh->prepare(
q{INSERT INTO hosts (host,email,license)
VALUES (?,?,id_in_licenses(?))}
) or die $DBI::errstr;
if ( $dbh->err ) {
warn $dbh->errstr;
}
foreach my $host ( sort( keys( %{$hosts} ) ) ) {
if ( $hosts->{$host} == 0 ) {
#
# Write a row to the 'hosts' table. There has to be an email,
# but we can't compute one until the row has been added.
#
$rv = $sth->execute( $host, 'placeholder', $licence );
if ( $dbh->err ) {
die $dbh->errstr;
}
#
# Save the id number we just generated as the new host id
#
my $newid = $dbh->last_insert_id( undef, undef, undef, undef,
{ sequence => 'host_seq' } );
$hosts->{$host} = $newid;
save_hostid( $doubles, $host, $newid );
printf "Created new host %s (%d)\n", $host, $newid
if ( $verbose > 0 );
#
# Give the new host entry a default email address
#
$new_email = sprintf( $email_template, $newid );
$rv = $dbh->do( 'UPDATE hosts SET email = ? WHERE host_id = ?',
undef, $new_email, $newid );
if ( $dbh->err ) {
warn $dbh->errstr;
}
$rv = 0 if ( $rv eq '0E0' );
warn "Failed to set email address $new_email for $host\n"
unless ( defined($rv) );
}
}
}
#=== FUNCTION ================================================================
# NAME: link_double_twat_episodes
# PURPOSE: Find and link TwaT episodes with double hosts
# PARAMETERS: $dbh1 Handle for the MariaDB database
# $dbh2 Handle for the Pg database
# $doubles hashref holding double host details
# $verbose Verbosity level
# RETURNS: Nothing
# DESCRIPTION:
# THROWS: No exceptions
# COMMENTS: None
# SEE ALSO: N/A
#===============================================================================
sub link_double_twat_episodes {
my ( $dbh1, $dbh2, $doubles, $verbose ) = @_;
my ( $sth, $h, $rv );
#
# Query to find TwaT shows with particular host names in the old database
#
$sth = $dbh1->prepare(
q{
SELECT ep_num
FROM twat_episodes
WHERE host = ?
}
) or die $DBI::errstr;
if ( $dbh1->err ) {
warn $dbh1->errstr;
}
#
# Loop through the double hosts we collected
#
foreach my $double ( sort( keys(%{$doubles}) ) ) {
#
# Extract saved double and corresponding single details
#
my ( $doubleid, @newhosts ) = (
$doubles->{$double}->{double},
keys( %{ $doubles->{$double}->{singles} } )
);
#
# Find TwaT shows marked as belonging to this double-host (in the old
# database)
#
$sth->execute($double);
if ( $dbh1->err ) {
die $dbh1->errstr;
}
#
# Process all the shows we found
#
while ( $h = $sth->fetchrow_hashref ) {
my $eps_id = $h->{ep_num};
my $episode_key = sprintf("twat%04d",$eps_id);
#
# If one insert into the new database fails they all do
#
$dbh2->begin_work();
#
# Add links for the single hosts to the new database
#
foreach my $host (@newhosts) {
$rv = $dbh2->do(
q{INSERT INTO episodes_hosts_xref
SELECT e.episode_id, h.host_id
FROM episodes e, hosts h
WHERE e.episode_key = ? AND h.host = ?},
undef, $episode_key, $host);
if ( $dbh2->err ) {
warn $dbh2->errstr;
}
$rv = 0 if ( $rv eq '0E0' );
unless ( defined($rv) ) {
warn "Problem adding link to episodes_hosts_xref for "
. "$episode_key,$host\n";
}
else {
printf "Linked show %s for host %s\n", $episode_key, $host;
}
}
#
# Commit the inserts above
#
$dbh2->commit();
}
}
}
#=== FUNCTION ================================================================
# NAME: link_double_hpr_episodes
# PURPOSE: Find and link HPR episodes with double hosts
# PARAMETERS: $dbh1 Handle for the MariaDB database
# $dbh2 Handle for the Pg database
# $doubles hashref holding double host details
# $verbose Verbosity level
# RETURNS: Nothing
# DESCRIPTION:
# THROWS: No exceptions
# COMMENTS: None
# SEE ALSO: N/A
#===============================================================================
sub link_double_hpr_episodes {
my ( $dbh1, $dbh2, $doubles, $verbose ) = @_;
my ( $sth, $h, $rv );
#
# Query to find HPR shows with particular host ids in the old database
#
$sth = $dbh1->prepare(
q{
SELECT id AS eps_id
FROM eps
WHERE hostid = ?
}
) or die $DBI::errstr;
if ( $dbh1->err ) {
warn $dbh1->errstr;
}
#
# Loop through the double hosts we collected
#
foreach my $double ( sort( keys(%{$doubles}) ) ) {
#
# Extract saved double and corresponding single details
#
my ( $doubleid, @newhosts ) = (
$doubles->{$double}->{double},
keys( %{ $doubles->{$double}->{singles} } )
);
#
# Find HPR shows marked as belonging to this double-host (in the old
# database)
#
$sth->execute($doubleid);
if ( $dbh1->err ) {
die $dbh1->errstr;
}
#
# Process all the shows we found
#
while ( $h = $sth->fetchrow_hashref ) {
my $eps_id = $h->{eps_id};
my $episode_key = sprintf("hpr%04d",$eps_id);
#
# If one insert into the new database fails they all do
#
$dbh2->begin_work();
#
# Add links for the single hosts to the new database
#
foreach my $host (@newhosts) {
$rv = $dbh2->do(
q{INSERT INTO episodes_hosts_xref
SELECT e.episode_id, h.host_id
FROM episodes e, hosts h
WHERE e.episode_key = ? AND h.host = ?},
undef, $episode_key, $host);
if ( $dbh2->err ) {
warn $dbh2->errstr;
}
$rv = 0 if ( $rv eq '0E0' );
unless ( defined($rv) ) {
warn "Problem adding link to episodes_hosts_xref for "
. "$episode_key,$host\n";
}
else {
printf "Linked show %s for host %s\n", $episode_key, $host;
}
}
#
# Commit the inserts above
#
$dbh2->commit();
}
}
}
#=== FUNCTION ================================================================
# NAME: save_hostid
# PURPOSE: Saves the host id after searching for the key in the %doubles
# hash
# PARAMETERS: $doubles hashref to %doubles
# $host host key
# $hostid host id number
# RETURNS: Nothing
# DESCRIPTION: Searches the %doubles hash for particular keys in the
# 'singles' sub-hash. If found saves the corresponding host id
# there.
# THROWS: No exceptions
# COMMENTS: None
# SEE ALSO: N/A
#===============================================================================
sub save_hostid {
my ( $doubles, $host, $hostid ) = @_;
foreach my $key ( keys(%$doubles) ) {
if ( exists( $doubles->{$key}->{singles}->{$host} ) ) {
$doubles->{$key}->{singles}->{$host} = $hostid;
}
}
}
#=== FUNCTION ================================================================
# NAME: compute_host_date_added
# PURPOSE: Determine the 'hosts.when_added' field once the database is
# fully populated.
# PARAMETERS: $dbh Handle for the PostgreSQL database
# RETURNS: Nothing
# DESCRIPTION:
# THROWS: No exceptions
# COMMENTS: None
# SEE ALSO: N/A
#===============================================================================
sub compute_host_date_added {
my ($dbh) = @_;
my $rv;
#
# Allocate when_added values where possible
#
$rv = $dbh->do(
q{
UPDATE hosts
SET when_added = sq.when_added
FROM (
SELECT h.host_id,min(e.release_date) AS when_added
FROM episodes e
JOIN episodes_hosts_xref eh ON (e.episode_id = eh.episode_id)
JOIN hosts h ON (h.host_id = eh.host_id)
GROUP BY h.host_id
ORDER by min(e.release_date)) AS sq
WHERE hosts.host_id = sq.host_id
}
);
if ( $dbh->err ) {
warn $dbh->errstr;
}
$rv = 0 if ( $rv eq '0E0' );
print "Added $rv dates to the 'when_added' column\n";
}
#=== FUNCTION ================================================================
# NAME: alter_seq
# PURPOSE: Ensure the PostgreSQL sequence associated with a table has the
# correct value.
# PARAMETERS: $dbh Handle for the PostgreSQL database
# $table Table name for the query
# $idname Id field name (primary key)
# $sequence Sequence name
# RETURNS: Nothing
# DESCRIPTION:
# THROWS: No exceptions
# COMMENTS: None
# SEE ALSO: N/A
#===============================================================================
sub alter_seq {
my ( $dbh, $table, $idname, $sequence ) = @_;
my ( $sth, $h, $rv, $maxid );
#
# Find the maximum id number in the table
#
$sth = $dbh->prepare("SELECT max($idname) as maxid FROM $table")
or die $DBI::errstr;
if ( $dbh->err ) {
warn $dbh->errstr;
}
$sth->execute;
if ( $dbh->err ) {
die $dbh->errstr;
}
#
# Save the maximum
#
if ( $h = $sth->fetchrow_hashref ) {
$maxid = $h->{maxid};
$sth->finish;
}
#
# Reset the sequence one more than the maximum
#
$maxid++;
$rv = $dbh->do("ALTER SEQUENCE $sequence RESTART WITH $maxid");
if ( $dbh->err ) {
warn $dbh->errstr;
}
$rv = 0 if ( $rv eq '0E0' );
warn "Failed to reset $sequence\n" unless (defined($rv));
return;
}
#=== FUNCTION ================================================================
# NAME: check_table
# PURPOSE: Check that a given PostgreSQL table is empty
# PARAMETERS: $dbh Handle for the PostgreSQL database
# $table Name of table
# RETURNS: True if empty, otherwise false
# DESCRIPTION: Simply perform a query on the nominated table which counts
# rows. If the table does not exist a DBI method will fail (the
# execute?), so we treat this as a 'no empty' to make the caller
# take error action.
# THROWS: No exceptions
# COMMENTS: None
# SEE ALSO: N/A
#===============================================================================
sub check_table {
my ( $dbh, $table ) = @_;
my ( $sth, $h, $count );
$sth = $dbh->prepare("SELECT count(*) AS count FROM $table")
or die $DBI::errstr;
if ( $dbh->err ) {
warn $dbh->errstr;
return 0;
}
$sth->execute;
if ( $dbh->err ) {
warn $dbh->errstr;
return 0;
}
if ( $h = $sth->fetchrow_hashref ) {
$count = $h->{count};
$sth->finish;
}
return $count == 0;
}
#=== FUNCTION ================================================================
# NAME: default_email
# PURPOSE: Make a default email address for hosts with none
# PARAMETERS: $email Original email address
# $regex Regular expression to check the email against
# $template Template for building the default
# $hostid Host id number to use in the default
# RETURNS: The email address to be used
# DESCRIPTION: If the email address matches a regular expression then
# generate a default from the template and the host id,
# otherwise just return the address untouched.
# THROWS: No exceptions
# COMMENTS: None
# SEE ALSO: N/A
#===============================================================================
sub default_email {
my ( $email, $regex, $template, $hostid ) = @_;
return (
$email =~ $regex
? sprintf( $template, $hostid )
: $email
);
}
#=== FUNCTION ================================================================
# NAME: nullif
# PURPOSE: Tests a value and makes it 'undef' (equivalent to NULL in the
# database) if it matches a regular expression.
# PARAMETERS: $value Value to test
# $regex Regular expression to match against
# RETURNS: 'undef' if the values match, otherwise the original value
# DESCRIPTION: This is very simple, just a wrapper around the test.
# THROWS: No exceptions
# COMMENTS: None
# SEE ALSO: N/A
#===============================================================================
sub nullif {
my ( $value, $regex ) = @_;
return $value unless defined($value);
return ( $value =~ $regex ? undef : $value );
}
#=== FUNCTION ================================================================
# NAME: coalesce
# PURPOSE: To find the first defined argument and return it
# PARAMETERS: Arbitrary number of arguments
# RETURNS: The first defined argument or undef if there are none
# DESCRIPTION: Modelled on the SQL function of the same name. It takes a list
# of arguments, scans it for the first one that is not undefined
# and returns it. If an argument is defined and it's an arrayref
# then the referenced array is returned comma-delimited. This
# allows calls such as "coalesce($var,'undef')" which returns
# the value of $var if it's defined, and 'undef' if not and
# doesn't break anything along the way.
# THROWS: No exceptions
# COMMENTS: None
# SEE ALSO: N/A
#===============================================================================
sub coalesce {
foreach (@_) {
if ( defined($_) ) {
if ( ref($_) eq 'ARRAY' ) {
return join( ',', @{$_} );
}
else {
return $_;
}
}
}
return; # implicit undef
}
#=== FUNCTION ================================================================
# NAME: Options
# PURPOSE: Processes command-line options
# PARAMETERS: $optref Hash reference to hold the options
# RETURNS: Undef
# DESCRIPTION:
# THROWS: no exceptions
# COMMENTS: none
# SEE ALSO: n/a
#===============================================================================
sub Options {
my ($optref) = @_;
my @options = ( "help", "config=s", "debug=i", "dry-run!", "verbose+",
"phases=s" );
if ( !GetOptions( $optref, @options ) ) {
pod2usage( -msg => "$PROG version $VERSION\n", -exitval => 1 );
}
return;
}
__END__
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Application Documentation
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
#{{{
=head1 NAME
copy_mysql_pg - copy the HPR database from MySQL to PostgreSQL
=head1 VERSION
This documentation refers to B<copy_mysql_pg> version 0.1.5
=head1 USAGE
copy_mysql_pg2 -verbose
copy_mysql_pg2 -config=.hpr_livedb.cfg -verbose
copy_mysql_pg2 -verbose \
-phase='episodes,hosts,eh_xref,series,es_xref,tags,comments,archived,assets,epilogue'
=head1 OPTIONS
=over 8
=item B<-help>
Prints a brief help message describing the usage of the program, and then exits.
=item B<-debug=N>
Selects a level of debugging. Debug information consists of a line or series
of lines prefixed with the characters 'D>':
=over 4
=item B<0>
No debug output is generated: this is the default
=back
=item B<-[no]dry-run>
When enabled (B<-dry-run>) the script will report what it would do, but will
make no changes to the target database. In the default state (B<-nodry-run>)
then changes are made.
=item B<-verbose>
Makes the script verbose resulting in the production of more information about
what it is doing.
The option may be repeated to increase the level of verbosity. The levels are:
=over 4
=item B<0>
No output is generated (apart from errors and warnings if appropriate). This
is the default level.
=item B<1>
A message is generated per phase to indicate which actions are taking place.
This includes a report of the number of rows copied from the MySQL database to
the PostgreSQL one.
=item B<2>
Following the process of collecting the CSV tags from the MySQL 'episodes' table
these are reported as a list per episode. This output will be long!
=back
=item B<-phase=CSV_LIST>
This option allows the phases of the copying process to be selected
individually. The argument B<CSV_LIST> is a list of phase names, which have to
be typed exactly. The order is not important since the script will scan its
version of the list of phases in its own order and will check to see if each
has been selected.
The phase names are:
=over 4
=item B<episodes>
Causes the B<episodes> table to be filled.
=item B<hosts>
Causes the B<hosts> table to be filled.
=item B<eh_xref>
Causes the B<episodes_hosts_xref> table to be filled.
=item B<series>
Causes the B<series> table to be filled.
=item B<es_xref>
Causes the B<episodes_series_xref> table to be filled.
=item B<tags>
Causes the B<tags> and the B<episodes_tags_xref> tables to be filled.
=item B<comments>
Causes the B<comments> table to be filled.
=item B<archived>
Causes the archive-related fields of the B<episodes> table to be filled from
the SQLite database B<ia.db>.
=item B<assets>
Causes the B<assets> table to be filled from the SQLite database B<ia.db>.
=item B<epilogue>
Runs various tasks that can only be carried out after the database has been
populated.
=back
=back
=item B<-config=FILE>
This option allows an alternative configuration file to be used. This file
defines the location of the database, its port, its name and the username and
password to be used to access it. This feature was added to allow the script
to access alternative databases or the live database over an SSH tunnel.
See the CONFIGURATION AND ENVIRONMENT section below for the file format.
If the option is omitted the default file is used: B<.hpr_db.cfg>
=head1 DESCRIPTION
A full description of the application and its features.
May include numerous subsections (i.e. =head2, =head3, etc.)
=head1 DIAGNOSTICS
A list of every error and warning message that the application can generate
(even the ones that will "never happen"), with a full explanation of each
problem, one or more likely causes, and any suggested remedies. If the
application generates exit status codes (e.g. under Unix) then list the exit
status associated with each error.
=head1 CONFIGURATION AND ENVIRONMENT
The script obtains the credentials it requires to open the HPR database from
a configuration file. The name of the file it expects is B<.hpr_db.cfg> in the
directory holding the script. To change this will require changing the script.
The configuration file format is as follows:
<database>
host = 127.0.0.1
port = PORT
name = DATABASE
user = USERNAME
password = PASSWORD
</database>
=head1 DEPENDENCIES
A list of all the other modules that this module relies upon, including any
restrictions on versions, and an indication whether these required modules are
part of the standard Perl distribution, part of the module's distribution,
or must be installed separately.
=head1 BUGS AND LIMITATIONS
There are no known bugs in this module.
Please report problems to <Maintainer name(s)> (<contact address>)
Patches are welcome.
=head1 AUTHOR
Dave Morriss (Dave.Morriss@gmail.com)
=head1 LICENCE AND COPYRIGHT
Copyright (c) 2017-2019 Dave Morriss (Dave.Morriss@gmail.com). All rights reserved.
This module is free software; you can redistribute it and/or
modify it under the same terms as Perl itself. See perldoc perlartistic.
This program is distributed in the hope that it will be useful
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
=cut
#}}}
# [zo to open fold, zc to close]
# vim: syntax=perl:ts=8:sw=4:et:ai:tw=78:fo=tcrqn21:fdm=marker