1
0
forked from HPR/hpr-tools
hpr-tools/Database/copy_mysql_pg

1439 lines
41 KiB
Plaintext
Raw Permalink Normal View History

#!/usr/bin/env perl
#===============================================================================
#
# FILE: copy_mysql_pg
#
# USAGE: ./copy_mysql_pg
#
# DESCRIPTION: Copies HPR show data from the MariaDB database to an
# experimental PostgreSQL database
#
# OPTIONS: ---
# REQUIREMENTS: ---
# BUGS: ---
# NOTES: ---
# AUTHOR: Dave Morriss (djm), Dave.Morriss@gmail.com
# VERSION: 0.0.4
# CREATED: 2017-03-15 18:50:08
# REVISION: 2017-10-17 22:37:08
#
#===============================================================================
use 5.010;
use strict;
use warnings;
use utf8;
use Getopt::Long;
use Pod::Usage;
use Config::General;
use List::MoreUtils qw{uniq apply};
use Text::CSV;
use DBI;
use Data::Dumper;
#
# Version number (manually incremented)
#
our $VERSION = '0.0.4';
#
# Script and directory names
#
( my $PROG = $0 ) =~ s|.*/||mx;
( my $DIR = $0 ) =~ s|/?[^/]*$||mx;
$DIR = '.' unless $DIR;
#-------------------------------------------------------------------------------
# Declarations
#-------------------------------------------------------------------------------
#
# Constants and other declarations
#
my $basedir = "$ENV{HOME}/HPR/Database";
my $configfile1 = "$basedir/.hpr_db.cfg";
my $configfile2 = "$basedir/.hpr_pg.cfg";
my $email_template = 'host_%s@hackerpublicradio.org';
my ( $dbh1, $sth1, $h1, $rv1 );
my ( $dbh2, $sth2, $h2, $rv2 );
my (@phase_choices);
my ( %eps_tags, %data );
my @phases = (
'episodes', 'hosts', 'eh_xref', 'series', 'es_xref', 'tags',
'comments', 'twat', 'epilogue'
);
#
# Enable Unicode mode
#
binmode STDOUT, ":encoding(UTF-8)";
binmode STDERR, ":encoding(UTF-8)";
#
# Load database configuration data
#
my $conf1 = Config::General->new(
-ConfigFile => $configfile1,
-InterPolateVars => 1,
-ExtendedAccess => 1
);
my %config1 = $conf1->getall();
my $conf2 = Config::General->new(
-ConfigFile => $configfile2,
-InterPolateVars => 1,
-ExtendedAccess => 1
);
my %config2 = $conf2->getall();
#-------------------------------------------------------------------------------
# Options and arguments
#-------------------------------------------------------------------------------
my $DEF_DEBUG = 0;
#
# Process options
#
my %options;
Options( \%options );
#
# Default help
#
pod2usage( -msg => "$PROG version $VERSION\n", -exitval => 1 )
if ( $options{'help'} );
#
# Collect options
#
my $DEBUG = ( defined( $options{debug} ) ? $options{debug} : $DEF_DEBUG );
my $verbose = ( defined( $options{verbose} ) ? $options{verbose} : 0 );
my $dry_run = ( defined( $options{'dry-run'} ) ? $options{'dry-run'} : 0 );
#
# This option is a list, provided as a CSV
#
my $phase_choices = $options{phases};
if ( defined($phase_choices) ) {
#
# We have a list which we'll parse, validate, sort, make unique and filter
#
my $lcsv = Text::CSV_XS->new( { binary => 1, } );
if ( $lcsv->parse($phase_choices) ) {
# Sort fields
@phase_choices = uniq( sort { $a cmp $b } $lcsv->fields() );
# Trim leading and trailing spaces
@phase_choices = apply { $_ =~ s/(^\s*|\s*$)// } @phase_choices;
# Make a list of invalid keywords
my %tmp = map { $_ => 1 } @phases;
my @bad = grep { not exists $tmp{$_} } @phase_choices;
# Deal with all errors
die "Invalid list; no elements\n" if scalar(@phase_choices) == 0;
die "Invalid list; too many elements\n"
if scalar(@phase_choices) > scalar(@phases);
die "Invalid list elements: ", join( ",", @bad ) . "\n"
if scalar(@bad) > 0;
}
else {
die "Failed to parse -list='$phase_choices'\n"
. $lcsv->error_diag() . "\n";
}
}
else {
#
# By default we do all phases
#
@phase_choices = @phases;
}
#-------------------------------------------------------------------------------
# Connect to the MariaDB database
#-------------------------------------------------------------------------------
my $dbtype1 = $config1{database}->{type} // 'mysql';
my $dbhost1 = $config1{database}->{host} // '127.0.0.1';
my $dbport1 = $config1{database}->{port} // 3306;
my $dbname1 = $config1{database}->{name};
my $dbuser1 = $config1{database}->{user};
my $dbpwd1 = $config1{database}->{password};
$dbh1
= DBI->connect( "dbi:$dbtype1:host=$dbhost1;port=$dbport1;database=$dbname1",
$dbuser1, $dbpwd1, { AutoCommit => 1 } )
or die $DBI::errstr;
#
# Enable client-side UTF8
#
$dbh1->{mysql_enable_utf8} = 1;
#-------------------------------------------------------------------------------
# Connect to the PostgreSQL database
#-------------------------------------------------------------------------------
my $dbtype2 = $config2{database}->{type} // 'Pg';
my $dbhost2 = $config2{database}->{host} // '127.0.0.1';
my $dbport2 = $config2{database}->{port} // 5432;
my $dbname2 = $config2{database}->{name};
my $dbuser2 = $config2{database}->{user};
my $dbpwd2 = $config2{database}->{password};
$dbh2 = DBI->connect( "dbi:$dbtype2:host=$dbhost2;database=$dbname2;port=$dbport2",
$dbuser2, $dbpwd2, { PrintError => 0, AutoCommit => 1 } )
or die $DBI::errstr;
#
# Enable client-side UTF8
#
$dbh2->{pg_enable_utf8} = 1;
my %choices = map { $_ => 1 } @phase_choices;
#
# Perform phases in order, omitting those that are not in the list
#
for my $phase (@phases) {
#---------------------------------------------------------------------------
# Copy the 'eps' table to 'episodes'
#---------------------------------------------------------------------------
if ( $phase eq 'episodes' && exists( $choices{$phase} ) ) {
print "Build episodes table\n" if ( $verbose > 0 );
if ( check_table( $dbh2, 'episodes' ) ) {
build_episodes_table( $dbh1, $dbh2, $verbose ) unless $dry_run;
}
else {
print "** Table 'episodes' is not empty\n";
}
}
#---------------------------------------------------------------------------
# Copy the 'hosts' table to 'hosts'
#---------------------------------------------------------------------------
elsif ( $phase eq 'hosts' && exists( $choices{$phase} ) ) {
print "Build hosts table\n" if ( $verbose > 0 );
if ( check_table( $dbh2, 'hosts' ) ) {
build_hosts_table( $dbh1, $dbh2, $verbose ) unless $dry_run;
}
else {
print "** Table 'hosts' is not empty\n";
}
}
#---------------------------------------------------------------------------
# Generate the 'episodes_hosts_xref' table
#---------------------------------------------------------------------------
elsif ( $phase eq 'eh_xref' && exists( $choices{$phase} ) ) {
print "Build episodes_hosts_xref table\n" if ( $verbose > 0 );
if ( check_table( $dbh2, 'episodes_hosts_xref' ) ) {
build_episodes_hosts_xref_table( $dbh1, $dbh2, $verbose )
unless $dry_run;
}
else {
print "** Table 'episodes_hosts_xref' is not empty\n";
}
}
#---------------------------------------------------------------------------
# Copy the 'miniseries' table to 'series'
#---------------------------------------------------------------------------
elsif ( $phase eq 'series' && exists( $choices{$phase} ) ) {
print "Build series table\n" if ( $verbose > 0 );
if ( check_table( $dbh2, 'series' ) ) {
build_series_table( $dbh1, $dbh2, $verbose ) unless $dry_run;
}
else {
print "** Table 'series' is not empty\n";
}
}
#---------------------------------------------------------------------------
# Generate the 'episodes_series_xref' table
#---------------------------------------------------------------------------
elsif ( $phase eq 'es_xref' && exists( $choices{$phase} ) ) {
print "Build episodes_series_xref table\n" if ( $verbose > 0 );
if ( check_table( $dbh2, 'episodes_series_xref' ) ) {
build_episodes_series_xref_table( $dbh1, $dbh2, $verbose )
unless $dry_run;
}
else {
print "** Table 'episodes_series_xref' is not empty\n";
}
}
#---------------------------------------------------------------------------
# Collect and store the id numbers and tags from the 'eps' table, then add
# them to the PostgreSQL tables.
#---------------------------------------------------------------------------
elsif ( $phase eq 'tags' && exists( $choices{$phase} ) ) {
print "Build tags and episodes_tags_xref tables\n" if ( $verbose > 0 );
if ( check_table( $dbh2, 'tags' )
&& check_table( $dbh2, 'episodes_tags_xref' ) )
{
unless ($dry_run) {
%eps_tags = %{ collect_eps_tags( $dbh1, $verbose ) };
if (%eps_tags) {
build_tags_table( $dbh2, $verbose, \%eps_tags );
}
}
}
else {
print "** Tables 'tags' and/or 'episodes_tags_xref' are not empty\n";
}
}
#---------------------------------------------------------------------------
# Copy the 'comments' table to 'comments'
#---------------------------------------------------------------------------
elsif ( $phase eq 'comments' && exists( $choices{$phase} ) ) {
print "Build comments table\n" if ( $verbose > 0 );
if (check_table($dbh2,'comments')) {
build_comments_table( $dbh1, $dbh2, $verbose ) unless $dry_run;
}
else {
print "** Table 'comments' is not empty\n";
}
}
#---------------------------------------------------------------------------
# Perform the 'twat' actions
#---------------------------------------------------------------------------
elsif ( $phase eq 'twat' && exists( $choices{$phase} ) ) {
print "Perform twat actions\n" if ( $verbose > 0 );
#
# Incorporate the TwaT tables (assuming they exist)
#
if (check_table($dbh2,'twat_hosts')) {
load_twat_hosts($dbh1,$dbh2,$verbose) unless $dry_run;
}
else {
print "** Table 'twat_hosts' is not empty\n";
}
if (check_table($dbh2,'twat_episodes')) {
load_twat_episodes($dbh1,$dbh2,$verbose) unless $dry_run;
}
else {
print "** Table 'twat_episodes' is not empty\n";
}
}
#---------------------------------------------------------------------------
# Perform the 'epilogue' actions
#---------------------------------------------------------------------------
elsif ( $phase eq 'epilogue' && exists( $choices{$phase} ) ) {
print "Perform epilogue actions\n" if ( $verbose > 0 );
#
# Determine the first show date per host, assuming that's when they
# were added to the database
#
compute_host_date_added($dbh2) unless $dry_run;
}
}
exit;
#=== FUNCTION ================================================================
# NAME: build_episodes_table
# PURPOSE: Copy the data from the MariaDB 'eps' table to the Pg
# 'episodes' table
# PARAMETERS: $dbh1 Handle for the MariaDB database
# $dbh2 Handle for the Pg database
# $verbose Verbosity level
# RETURNS: Nothing
# DESCRIPTION:
# THROWS: No exceptions
# COMMENTS: None
# SEE ALSO: N/A
#===============================================================================
sub build_episodes_table {
my ( $dbh1, $dbh2, $verbose ) = @_;
my ( $sth1, $h1, $sth2, $count );
$sth1 = $dbh1->prepare('SELECT * FROM eps') or die $DBI::errstr;
if ( $dbh1->err ) {
warn $dbh1->errstr;
}
$sth2 = $dbh2->prepare('INSERT INTO episodes VALUES (?,?,?,?,?,?,?,?,?)')
or die $DBI::errstr;
if ( $dbh2->err ) {
warn $dbh2->errstr;
}
#
# Query MariaDB for the entire 'eps' table
#
$sth1->execute;
if ( $dbh1->err ) {
die $dbh1->errstr;
}
#
# Loop though 'eps' table rows writing them to the PostgreSQL 'episodes'
# table
#
$count = 0;
while ( $h1 = $sth1->fetchrow_hashref ) {
$count++;
$sth2->execute(
$h1->{id}, $h1->{date},
$h1->{title}, nullif( $h1->{summary}, '^\s*$' ),
$h1->{notes}, $h1->{explicit},
$h1->{license}, $h1->{duration},
$h1->{downloads},
);
if ( $dbh2->err ) {
die $dbh2->errstr;
}
}
print "Copied $count records\n" if ($verbose > 0);
#
# Set the sequence to the correct value
#
alter_seq($dbh2,'episodes','episode_seq');
}
#=== FUNCTION ================================================================
# NAME: build_hosts_table
# PURPOSE: Copy the data from the Mariadb 'hosts' table to the Pg 'hosts'
# table
# PARAMETERS: $dbh1 Handle for the MariaDB table
# $dbh2 Handle for the Pg database
# $verbose Verbosity level
# RETURNS: Nothing
# DESCRIPTION:
# THROWS: No exceptions
# COMMENTS: None
# SEE ALSO: N/A
#===============================================================================
sub build_hosts_table {
my ( $dbh1, $dbh2, $verbose ) = @_;
my ( $sth1, $h1, $sth2, $rv, $count );
$sth1 = $dbh1->prepare('SELECT * FROM hosts') or die $DBI::errstr;
if ( $dbh1->err ) {
warn $dbh1->errstr;
}
$sth2 = $dbh2->prepare(
q{INSERT INTO hosts
(id, host, email, profile, license, local_image, gpg, valid)
VALUES (?,?,?,?,?,?,?,?)}
) or die $DBI::errstr;
if ( $dbh2->err ) {
warn $dbh2->errstr;
}
$sth1->execute;
if ( $dbh1->err ) {
die $dbh1->errstr;
}
$count = 0;
while ( $h1 = $sth1->fetchrow_hashref ) {
$count++;
$sth2->execute(
$h1->{hostid},
$h1->{host},
default_email(
$h1->{email}, '^(\s*|admin@hackerpublicradio.org)$',
$email_template, $h1->{hostid}
),
nullif( $h1->{profile}, '^\s*$' ),
$h1->{license},
$h1->{local_image},
nullif( $h1->{gpg}, '^\s*$' ),
$h1->{valid},
);
if ( $dbh2->err ) {
die $dbh2->errstr;
}
}
print "Copied $count records\n" if ( $verbose > 0 );
#
# Set the sequence to the correct value
#
alter_seq( $dbh2, 'hosts', 'host_seq' );
}
#=== FUNCTION ================================================================
# NAME: build_episodes_hosts_xref_table
# PURPOSE: Generates the cross reference table by examining the 'eps' and
# 'hosts' tables in the MariaDB database.
# PARAMETERS: $dbh1 Handle for the MariaDB table
# $dbh2 Handle for the Pg database
# $verbose Verbosity level
# RETURNS: Nothing
# DESCRIPTION:
# THROWS: No exceptions
# COMMENTS: None
# SEE ALSO: N/A
#===============================================================================
sub build_episodes_hosts_xref_table {
my ( $dbh1, $dbh2, $verbose ) = @_;
my ( $sth1, $h1, $sth2, $count );
$sth1
= $dbh1->prepare(
'SELECT e.id,h.hostid FROM eps e, hosts h WHERE e.hostid = h.hostid')
or die $DBI::errstr;
if ( $dbh1->err ) {
warn $dbh1->errstr;
}
$sth2 = $dbh2->prepare('INSERT INTO episodes_hosts_xref VALUES (?,?)')
or die $DBI::errstr;
if ( $dbh2->err ) {
warn $dbh2->errstr;
}
$sth1->execute;
if ( $dbh1->err ) {
die $dbh1->errstr;
}
$count = 0;
while ( $h1 = $sth1->fetchrow_hashref ) {
$count++;
$sth2->execute( $h1->{id}, $h1->{hostid}, );
if ( $dbh2->err ) {
die $dbh2->errstr;
}
}
print "Copied $count records\n" if ($verbose > 0);
}
#=== FUNCTION ================================================================
# NAME: build_series_table
# PURPOSE: Copy the data from the Mariadb 'miniseries' table to the Pg
# 'series' table
# PARAMETERS: $dbh1 Handle for the MariaDB database
# $dbh2 Handle for the Pg database
# $verbose Verbosity level
# RETURNS: Nothing
# DESCRIPTION:
# THROWS: No exceptions
# COMMENTS: None
# SEE ALSO: N/A
#===============================================================================
sub build_series_table {
my ( $dbh1, $dbh2, $verbose ) = @_;
my ( $sth1, $h1, $sth2, $count );
$sth1 = $dbh1->prepare('SELECT * FROM miniseries') or die $DBI::errstr;
if ( $dbh1->err ) {
warn $dbh1->errstr;
}
$sth2 = $dbh2->prepare('INSERT INTO series VALUES (?,?,?,?,?,?)')
or die $DBI::errstr;
if ( $dbh2->err ) {
warn $dbh2->errstr;
}
#
# Query MariaDB for the entire 'miniseries' table
#
$sth1->execute;
if ( $dbh1->err ) {
die $dbh1->errstr;
}
#
# Loop though 'miniseries' table rows writing them to the PostgreSQL
# 'series' table
#
$count = 0;
while ( $h1 = $sth1->fetchrow_hashref ) {
$count++;
$sth2->execute(
$h1->{id}, $h1->{name},
$h1->{description}, $h1->{private},
nullif( $h1->{image}, '^\s*$' ),
$h1->{valid},
);
if ( $dbh2->err ) {
die $dbh2->errstr;
}
}
print "Copied $count records\n" if ($verbose > 0);
#
# Set the sequence to the correct value
#
alter_seq($dbh2,'series','series_seq');
}
#=== FUNCTION ================================================================
# NAME: build_episodes_series_xref_table
# PURPOSE: Generates the cross reference table by examining the 'eps' and
# 'series' tables in the MariaDB database.
# PARAMETERS: $dbh1 Handle for the MariaDB table
# $dbh2 Handle for the Pg database
# $verbose Verbosity level
# RETURNS: Nothing
# DESCRIPTION:
# THROWS: No exceptions
# COMMENTS: None
# SEE ALSO: N/A
#===============================================================================
sub build_episodes_series_xref_table {
my ( $dbh1, $dbh2, $verbose ) = @_;
my ( $sth1, $h1, $sth2, $count );
$sth1
= $dbh1->prepare(
'SELECT e.id AS epid, m.id AS msid FROM eps e, miniseries m WHERE e.series = m.id')
or die $DBI::errstr;
if ( $dbh1->err ) {
warn $dbh1->errstr;
}
$sth2 = $dbh2->prepare('INSERT INTO episodes_series_xref VALUES (?,?)')
or die $DBI::errstr;
if ( $dbh2->err ) {
warn $dbh2->errstr;
}
$sth1->execute;
if ( $dbh1->err ) {
die $dbh1->errstr;
}
$count = 0;
while ( $h1 = $sth1->fetchrow_hashref ) {
$count++;
$sth2->execute( $h1->{epid}, $h1->{msid}, );
if ( $dbh2->err ) {
die $dbh2->errstr;
}
}
print "Copied $count records\n" if ($verbose > 0);
}
#=== FUNCTION ================================================================
# NAME: build_tags_table
# PURPOSE: Using the data structure built from the MariaDB database
# populate the many-to-many table in the Pg database
# PARAMETERS: $dbh Handle for the Pg database
# $verbose Verbosity level
# $tag_hash Reference to a hash of episode ids and tags
# for each episode
# RETURNS: Nothing
# DESCRIPTION:
# THROWS: No exceptions
# COMMENTS: None
# SEE ALSO: N/A
#===============================================================================
sub build_tags_table {
my ( $dbh, $verbose, $tag_hash ) = @_;
my ( $sth1, $h1, $sth2, $sth3, $rv, $tags, $tid, $count1, $count2 );
#
# Query to find if a tag already exists
#
$sth1 = $dbh->prepare(q{SELECT * FROM tags WHERE tag = ?});
#
# Query to add a new tag
#
$sth2 = $dbh->prepare(q{INSERT INTO tags (tag) VALUES(?)});
#
# Query to add a new joining row
#
$sth3 = $dbh->prepare(q{INSERT INTO episodes_tags_xref VALUES(?,?)});
$count1 = $count2 = 0;
foreach my $id ( sort { $a <=> $b } keys( %{$tag_hash} ) ) {
#
# Get the array of tags for this episode id
#
$tags = $tag_hash->{$id};
#
# Loop through the array of tags (using an integer so we can index the
# current tag)
#
for my $i ( 0 .. $#$tags ) {
#
# Look to see if this tag exists
#
$sth1->execute( $tags->[$i] );
if ( $dbh->err ) {
warn $dbh->errstr;
}
#
# If it's already in the table just store the id for later
# otherwise add a new entry
#
if ( $h1 = $sth1->fetchrow_hashref ) {
$tid = $h1->{id};
}
else {
#
# Add the tag to 'tags'
#
$count1++;
$rv = $sth2->execute( $tags->[$i] );
if ( $dbh->err ) {
warn $dbh->errstr;
}
$rv = 0 if ( $rv eq '0E0' );
#
# Get the id number of the inserted tag
#
$tid = $dbh->last_insert_id( undef, undef, undef, undef,
{ sequence => 'tag_seq' } );
}
$count2++;
$rv = $sth3->execute( $id, $tid );
if ( $dbh->err ) {
warn $dbh->errstr;
}
$rv = 0 if ( $rv eq '0E0' );
}
}
if ($verbose > 0) {
print "Added $count1 tags\n";
print "Added $count2 cross references\n";
}
}
#=== FUNCTION ================================================================
# NAME: collect_eps_tags
# PURPOSE: Collects the tags from the eps.tags field
# PARAMETERS: $dbh Database handle
# $verbose Verbosity level
# RETURNS: A reference to the hash created by collecting all the tags
# DESCRIPTION: Queries the MariaDB 'eps' table for all of the rows containing
# tags, returning the comma-separated list with the id number.
# Each CSV list is then parsed and the result turned into a hash
# keyed on the id number and containing a sorted array of tags.
# If the level of verbosity is greater than 2 the tags has is
# dumped (ironically, as a CSV list!).
# THROWS: No exceptions
# COMMENTS: None
# SEE ALSO: N/A
#===============================================================================
sub collect_eps_tags {
my ( $dbh, $verbose ) = @_;
my ( $status, @fields, %hash );
my ( $sth, $h );
#
# For parsing the field as CSV
#
my $csv = Text::CSV_XS->new;
#
# Query the MariaDB 'eps' table for all the id and tags
#
$sth = $dbh->prepare(
q{SELECT id,tags FROM eps
WHERE length(tags) > 0
ORDER BY id}
) or die $DBI::errstr;
if ( $dbh->err ) {
warn $dbh->errstr;
}
$sth->execute;
if ( $dbh->err ) {
warn $dbh->errstr;
}
#
# Loop through what we got
#
while ( $h = $sth->fetchrow_hashref ) {
#
# Parse the tag list
#
$status = $csv->parse( $h->{tags} );
unless ($status) {
#
# Report any errors
#
print "Parse error on episode ", $h->{id}, "\n";
print $csv->error_input(), "\n";
next;
}
@fields = $csv->fields();
next unless (@fields);
#
# Trim all tags (don't alter $_ when doing it)
#
@fields = map {
my $t = $_;
$t =~ s/(^\s+|\s+$)//g;
$t;
} @fields;
#print "$h->{id}: ",join(",",@fields),"\n";
#
# Save the id and its tags, sorted for comparison
#
$hash{ $h->{id} } = [ sort @fields ];
}
#
# Dump all id numbers and tags if the verbose level is high enough
#
if ( $verbose >= 2 ) {
print "\nTags collected from the 'eps' table\n\n";
foreach my $id ( sort { $a <=> $b } keys(%hash) ) {
printf "%04d: %s\n", $id, join( ",", @{ $hash{$id} } );
}
}
return \%hash;
}
#=== FUNCTION ================================================================
# NAME: build_comments_table
# PURPOSE: Copy the data from the Mariadb 'comments' table to the Pg
# 'comments' table
# PARAMETERS: $dbh1 Handle for the MariaDB database
# $dbh2 Handle for the Pg database
# $verbose Verbosity level
# RETURNS: Nothing
# DESCRIPTION:
# THROWS: No exceptions
# COMMENTS: None
# SEE ALSO: N/A
#===============================================================================
sub build_comments_table {
my ( $dbh1, $dbh2, $verbose ) = @_;
my ( $sth1, $h1, $sth2, $count );
$sth1 = $dbh1->prepare('SELECT * FROM comments') or die $DBI::errstr;
if ( $dbh1->err ) {
warn $dbh1->errstr;
}
$sth2 = $dbh2->prepare('INSERT INTO comments VALUES (?,?,?,?,?,?,?)')
or die $DBI::errstr;
if ( $dbh2->err ) {
warn $dbh2->errstr;
}
#
# Query MariaDB for the entire 'comments' table
#
$sth1->execute;
if ( $dbh1->err ) {
die $dbh1->errstr;
}
#
# Loop though 'comments' table rows writing them to the PostgreSQL
# 'comments' table
#
$count = 0;
while ( $h1 = $sth1->fetchrow_hashref ) {
$count++;
$sth2->execute(
$h1->{id},
$h1->{eps_id},
$h1->{comment_timestamp},
nullif( $h1->{comment_author_name}, '^\s*$' ),
nullif( $h1->{comment_title}, '^\s*$' ),
$h1->{comment_text},
$h1->{last_changed},
);
if ( $dbh2->err ) {
die $dbh2->errstr;
}
}
print "Copied $count records\n" if ($verbose > 0);
#
# Set the sequence to the correct value
#
alter_seq($dbh2,'comments','comment_seq');
}
#=== FUNCTION ================================================================
# NAME: load_twat_hosts
# PURPOSE: Copy the temporary twat_hosts table from the MySQL database
# into the PostgreSQL equivalent ready for merging with the HPR
# data
# PARAMETERS: $dbh1 Handle for the MariaDB database
# $dbh2 Handle for the Pg database
# $verbose Verbosity level
# RETURNS: Nothing
# DESCRIPTION:
# THROWS: No exceptions
# COMMENTS: None
# SEE ALSO: N/A
#===============================================================================
sub load_twat_hosts {
my ( $dbh1, $dbh2, $verbose ) = @_;
my ( $sth1, $h1, $sth2, $count );
#
# Copy the 'twat_hosts' table
#
$sth1 = $dbh1->prepare('SELECT * FROM twat_hosts') or die $DBI::errstr;
if ( $dbh1->err ) {
warn $dbh1->errstr;
}
#
# Omit the 'id' here because it's an addition for PostgreSQL
#
$sth2
= $dbh2->prepare(
'INSERT INTO twat_hosts (host,email,website,repeat) VALUES (?,?,?,?)')
or die $DBI::errstr;
if ( $dbh2->err ) {
warn $dbh2->errstr;
}
#
# Query MariaDB for the entire 'comments' table
#
$sth1->execute;
if ( $dbh1->err ) {
die $dbh1->errstr;
}
#
# Loop though 'twat_hosts' table rows writing them to the PostgreSQL
# 'twat_hosts' table
#
$count = 0;
while ( $h1 = $sth1->fetchrow_hashref ) {
$count++;
$sth2->execute(
$h1->{host},
nullif( $h1->{email}, '^\s*$' ),
nullif( $h1->{website}, '^\s*$' ),
$h1->{repeat},
);
if ( $dbh2->err ) {
die $dbh2->errstr;
}
}
print "Copied $count records\n" if ( $verbose > 0 );
}
#=== FUNCTION ================================================================
# NAME: load_twat_episodes
# PURPOSE: Copy the temporary twat_episodes table from the MySQL database
# into the PostgreSQL equivalent ready for merging with the HPR
# data
# PARAMETERS: $dbh1 Handle for the MariaDB database
# $dbh2 Handle for the Pg database
# $verbose Verbosity level
# RETURNS: Nothing
# DESCRIPTION:
# THROWS: No exceptions
# COMMENTS: None
# SEE ALSO: N/A
#===============================================================================
sub load_twat_episodes {
my ( $dbh1, $dbh2, $verbose ) = @_;
my ( $sth1, $h1, $sth2, $count );
#
# Copy the 'twat_episodes' table
#
$sth1 = $dbh1->prepare('SELECT * FROM twat_episodes') or die $DBI::errstr;
if ( $dbh1->err ) {
warn $dbh1->errstr;
}
#
# Omit the 'id' here because it's an addition for PostgreSQL
#
$sth2
= $dbh2->prepare(
'INSERT INTO twat_episodes VALUES (?,?,?,?,?,?)')
or die $DBI::errstr;
if ( $dbh2->err ) {
warn $dbh2->errstr;
}
#
# Query MariaDB for the entire 'comments' table
#
$sth1->execute;
if ( $dbh1->err ) {
die $dbh1->errstr;
}
#
# Loop though 'twat_hosts' table rows writing them to the PostgreSQL
# 'twat_hosts' table
#
$count = 0;
while ( $h1 = $sth1->fetchrow_hashref ) {
$count++;
$sth2->execute(
$h1->{ep_num},
$h1->{date},
$h1->{host},
$h1->{topic},
nullif( $h1->{writeup}, '^\s*$' ),
$h1->{url},
);
if ( $dbh2->err ) {
die $dbh2->errstr;
}
}
print "Copied $count records\n" if ( $verbose > 0 );
}
#=== FUNCTION ================================================================
# NAME: compute_host_date_added
# PURPOSE: Determine the 'hosts.date_added' field once the database is
# fully populated.
# PARAMETERS: $dbh Handle for the PostgreSQL database
# RETURNS: Nothing
# DESCRIPTION:
# THROWS: No exceptions
# COMMENTS: None
# SEE ALSO: N/A
#===============================================================================
sub compute_host_date_added {
my ($dbh) = @_;
my $rv;
#
# Allocate date_added values where possible
#
$rv = $dbh->do(
q{
UPDATE hosts
SET date_added = sq.date_added
FROM (
SELECT h.id,min(e.release_date) AS date_added
FROM episodes e
JOIN episodes_hosts_xref eh ON (e.id = eh.episodes_id)
JOIN hosts h ON (h.id = eh.hosts_id)
GROUP BY h.id
ORDER by min(e.release_date)) AS sq
WHERE hosts.id = sq.id
}
);
if ( $dbh->err ) {
warn $dbh->errstr;
}
$rv = 0 if ( $rv eq '0E0' );
print "Added $rv dates to the 'date_added' column\n";
}
#=== FUNCTION ================================================================
# NAME: alter_seq
# PURPOSE: Ensure the PostgreSQL sequence associated with a table has the
# correct value.
# PARAMETERS: $dbh Handle for the PostgreSQL database
# $table Table name for the query
# $sequence Sequence name
# RETURNS: Nothing
# DESCRIPTION:
# THROWS: No exceptions
# COMMENTS: None
# SEE ALSO: N/A
#===============================================================================
sub alter_seq {
my ( $dbh, $table, $sequence ) = @_;
my ( $sth, $h, $rv, $maxid );
#
# Find the maximum id number in the table
#
$sth = $dbh->prepare("SELECT max(id) as maxid FROM $table")
or die $DBI::errstr;
if ( $dbh->err ) {
warn $dbh->errstr;
}
$sth->execute;
if ( $dbh->err ) {
die $dbh->errstr;
}
#
# Save the maximum
#
if ( $h = $sth->fetchrow_hashref ) {
$maxid = $h->{maxid};
$sth->finish;
}
#
# Reset the sequence one more than the maximum
#
$maxid++;
$rv = $dbh->do("ALTER SEQUENCE $sequence RESTART WITH $maxid");
if ( $dbh->err ) {
warn $dbh->errstr;
}
$rv = 0 if ( $rv eq '0E0' );
warn "Failed to reset $sequence\n" unless (defined($rv));
return;
}
#=== FUNCTION ================================================================
# NAME: check_table
# PURPOSE: Check that a given PostgreSQL table is empty
# PARAMETERS: $dbh Handle for the PostgreSQL database
# $table Name of table
# RETURNS: True if empty, otherwise false
# DESCRIPTION: Simply perform a query on the nominated table which counts
# rows. If the table does not exist a DBI method will fail (the
# execute?), so we treat this as a 'no empty' to make the caller
# take error action.
# THROWS: No exceptions
# COMMENTS: None
# SEE ALSO: N/A
#===============================================================================
sub check_table {
my ( $dbh, $table ) = @_;
my ( $sth, $h, $count );
$sth = $dbh->prepare("SELECT count(*) AS count FROM $table")
or die $DBI::errstr;
if ( $dbh->err ) {
warn $dbh->errstr;
return 0;
}
$sth->execute;
if ( $dbh->err ) {
warn $dbh->errstr;
return 0;
}
if ( $h = $sth->fetchrow_hashref ) {
$count = $h->{count};
$sth->finish;
}
return $count == 0;
}
#=== FUNCTION ================================================================
# NAME: default_email
# PURPOSE: Make a default email address for hosts with none
# PARAMETERS: $email Original email address
# $regex Regular expression to check the email against
# $template Template for building the default
# $hostid Host id number to use in the default
# RETURNS: The email address to be used
# DESCRIPTION: If the email address matches a regular expression then
# generate a default from the template and the host id,
# otherwise just return the address untouched.
# THROWS: No exceptions
# COMMENTS: None
# SEE ALSO: N/A
#===============================================================================
sub default_email {
my ( $email, $regex, $template, $hostid ) = @_;
return (
$email =~ $regex
? sprintf( $template, $hostid )
: $email
);
}
#=== FUNCTION ================================================================
# NAME: nullif
# PURPOSE: Tests a value and makes it 'undef' (equivalent to NULL in the
# database) if it matches a regular expression.
# PARAMETERS: $value Value to test
# $regex Regular expression to match against
# RETURNS: 'undef' if the values match, otherwise the original value
# DESCRIPTION: This is very simple, just a wrapper around the test.
# THROWS: No exceptions
# COMMENTS: None
# SEE ALSO: N/A
#===============================================================================
sub nullif {
my ( $value, $regex ) = @_;
return $value unless defined($value);
return ( $value =~ $regex ? undef : $value );
}
#=== FUNCTION ================================================================
# NAME: Options
# PURPOSE: Processes command-line options
# PARAMETERS: $optref Hash reference to hold the options
# RETURNS: Undef
# DESCRIPTION:
# THROWS: no exceptions
# COMMENTS: none
# SEE ALSO: n/a
#===============================================================================
sub Options {
my ($optref) = @_;
my @options = ( "help", "debug=i", "dry-run!", "verbose+", "phases=s" );
if ( !GetOptions( $optref, @options ) ) {
pod2usage( -msg => "$PROG version $VERSION\n", -exitval => 1 );
}
return;
}
__END__
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Application Documentation
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
#{{{
=head1 NAME
copy_mysql_pg - copy the HPR database from MySQL to PostgreSQL
=head1 VERSION
This documentation refers to B<copy_mysql_pg> version 0.0.4
=head1 USAGE
copy_mysql_pg -verbose
copy_mysql_pg -verbose -verbose
copy_mysql_pg -verbose \
-phase='episodes,hosts,eh_xref,series,es_xref,tags,comments,epilogue'
=head1 OPTIONS
=over 8
=item B<-help>
Prints a brief help message describing the usage of the program, and then exits.
=item B<-debug=N>
Selects a level of debugging. Debug information consists of a line or series
of lines prefixed with the characters 'D>':
=over 4
=item B<0>
No debug output is generated: this is the default
=back
=item B<-[no]dry-run>
When enabled (B<-dry-run>) the script will report what it would do, but will
make no changes to the target database. In the default state (B<-nodry-run>)
then changes are made.
=item B<-verbose>
Makes the script verbose resulting in the production of more information about
what it is doing.
The option may be repeated to increase the level of verbosity. The levels are:
=over 4
=item B<0>
No output is generated (apart from errors and warnings if appropriate). This
is the default level.
=item B<1>
A message is generated per phase to indicate which actions are taking place.
This includes a report of the number of rows copied from the MySQL database to
the PostgreSQL one.
=item B<2>
Following the process of collecting the CSV tags from the MySQL 'episodes' table
these are reported as a list per episode. This output will be long!
=back
=item B<-phase=CSV_LIST>
This option allows the phases of the copying process to be selected
individually. The argument B<CSV_LIST> is a list of phase names, which have to
be typed exactly. The order is not important since the script will scan its
version of the list of phases in order and will check to see if each has been
selected.
The phase names are:
=over 4
=item B<episodes>
Causes the B<episodes> table to be filled.
=item B<hosts>
Causes the B<hosts> table to be filled.
=item B<eh_xref>
Causes the B<episodes_hosts_xref> table to be filled.
=item B<series>
Causes the B<series> table to be filled.
=item B<es_xref>
Causes the B<episodes_series_xref> table to be filled.
=item B<tags>
Causes the B<tags> and the B<episodes_tags_xref> tables to be filled.
=item B<comments>
Causes the B<comments> table to be filled.
=item B<epilogue>
Runs various tasks that can only be carried out after the database has been
populated.
=back
=back
=head1 DESCRIPTION
A full description of the application and its features.
May include numerous subsections (i.e. =head2, =head3, etc.)
=head1 DIAGNOSTICS
A list of every error and warning message that the application can generate
(even the ones that will "never happen"), with a full explanation of each
problem, one or more likely causes, and any suggested remedies. If the
application generates exit status codes (e.g. under Unix) then list the exit
status associated with each error.
=head1 CONFIGURATION AND ENVIRONMENT
A full explanation of any configuration system(s) used by the application,
including the names and locations of any configuration files, and the
meaning of any environment variables or properties that can be set. These
descriptions must also include details of any configuration language used
=head1 DEPENDENCIES
A list of all the other modules that this module relies upon, including any
restrictions on versions, and an indication whether these required modules are
part of the standard Perl distribution, part of the module's distribution,
or must be installed separately.
=head1 INCOMPATIBILITIES
A list of any modules that this module cannot be used in conjunction with.
This may be due to name conflicts in the interface, or competition for
system or program resources, or due to internal limitations of Perl
(for example, many modules that use source code filters are mutually
incompatible).
=head1 BUGS AND LIMITATIONS
A list of known problems with the module, together with some indication
whether they are likely to be fixed in an upcoming release.
Also a list of restrictions on the features the module does provide:
data types that cannot be handled, performance issues and the circumstances
in which they may arise, practical limitations on the size of data sets,
special cases that are not (yet) handled, etc.
The initial template usually just has:
There are no known bugs in this module.
Please report problems to <Maintainer name(s)> (<contact address>)
Patches are welcome.
=head1 AUTHOR
<Author name(s)> (<contact address>)
=head1 LICENCE AND COPYRIGHT
Copyright (c) <year> <copyright holder> (<contact address>). All rights reserved.
Followed by whatever licence you wish to release it under.
For Perl code that is often just:
This module is free software; you can redistribute it and/or
modify it under the same terms as Perl itself. See perldoc perlartistic.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
=cut
#}}}
# [zo to open fold, zc to close]
# vim: syntax=perl:ts=8:sw=4:et:ai:tw=78:fo=tcrqn21:fdm=marker