1
0
forked from HPR/hpr-tools
hpr-tools/PostgreSQL_Database/update_mysql_pg_2

1266 lines
42 KiB
Plaintext
Raw Normal View History

#!/usr/bin/env perl
#===============================================================================
#
# FILE: update_mysql_pg_2
#
# USAGE: ./update_mysql_pg_2
#
# DESCRIPTION: Performs updates on the PostgreSQL Database 'HPR2'.
# ** Incomplete. Do not use! **
#
# OPTIONS: ---
# REQUIREMENTS: ---
# BUGS: ---
# NOTES: ---
# AUTHOR: Dave Morriss (djm), Dave.Morriss@gmail.com
# VERSION: 0.0.2
# CREATED: 2019-05-14 12:49:26
# REVISION: 2019-10-07 15:03:06
#
#===============================================================================
use 5.010;
use strict;
use warnings;
use utf8;
use Getopt::Long;
use Pod::Usage;
use Config::General;
use List::MoreUtils qw{uniq apply};
use Date::Manip::Delta;
use DateTime;
use DateTime::Format::Pg;
use DateTime::Format::Duration;
use Text::CSV;
use DBI;
use SQL::Abstract;
use Data::Dumper;
#
# Version number (manually incremented)
#
our $VERSION = '0.0.2';
#
# Script and directory names
#
( my $PROG = $0 ) =~ s|.*/||mx;
( my $DIR = $0 ) =~ s|/?[^/]*$||mx;
$DIR = '.' unless $DIR;
#-------------------------------------------------------------------------------
# Declarations
#-------------------------------------------------------------------------------
#
# Constants and other declarations
#
my $basedir = "$ENV{HOME}/HPR/PostgreSQL_Database";
my $configfile1 = "$basedir/.hpr_db.cfg";
my $configfile2 = "$basedir/.hpr_pg2.cfg";
my $database3 = "$basedir/ia.db"; # soft link
my $email_template = 'host_%s@hackerpublicradio.org';
my $default_licence = 'CC-BY-SA';
my ( $dbh1, $sth1, $h1, $rv1 );
my ( $dbh2, $sth2, $h2, $rv2 );
my ( $dbh3, $sth3, $h3, $rv3 );
my (@phase_choices);
my @phases = (
'episodes', 'hosts', 'eh_xref', 'series', 'es_xref', 'tags',
'comments', 'archived', 'assets', 'epilogue'
);
my $licenses;
#
# Shows parity between the MySQL tables and fields and the PostgreSQL ones.
#
# Organisation:
# hash: named from the two table names
# ----
# key: '_fields'
# value: array of the MySQL field names in desired order
# ----
# key: '_MSQL'
# value: SQL to be used to query the MySQL database when scanning for
# updates
# ----
# key: '_PGSQL'
# value: SQL to be used to query the PostgreSQL database when looking
# for the record in a table corresponding to the MySQL one
# ----
# key: '_PK'
# value: arrayref containing the names of the primary key fields of the
# MySQL and PostgreSQL tables
# ----
# key: '_PGTABLE'
# value: the name of the PostgreSQL table
# ----
# key: name of MySQL field (as listed in '_fields')
# value: array of 2-4 elements
# 0: name of Pg field for comparison purposes
# 1: function to manipulate one field into the other, or undef if
# a straight copy
# 2: name of Pg field in the table
# 3: function to transform the MySQL field to the Pg one
#
my %table_maps = (
# --------------------------------------------------------------------------
'eps_episodes' => {
'_fields' => [
'id', 'date', 'title', 'duration',
'summary', 'notes', 'explicit', 'license',
'downloads'
],
'_MSQL' =>
# q{SELECT * FROM eps WHERE id BETWEEN 850 AND 900},
# q{SELECT * FROM eps WHERE id BETWEEN 2501 AND 2850 ORDER BY id},
# q{SELECT * FROM eps WHERE id = 700},
q{SELECT * FROM eps WHERE status != 'reserved' ORDER BY id},
'_PGSQL' => q{
SELECT e.*,l.short_name AS license_short_name
FROM episodes e
JOIN licenses l ON e.license = l.license_id
WHERE episode_key = ?
},
'_INSERT' => q{
INSERT INTO episodes
(episode_key, release_date, title, duration, summary, notes,
explicit, license, downloads, status)
VALUES (?,?,?,?,?,?,?,?,?,'posted')
},
'_PK' => ['id','episode_key'],
'_PGTABLE' => 'episodes',
id => [
'episode_key',
sub {
return sprintf( "hpr%04d", $_[0] );
}
],
'date' => [ 'release_date', undef ],
'title' => [ 'title', undef ],
'duration' => [
'duration',
sub {
return interval( $_[0] );
}
],
'summary' => [ 'summary', undef ],
'notes' => [ 'notes', undef ],
'explicit' => [ 'explicit', undef ],
'license' => [
'license_short_name',
undef,
'license',
sub {
return $licenses->{$_[0]};
}
],
'downloads' => [ 'downloads', undef ],
},
# --------------------------------------------------------------------------
'hosts_hosts' => {
'_fields' => [
'hostid', 'host', 'email', 'profile',
'license', 'local_image', 'gpg', 'espeak_name'
],
'_MSQL' =>
# q{SELECT * FROM hosts ORDER BY hostid LIMIT 30 OFFSET 269},
q{SELECT * FROM hosts WHERE hostid = 379 ORDER BY hostid},
# q{SELECT * FROM hosts ORDER BY hostid},
'_PGSQL' => q{
SELECT * FROM hosts WHERE host_id = ?
},
'_INSERT' => q{INSERT INTO hosts
(host_id, host, email, profile, license, local_image, gpg, espeak_name)
VALUES (?,?,?,?,?,?,?,?)
},
'_PK' => ['hostid','host_id'],
'_PGTABLE' => 'hosts',
'hostid' => [ 'host_id', undef ],
'host' => [ 'host', undef ],
'email' => [ 'email', undef ],
'profile' => [ 'profile', undef ],
'license' => [ 'license',
sub {
return $licenses->{$_[0]};
}
],
'local_image' => [ 'local_image', undef ],
'gpg' => [ 'gpg', undef ],
'espeak_name' => [ 'espeak_name', undef ],
},
# --------------------------------------------------------------------------
'episodes_hosts_xref' => {
'_fields' => [ 'id', 'hostid' ],
'_MSQL' =>
q{SELECT e.id,h.hostid
FROM eps e, hosts h
WHERE e.hostid = h.hostid},
'_PGSQL' =>
q{INSERT INTO episodes_hosts_xref
SELECT e.episode_id, h.host_id
FROM episodes e, hosts h
WHERE e.episode_key = ? AND h.host_id = ?},
'_INSERT' => q{INSERT INTO episodes_hosts_xref VALUES (?,?)},
'_PK' => [ undef ],
'_PGTABLE' => 'episodes_hosts_xref',
},
# --------------------------------------------------------------------------
'miniseries_series' => {
'_fields' =>
[ 'id', 'name', 'description', 'private', 'image' ],
'_MSQL' =>
q{SELECT * FROM miniseries ORDER BY id},
'_PGSQL' => q{
SELECT * FROM series WHERE series_id = ?
},
'_INSERT' => q{INSERT INTO series VALUES (?,?,?,?,?)},
'_PK' => [ 'id', 'series_id' ],
'_PGTABLE' => 'series',
'id' => [ 'series_id', undef ],
'name' => [ 'name', undef ],
'description' => [ 'description', undef ],
'private' => [ 'private', undef ],
'image' => [ 'image', undef ],
},
# --------------------------------------------------------------------------
'comments_comments' => {
'_fields' => [
'id', 'eps_id',
'comment_timestamp', 'comment_author_name',
'comment_title', 'comment_text', 'last_changed'
],
'_MSQL' =>
q{SELECT * FROM comments ORDER BY id},
'_PGSQL' => q{
SELECT
e.episode_key,
c.*
FROM comments c
JOIN episodes e USING (episode_id)
WHERE comment_id = ?
},
# (comment_id, episode_id, comment_timestamp, comment_author_name,
# comment_title, comment_text)
'_INSERT' => q{INSERT INTO comments
VALUES (?,id_in_episodes(?),?,?,?,?,?)
},
'_PRE_UPDATE' => q{
ALTER TABLE comments DISABLE TRIGGER USER
},
'_POST_UPDATE' => q{
ALTER TABLE comments ENABLE TRIGGER USER
},
'_PK' => [ 'id', 'comment_id' ],
'_PGTABLE' => 'comments',
'id' => [ 'comment_id', undef ],
'eps_id' => [ 'episode_key',
sub {
return sprintf( "hpr%04d", $_[0] );
}
],
'comment_timestamp' => [ 'comment_timestamp', undef ],
'comment_author_name' => [ 'comment_author_name', undef ],
'comment_title' => [ 'comment_title', undef ],
'comment_text' => [ 'comment_text', undef ],
'last_changed' => [ 'last_changed', undef ],
},
# --------------------------------------------------------------------------
);
#
# Enable Unicode mode
#
binmode STDOUT, ":encoding(UTF-8)";
binmode STDERR, ":encoding(UTF-8)";
#-------------------------------------------------------------------------------
# Options and arguments
#-------------------------------------------------------------------------------
my $DEF_DEBUG = 0;
#
# Process options
#
my %options;
Options( \%options );
#
# Default help
#
pod2usage( -msg => "$PROG version $VERSION\n", -exitval => 1 )
if ( $options{'help'} );
#
# Collect options
#
my $DEBUG = ( defined( $options{debug} ) ? $options{debug} : $DEF_DEBUG );
my $cfgfile1
= ( defined( $options{config} ) ? $options{config} : $configfile1 );
my $verbose = ( defined( $options{verbose} ) ? $options{verbose} : 0 );
my $dry_run = ( defined( $options{'dry-run'} ) ? $options{'dry-run'} : 0 );
#
# This option is a list, provided as a CSV
#
my $phase_choices = $options{phases};
if ( defined($phase_choices) ) {
#
# We have a list which we'll parse, validate, sort, make unique and filter
#
my $lcsv = Text::CSV_XS->new( { binary => 1, } );
if ( $lcsv->parse($phase_choices) ) {
# Sort fields
@phase_choices = uniq( sort { $a cmp $b } $lcsv->fields() );
# Trim leading and trailing spaces
@phase_choices = apply { $_ =~ s/(^\s*|\s*$)// } @phase_choices;
# Make a list of invalid keywords
my %tmp = map { $_ => 1 } @phases;
my @bad = grep { not exists $tmp{$_} } @phase_choices;
# Deal with all errors
die "Invalid list; no elements\n" if scalar(@phase_choices) == 0;
die "Invalid list; too many elements\n"
if scalar(@phase_choices) > scalar(@phases);
die "Invalid list elements: ", join( ",", @bad ) . "\n"
if scalar(@bad) > 0;
}
else {
die "Failed to parse -list='$phase_choices'\n"
. $lcsv->error_diag() . "\n";
}
}
else {
#
# By default we do all phases
#
@phase_choices = @phases;
}
_debug( $DEBUG > 2, Dumper(\@phase_choices) );
#-------------------------------------------------------------------------------
# Validate the %table_maps hash
#-------------------------------------------------------------------------------
unless ( validate_maps(\%table_maps) ) {
warn "Hash \%table_maps is wrongly structured\n";
exit 1;
}
#-------------------------------------------------------------------------------
# Configuration file for MySQL/MariaDB - load data
#-------------------------------------------------------------------------------
my $conf1 = Config::General->new(
-ConfigFile => $cfgfile1,
-InterPolateVars => 1,
-ExtendedAccess => 1
);
my %config1 = $conf1->getall();
my $conf2 = Config::General->new(
-ConfigFile => $configfile2,
-InterPolateVars => 1,
-ExtendedAccess => 1
);
my %config2 = $conf2->getall();
#-------------------------------------------------------------------------------
# Connect to the MariaDB database
#-------------------------------------------------------------------------------
my $dbtype1 = $config1{database}->{type} // 'mysql';
my $dbhost1 = $config1{database}->{host} // '127.0.0.1';
my $dbport1 = $config1{database}->{port} // 3306;
my $dbname1 = $config1{database}->{name};
my $dbuser1 = $config1{database}->{user};
my $dbpwd1 = $config1{database}->{password};
$dbh1
= DBI->connect( "dbi:$dbtype1:host=$dbhost1;port=$dbport1;database=$dbname1",
$dbuser1, $dbpwd1, { AutoCommit => 1 } )
or die $DBI::errstr;
#
# Enable client-side UTF8
#
$dbh1->{mysql_enable_utf8} = 1;
#-------------------------------------------------------------------------------
# Connect to the PostgreSQL database
#-------------------------------------------------------------------------------
my $dbtype2 = $config2{database}->{type} // 'Pg';
my $dbhost2 = $config2{database}->{host} // '127.0.0.1';
my $dbport2 = $config2{database}->{port} // 5432;
my $dbname2 = $config2{database}->{name};
my $dbuser2 = $config2{database}->{user};
my $dbpwd2 = $config2{database}->{password};
$dbh2 = DBI->connect( "dbi:$dbtype2:host=$dbhost2;database=$dbname2;port=$dbport2",
$dbuser2, $dbpwd2, { PrintError => 0, AutoCommit => 1 } )
or die $DBI::errstr;
#
# Enable client-side UTF8
#
$dbh2->{pg_enable_utf8} = 1;
#-------------------------------------------------------------------------------
# Connect to the SQLite database
#-------------------------------------------------------------------------------
$dbh3 = DBI->connect( "dbi:SQLite:dbname=$database3", "", "" );
#-------------------------------------------------------------------------------
# Gather the licence details from the Pg database
#-------------------------------------------------------------------------------
$licenses = load_licenses($dbh2);
#
# Populate the %choices hash
#
my %choices = map { $_ => 1 } @phase_choices;
#
# Perform phases in order, omitting those that are not in the list
#
for my $phase (@phases) {
#---------------------------------------------------------------------------
# Update from the 'eps' table to 'episodes'
#---------------------------------------------------------------------------
if ( $phase eq 'episodes' && exists( $choices{$phase} ) ) {
print "Find episode updates\n" if ($verbose);
update_table( $dbh1, $dbh2, $table_maps{'eps_episodes'}, $dry_run, $verbose );
}
#---------------------------------------------------------------------------
# Update from the 'hosts' table to 'hosts'
#---------------------------------------------------------------------------
elsif ( $phase eq 'hosts' && exists( $choices{$phase} ) ) {
print "Find host updates\n" if ( $verbose > 0 );
update_table( $dbh1, $dbh2, $table_maps{'hosts_hosts'}, $dry_run, $verbose );
}
#---------------------------------------------------------------------------
# Update the 'episodes_hosts_xref' table
#---------------------------------------------------------------------------
elsif ( $phase eq 'eh_xref' && exists( $choices{$phase} ) ) {
print "Find episode/host updates\n" if ( $verbose > 0 );
}
#---------------------------------------------------------------------------
# Update from the 'miniseries' table to 'series'
#---------------------------------------------------------------------------
elsif ( $phase eq 'series' && exists( $choices{$phase} ) ) {
print "Find series updates\n" if ( $verbose > 0 );
update_table( $dbh1, $dbh2, $table_maps{'miniseries_series'},
$dry_run, $verbose );
}
#---------------------------------------------------------------------------
# Update the 'episodes_series_xref' table
#---------------------------------------------------------------------------
elsif ( $phase eq 'es_xref' && exists( $choices{$phase} ) ) {
print "Find episode/series updates\n" if ( $verbose > 0 );
}
#---------------------------------------------------------------------------
# Collect and store the id numbers and tags from the MySQL 'eps' table,
# then update the PostgreSQL tables.
#---------------------------------------------------------------------------
elsif ( $phase eq 'tags' && exists( $choices{$phase} ) ) {
print "Find tag updates\n" if ( $verbose > 0 );
}
#---------------------------------------------------------------------------
# Update from the 'comments' table to 'comments'
#---------------------------------------------------------------------------
elsif ( $phase eq 'comments' && exists( $choices{$phase} ) ) {
print "Find comment updates\n" if ( $verbose > 0 );
update_table( $dbh1, $dbh2, $table_maps{'comments_comments'},
$dry_run, $verbose );
}
#---------------------------------------------------------------------------
# Update archive-related fields the 'episodes' table from 'ia.db'
#---------------------------------------------------------------------------
elsif ( $phase eq 'archived' && exists( $choices{$phase} ) ) {
print "Find archive data updates\n" if ( $verbose > 0 );
}
#---------------------------------------------------------------------------
# Update from the 'assets' table in 'ia.db' to 'assets'
#---------------------------------------------------------------------------
elsif ( $phase eq 'assets' && exists( $choices{$phase} ) ) {
print "Find asset updates\n" if ( $verbose > 0 );
}
#---------------------------------------------------------------------------
# Perform the 'epilogue' actions
#---------------------------------------------------------------------------
elsif ( $phase eq 'epilogue' && exists( $choices{$phase} ) ) {
}
}
exit;
#=== FUNCTION ================================================================
# NAME: validate_maps
# PURPOSE: Validates the hash containing the maps that drive this script
# PARAMETERS: $maps hashref to the maps hash
# $verbose how much to report
# RETURNS: True if the maps are valid, otherwise false
# DESCRIPTION:
# THROWS: No exceptions
# COMMENTS: None
# SEE ALSO: N/A
#===============================================================================
sub validate_maps {
my ( $maps, $verbose ) = @_;
return 0 unless defined($maps);
foreach my $key (sort(keys(%$maps))) {
foreach my $k (qw{_fields _MSQL _PGSQL _INSERT _PK _PGTABLE}) {
unless (exists($maps->{$key}->{$k})) {
print "Missing key: %table_maps{$key}->{$k}\n";
return 0;
}
}
foreach my $k (@{$maps->{_fields}}) {
unless (defined($maps->{$key}->{$k})) {
print "Missing key: %table_maps{$key}->{$k}\n";
return 0;
}
}
}
return 1;
}
#=== FUNCTION ================================================================
# NAME: load_licenses
# PURPOSE: Loads the 'licenses' table from the Pg database
# PARAMETERS: $dbh Handle for the Pg database
# RETURNS: A hashref containing the licence information
# DESCRIPTION:
# THROWS: No exceptions
# COMMENTS: None
# SEE ALSO: N/A
#===============================================================================
sub load_licenses {
my ( $dbh ) = @_;
my ( $licenses, %lic_n2id );
my $sth = $dbh->prepare( q{SELECT * FROM licenses ORDER BY license_id} )
or die $DBI::errstr;
if ( $dbh->err ) {
warn $dbh->errstr;
}
$sth->execute;
if ( $dbh->err ) {
warn $dbh->errstr;
}
#
# Load the entire table as an arrayref of hashrefs
#
$licenses = $sth->fetchall_arrayref( {} );
for my $row (@$licenses) {
$lic_n2id{$row->{short_name}} = $row->{license_id};
}
return \%lic_n2id;
}
#=== FUNCTION ================================================================
# NAME: update_table
# PURPOSE: Updates a Pg table from the corresponding MySQL table driven
# by the global hash %table_maps.
# PARAMETERS: $dbh1 Handle for the MariaDB database
# $dbh2 Handle for the Pg database
# $map Hashref to the sub-hash in the %table_maps hash
# $dry_run Dry run setting
# $verbose Verbosity level
# RETURNS:
# DESCRIPTION: There is not always a direct table to table correspondence, but
# the %table_maps entries present what is necessary to perform
# the actions.
# THROWS: No exceptions
# COMMENTS: None
# SEE ALSO: N/A
#===============================================================================
sub update_table {
my ( $dbh1, $dbh2, $map, $dry_run, $verbose ) = @_;
my ( $sth1, $h1, $sth2, $h2, $count, $pgkey );
my ( %diffs, $diffhash );
#
# The primary keys for the two tables
#
my ( $pk1, $pk2 ) = @{ $map->{'_PK'} };
#
# Prepare a query to collect rows from the MySQL database
#
$sth1 = $dbh1->prepare( $map->{'_MSQL'} )
or die $DBI::errstr;
if ( $dbh1->err ) {
warn $dbh1->errstr;
}
#
# Query to look up the equivalent episode in the Pg database.
# TODO: ensure there's SQL in the table before using it!
#
$sth2 = $dbh2->prepare( $map->{'_PGSQL'} )
or die $DBI::errstr;
#
# Query MySQL for the nominated table
#
$sth1->execute;
if ( $dbh1->err ) {
die $dbh1->errstr;
}
#
# Loop though MySQL table rows looking for changes
#
$count = 0;
while ( $h1 = $sth1->fetchrow_hashref ) {
#
# Build the search string for Pg, transforming it if there is code in
# the table to do so.
#
$pgkey = $h1->{$pk1};
if (defined($map->{$pk1}->[1])) {
if (ref($map->{$pk1}->[1]) eq 'CODE') {
$pgkey = $map->{$pk1}->[1]($h1->{$pk1});
}
}
#
# Find the corresponding row in the Pg table
#
$sth2->execute($pgkey);
if ( $dbh2->err ) {
die $dbh2->errstr;
}
if ( $h2 = $sth2->fetchrow_hashref ) {
#
# Found the record in Pg, so now check whether there's
# a difference, and accumulate them if found
#
print "Record found: ", $h1->{$pk1}, "\n" if ( $verbose > 1 );
$diffhash
= compare_fields( $h1, $h2, $map, $verbose );
$diffs{ $h2->{$pk2} } = $diffhash if ( defined($diffhash) );
}
else {
#
# Record was not found so now we have to add it. TODO
#
print "Record not found: ",$h1->{$pk1}, "\n" if ($verbose > 1);
add_row($dbh1, $dbh2, $map, $h1, $dry_run, $verbose );
}
$count++;
}
if (%diffs) {
print "Differences found: ",scalar(keys(%diffs)),"\n" if ($verbose);
_debug( $DEBUG >= 3, Dumper(\%diffs) );
#
# Call a function which iterates through the keys of %diffs (in
# numerical order). For each key construct something like:
# update episodes set title = 'value1', summary = 'value2' where
# episode_key = 'value3';
#
update_differences( $dbh1, $dbh2, $map, $map->{'_PGTABLE'},
$map->{'_PK'}->[1], \%diffs, $dry_run, $verbose );
}
return $count;
}
#=== FUNCTION ================================================================
# NAME: compare_fields
# PURPOSE: Compares fields in a database row looking for changes
# PARAMETERS: $h1 hashref to the current row from the MySQL query
# $h2 hashref to the current row from the Pg query
# $map hashref to the section of the table_maps hash
# for this table update
# RETURNS: A hashref to hash of differences (or undef if none)
# DESCRIPTION:
# THROWS: No exceptions
# COMMENTS: None
# SEE ALSO: N/A
#===============================================================================
sub compare_fields {
my ($h1, $h2, $map, $verbose ) = @_;
my (%diffs, $f1, $f2);
my $len = ($verbose * 40);
my $pk1 = $map->{_PK}->[0];
my @flds = @{$map->{_fields}};
for my $fld (@flds) {
#
# Contents of field in MySQL. We force blank strings to NULL here in
# line with copy_mysql_pg_2.
#
$f1 = nullif($h1->{$fld},'^\s*$');
#
# Contents of field in Pg
#
$f2 = $h2->{$map->{$fld}->[0]};
#
# If there's code to transform fields then run it. Always convert the
# MySQL to the Pg form. This for building the differences, which may
# not be what we want to write to the database.
#
if (defined($map->{$fld}->[1])) {
if (ref($map->{$fld}->[1]) eq 'CODE') {
$f1 = $map->{$fld}->[1]($f1);
}
}
if ($DEBUG > 2) {
printf "D> %-5s %s = %s\n",'MySQL',$fld,coalesce($f1,'[undefined]');
printf "D> %-5s %s = %s\n",'Pg',$fld,coalesce($f2,'[undefined]');
}
#
# Do the fields differ?
#
if ( !equal( $f1, $f2 ) ) {
_debug( $DEBUG > 1, $h1->{$pk1} );
printf "D> Difference found: %s\n%s\n%s\n", $fld,
coalesce( trunc( $f1, $len ), '[undefined]' ),
coalesce( trunc( $f2, $len ), '[undefined]' )
if ( $DEBUG > 1 );
#
# If there are further elements in the array deal with them
#
if ( defined( $map->{$fld}->[3] ) ) {
if ( ref( $map->{$fld}->[3] ) eq 'CODE' ) {
$f1 = $map->{$fld}->[3]($f1);
}
}
$diffs{$fld} = [ $f1, $f2 ];
}
}
#
# Return any differences we found or 'undef' if nothing was found. This
# way of doing it is ugly but it works.
#
return ( scalar( keys(%diffs) ) > 0 ? \%diffs : undef );
}
#=== FUNCTION ================================================================
# NAME: update_differences
# PURPOSE: Updates differences in a Pg table
# PARAMETERS: $dbh1 Handle for the MariaDB database
# $dbh2 Handle for the Pg database
# $map hashref to the section of the table_maps hash
# for this table update
# $table Name of table to update
# $keyfld Name of field within $table for 'WHERE' clause
# $diffs Hashref of a hash of differences
# $dry_run Dry run setting
# $verbose Verbosity level
# RETURNS: Nothing
# DESCRIPTION:
# THROWS: No exceptions
# COMMENTS: None
# SEE ALSO: N/A
#===============================================================================
sub update_differences {
my ( $dbh1, $dbh2, $map, $table, $keyfld, $diffs, $dry_run, $verbose )
= @_;
my ( $sql, $stmt, @bind, %data, %where, $sth2 );
#
# Loop through the updates by key
#
foreach my $key ( sort( keys(%$diffs) ) ) {
#
# Build new SQL each time
#
$sql = SQL::Abstract->new;
#
# Populate the %data hash from the differences, accepting the MySQL
# value (the new one). If we find an empty string we make it 'undef'
# (NULL in the database).
#
foreach my $k ( keys( %{ $diffs->{$key} } ) ) {
$data{$k} = $diffs->{$key}->{$k}->[0];
}
#
# Populate the %where hash
#
%where = ( $keyfld => $key );
if ( $DEBUG > 2 ) {
_debug( 1, '%data: ' . Dumper( \%data ) );
_debug( 1, '%where: ' . Dumper( \%where ) );
}
#
# Build the SQL and the arguments to fill the gaps
#
( $stmt, @bind ) = $sql->update( $table, \%data, \%where );
#
# Report it or do it depending on $dry_run
#
if ($dry_run) {
printf "Not changed %s: dry run mode on\n", $key;
_debug( $DEBUG > 2, "\$stmt: $stmt" );
_debug( $DEBUG > 3, "\@bind: " . join( ', ', @bind ) );
}
else {
#
# If there's a pre-update apply it
#
if ( defined( $map->{_PRE_UPDATE} ) ) {
$dbh2->do( $map->{_PRE_UPDATE} );
if ( $dbh2->err ) {
warn $dbh2->errstr;
}
}
$sth2 = $dbh2->prepare($stmt)
or die $DBI::errstr;
if ( $dbh2->err ) {
warn $dbh2->errstr;
}
$sth2->execute(@bind);
if ( $dbh2->err ) {
warn $dbh2->errstr;
}
else {
printf "Updated recordi %s\n", $key if $verbose;
}
#
# If there's a post-update apply it
#
if ( defined( $map->{_POST_UPDATE} ) ) {
$dbh2->do( $map->{_POST_UPDATE} );
if ( $dbh2->err ) {
warn $dbh2->errstr;
}
}
}
#
# Prevent data 'bleed-through'
#
undef %data;
undef %where;
}
}
#=== FUNCTION ================================================================
# NAME: add_row
# PURPOSE: Add a row to a table
# PARAMETERS: $dbh1 Handle for the MariaDB database
# $dbh2 Handle for the Pg database
# $map hashref to the section of the table_maps hash
# for this table update
# $h1 hashref to the current row from the MySQL query
# $dry_run Dry run setting
# $verbose Verbosity level
# RETURNS:
# DESCRIPTION:
# THROWS: No exceptions
# COMMENTS: None
# SEE ALSO: N/A
#===============================================================================
sub add_row {
my ( $dbh1, $dbh2, $map, $h1, $dry_run, $verbose ) = @_;
my ( @data, $fvalue, $sth2 );
my @flds = @{ $map->{_fields} };
for my $fld (@flds) {
#
# Ensure empty strings become NULL values
#
$fvalue = nullif( $h1->{$fld}, '^\s*$' );
#
# Run the code if any, giving precedence to the second pair of
# elements in the controlling array
#
if ( defined( $map->{$fld}->[3] ) ) {
if ( ref( $map->{$fld}->[3] ) eq 'CODE' ) {
$fvalue = $map->{$fld}->[3]($fvalue);
}
}
elsif ( defined( $map->{$fld}->[1] ) ) {
if ( ref( $map->{$fld}->[1] ) eq 'CODE' ) {
$fvalue = $map->{$fld}->[1]($fvalue);
}
}
#
# Save the value for writing to the database
#
push( @data, $fvalue );
}
if ($dry_run) {
printf "Not added %s, dry run mode on\n", $h1->{ $map->{'_PK'}->[0] };
_debug( $DEBUG >= 3,
join( "\n", map { coalesce( $_, '[undef]' ) } @data ) );
}
else {
$sth2 = $dbh2->prepare( $map->{'_INSERT'} )
or die $DBI::errstr;
if ( $dbh2->err ) {
warn $dbh2->errstr;
}
$sth2->execute(@data);
if ( $dbh2->err ) {
warn $dbh2->errstr;
}
else {
printf "Added record %s\n", $h1->{ $map->{'_PK'}->[0] }
if $verbose;
}
}
}
#=== FUNCTION ================================================================
# NAME: trunc
# PURPOSE: Truncate a string to a specified length and add '...' to show
# it was truncated
# PARAMETERS: $string the string to truncate
# $len the length to truncate to
# RETURNS: The truncated string (if it's longer than $length)
# DESCRIPTION:
# THROWS: No exceptions
# COMMENTS: None
# SEE ALSO: N/A
#===============================================================================
sub trunc {
my ( $str, $len ) = @_;
return unless defined($str);
return $str if ( $len >= length($str) );
return substr( $str, 0, $len ) . '...';
}
#=== FUNCTION ================================================================
# NAME: interval
# PURPOSE: Convert a time in seconds to a valid 'HH:MM:SS' interval
# PARAMETERS: $time the time to convert in seconds
# RETURNS: The interval string in the format 'HH:MM:SS' or undef
# DESCRIPTION: TODO
# THROWS: No exceptions
# COMMENTS: Adapted from a routine for generating valid PostgreSQL
# interval times. Probably could be simplified
# SEE ALSO:
#===============================================================================
sub interval {
my ($time) = @_;
return '00:00:00' unless $time; ## no critic
my $date = Date::Manip::Delta->new;
unless ( $date->parse($time) ) {
return $date->printf("%02hv:%02mv:%02sv");
}
else {
warn "Invalid time $time\n";
return undef; ## no critic
}
}
#=== FUNCTION ================================================================
# NAME: default_email
# PURPOSE: Make a default email address for hosts with none
# PARAMETERS: $email Original email address
# $regex Regular expression to check the email against
# $template Template for building the default
# $hostid Host id number to use in the default
# RETURNS: The email address to be used
# DESCRIPTION: If the email address matches a regular expression then
# generate a default from the template and the host id,
# otherwise just return the address untouched.
# THROWS: No exceptions
# COMMENTS: None
# SEE ALSO: N/A
#===============================================================================
sub default_email {
my ( $email, $regex, $template, $hostid ) = @_;
return (
$email =~ $regex
? sprintf( $template, $hostid )
: $email
);
}
#=== FUNCTION ================================================================
# NAME: equal
# PURPOSE: Compare two strings even if undefined
# PARAMETERS: $s1 The first string
# $s2 The second string
# RETURNS: True if both strings are undefined, false if one isn't
# defined, otherwise the result of comparing them.
# DESCRIPTION: Works on the principle that two undefined strings are equal,
# a defined and an undefined string are not, and otherwise they
# are equal if they are equal!
# THROWS: No exceptions
# COMMENTS: None
# SEE ALSO:
#===============================================================================
sub equal {
my ( $s1, $s2 ) = @_;
return 1 if ( !defined($s1) && !defined($s2) );
return 0 if ( !defined($s1) || !defined($s2) );
return ( $s1 eq $s2 );
}
#=== FUNCTION ================================================================
# NAME: nullif
# PURPOSE: Tests a value and makes it 'undef' (equivalent to NULL in the
# database) if it matches a regular expression.
# PARAMETERS: $value Value to test
# $regex Regular expression to match against
# RETURNS: 'undef' if the values match, otherwise the original value
# DESCRIPTION: This is very simple, just a wrapper around the test.
# THROWS: No exceptions
# COMMENTS: None
# SEE ALSO: N/A
#===============================================================================
sub nullif {
my ( $value, $regex ) = @_;
return $value unless defined($value);
return ( $value =~ $regex ? undef : $value );
}
#=== FUNCTION ================================================================
# NAME: coalesce
# PURPOSE: To find the first defined argument and return it
# PARAMETERS: Arbitrary number of arguments
# RETURNS: The first defined argument or undef if there are none
# DESCRIPTION: Modelled on the SQL function of the same name. It takes a list
# of arguments, scans it for the first one that is not undefined
# and returns it. If an argument is defined and it's an arrayref
# then the referenced array is returned comma-delimited. This
# allows calls such as "coalesce($var,'undef')" which returns
# the value of $var if it's defined, and 'undef' if not and
# doesn't break anything along the way.
# THROWS: No exceptions
# COMMENTS: None
# SEE ALSO: N/A
#===============================================================================
sub coalesce {
foreach (@_) {
if ( defined($_) ) {
if ( ref($_) eq 'ARRAY' ) {
return join( ',', @{$_} );
}
else {
return $_;
}
}
}
return; # implicit undef
}
#=== FUNCTION ================================================================
# NAME: _debug
# PURPOSE: Prints debug reports
# PARAMETERS: $active Boolean: 1 for print, 0 for no print
# $message Message to print
# RETURNS: Nothing
# DESCRIPTION: Outputs a message if $active is true. It removes any trailing
# newline and then adds one in the 'print' to the caller doesn't
# have to bother. Prepends the message with 'D> ' to show it's
# a debug message.
# THROWS: No exceptions
# COMMENTS: None
# SEE ALSO: N/A
#===============================================================================
sub _debug {
my ( $active, $message ) = @_;
chomp($message);
print "D> $message\n" if $active;
}
#=== FUNCTION ================================================================
# NAME: Options
# PURPOSE: Processes command-line options
# PARAMETERS: $optref Hash reference to hold the options
# RETURNS: Undef
# DESCRIPTION:
# THROWS: no exceptions
# COMMENTS: none
# SEE ALSO: n/a
#===============================================================================
sub Options {
my ($optref) = @_;
my @options = ( "help", "config=s", "debug=i", "dry-run!", "verbose+",
"phases=s" );
if ( !GetOptions( $optref, @options ) ) {
pod2usage( -msg => "$PROG version $VERSION\n", -exitval => 1 );
}
return;
}
__END__
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Application Documentation
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
#{{{
=head1 NAME
update_mysql_pg_2 - Update the PostgreSQL database HPR2 from the live MySQL
database or a local copy
=head1 VERSION
This documentation refers to update_mysql_pg_2 version 0.0.2
=head1 USAGE
update_mysql_pg_2 -verbose
update_mysql_pg_2 -config=.hpr_livedb.cfg -verbose
update_mysql_pg_2 -verbose \
-phase='episodes,hosts,eh_xref,series,es_xref,tags,comments,archived,assets,epilogue'
=head1 REQUIRED ARGUMENTS
A complete list of every argument that must appear on the command line.
when the application is invoked, explaining what each of them does, any
restrictions on where each one may appear (i.e. flags that must appear
before or after filenames), and how the various arguments and options
may interact (e.g. mutual exclusions, required combinations, etc.)
If all of the application's arguments are optional this section
may be omitted entirely.
=head1 OPTIONS
A complete list of every available option with which the application
can be invoked, explaining what each does, and listing any restrictions,
or interactions.
If the application has no options this section may be omitted entirely.
=head1 DESCRIPTION
A full description of the application and its features.
May include numerous subsections (i.e. =head2, =head3, etc.)
=head1 DIAGNOSTICS
A list of every error and warning message that the application can generate
(even the ones that will "never happen"), with a full explanation of each
problem, one or more likely causes, and any suggested remedies. If the
application generates exit status codes (e.g. under Unix) then list the exit
status associated with each error.
=head1 CONFIGURATION AND ENVIRONMENT
A full explanation of any configuration system(s) used by the application,
including the names and locations of any configuration files, and the
meaning of any environment variables or properties that can be set. These
descriptions must also include details of any configuration language used
=head1 DEPENDENCIES
A list of all the other modules that this module relies upon, including any
restrictions on versions, and an indication whether these required modules are
part of the standard Perl distribution, part of the module's distribution,
or must be installed separately.
=head1 INCOMPATIBILITIES
A list of any modules that this module cannot be used in conjunction with.
This may be due to name conflicts in the interface, or competition for
system or program resources, or due to internal limitations of Perl
(for example, many modules that use source code filters are mutually
incompatible).
=head1 BUGS AND LIMITATIONS
A list of known problems with the module, together with some indication
whether they are likely to be fixed in an upcoming release.
Also a list of restrictions on the features the module does provide:
data types that cannot be handled, performance issues and the circumstances
in which they may arise, practical limitations on the size of data sets,
special cases that are not (yet) handled, etc.
The initial template usually just has:
There are no known bugs in this module.
Please report problems to <Maintainer name(s)> (<contact address>)
Patches are welcome.
=head1 AUTHOR
<Author name(s)> (<contact address>)
=head1 LICENCE AND COPYRIGHT
Copyright (c) <year> <copyright holder> (<contact address>). All rights reserved.
Followed by whatever licence you wish to release it under.
For Perl code that is often just:
This module is free software; you can redistribute it and/or
modify it under the same terms as Perl itself. See perldoc perlartistic.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
=cut
#}}}
# [zo to open fold, zc to close]
# vim: syntax=perl:ts=8:sw=4:et:ai:tw=78:fo=tcrqn21:fdm=marker