#!/usr/bin/env perl #=============================================================================== # # FILE: copy_mysql_pg_2 # # USAGE: ./copy_mysql_pg_2 # # DESCRIPTION: Copies HPR show data from the MariaDB database to an # experimental PostgreSQL database (second version "HPR2") # # OPTIONS: --- # REQUIREMENTS: --- # BUGS: --- # NOTES: --- # AUTHOR: Dave Morriss (djm), Dave.Morriss@gmail.com # VERSION: 0.1.5 # CREATED: 2017-10-23 19:11:48 # REVISION: 2019-06-01 16:10:23 # #=============================================================================== use 5.010; use strict; use warnings; use utf8; use Getopt::Long; use Pod::Usage; use Config::General; use List::MoreUtils qw{uniq apply}; use Text::CSV; use DBI; use SQL::Abstract; use Data::Dumper; # # Version number (manually incremented) # our $VERSION = '0.1.5'; # # Script and directory names # ( my $PROG = $0 ) =~ s|.*/||mx; ( my $DIR = $0 ) =~ s|/?[^/]*$||mx; $DIR = '.' unless $DIR; #------------------------------------------------------------------------------- # Declarations #------------------------------------------------------------------------------- # # Constants and other declarations # my $basedir = "$ENV{HOME}/HPR/PostgreSQL_Database"; my $configfile1 = "$basedir/.hpr_db.cfg"; my $configfile2 = "$basedir/.hpr_pg2.cfg"; my $database3 = "$basedir/ia.db"; # soft link my $email_template = 'host_%s@hackerpublicradio.org'; my $default_licence = 'CC-BY-SA'; my ( $dbh1, $sth1, $h1, $rv1 ); my ( $dbh2, $sth2, $h2, $rv2 ); my ( $dbh3, $sth3, $h3, $rv3 ); my (@phase_choices); my ( %eps_tags, %data ); my @phases = ( 'episodes', 'hosts', 'eh_xref', 'series', 'es_xref', 'tags', 'comments', 'archived', 'assets', 'epilogue' ); # # Enable Unicode mode # binmode STDOUT, ":encoding(UTF-8)"; binmode STDERR, ":encoding(UTF-8)"; #------------------------------------------------------------------------------- # Options and arguments #------------------------------------------------------------------------------- my $DEF_DEBUG = 0; # # Process options # my %options; Options( \%options ); # # Default help # pod2usage( -msg => "$PROG version $VERSION\n", -exitval => 1 ) if ( $options{'help'} ); # # Collect options # my $DEBUG = ( defined( $options{debug} ) ? $options{debug} : $DEF_DEBUG ); my $cfgfile1 = ( defined( $options{config} ) ? $options{config} : $configfile1 ); my $verbose = ( defined( $options{verbose} ) ? $options{verbose} : 0 ); my $dry_run = ( defined( $options{'dry-run'} ) ? $options{'dry-run'} : 0 ); # # This option is a list, provided as a CSV # my $phase_choices = $options{phases}; if ( defined($phase_choices) ) { # # We have a list which we'll parse, validate, sort, make unique and filter # my $lcsv = Text::CSV_XS->new( { binary => 1, } ); if ( $lcsv->parse($phase_choices) ) { # Sort fields @phase_choices = uniq( sort { $a cmp $b } $lcsv->fields() ); # Trim leading and trailing spaces @phase_choices = apply { $_ =~ s/(^\s*|\s*$)// } @phase_choices; # Make a list of invalid keywords my %tmp = map { $_ => 1 } @phases; my @bad = grep { not exists $tmp{$_} } @phase_choices; # Deal with all errors die "Invalid list; no elements\n" if scalar(@phase_choices) == 0; die "Invalid list; too many elements\n" if scalar(@phase_choices) > scalar(@phases); die "Invalid list elements: ", join( ",", @bad ) . "\n" if scalar(@bad) > 0; } else { die "Failed to parse -list='$phase_choices'\n" . $lcsv->error_diag() . "\n"; } } else { # # By default we do all phases # @phase_choices = @phases; } #------------------------------------------------------------------------------- # Configuration file for MySQL/MariaDB - load data #------------------------------------------------------------------------------- my $conf1 = Config::General->new( -ConfigFile => $cfgfile1, -InterPolateVars => 1, -ExtendedAccess => 1 ); my %config1 = $conf1->getall(); my $conf2 = Config::General->new( -ConfigFile => $configfile2, -InterPolateVars => 1, -ExtendedAccess => 1 ); my %config2 = $conf2->getall(); #------------------------------------------------------------------------------- # Connect to the MariaDB database #------------------------------------------------------------------------------- my $dbtype1 = $config1{database}->{type} // 'mysql'; my $dbhost1 = $config1{database}->{host} // '127.0.0.1'; my $dbport1 = $config1{database}->{port} // 3306; my $dbname1 = $config1{database}->{name}; my $dbuser1 = $config1{database}->{user}; my $dbpwd1 = $config1{database}->{password}; $dbh1 = DBI->connect( "dbi:$dbtype1:host=$dbhost1;port=$dbport1;database=$dbname1", $dbuser1, $dbpwd1, { AutoCommit => 1 } ) or die $DBI::errstr; # # Enable client-side UTF8 # $dbh1->{mysql_enable_utf8} = 1; #------------------------------------------------------------------------------- # Connect to the PostgreSQL database #------------------------------------------------------------------------------- my $dbtype2 = $config2{database}->{type} // 'Pg'; my $dbhost2 = $config2{database}->{host} // '127.0.0.1'; my $dbport2 = $config2{database}->{port} // 5432; my $dbname2 = $config2{database}->{name}; my $dbuser2 = $config2{database}->{user}; my $dbpwd2 = $config2{database}->{password}; $dbh2 = DBI->connect( "dbi:$dbtype2:host=$dbhost2;database=$dbname2;port=$dbport2", $dbuser2, $dbpwd2, { PrintError => 0, AutoCommit => 1 } ) or die $DBI::errstr; # # Enable client-side UTF8 # $dbh2->{pg_enable_utf8} = 1; #------------------------------------------------------------------------------- # Connect to the SQLite database #------------------------------------------------------------------------------- $dbh3 = DBI->connect( "dbi:SQLite:dbname=$database3", "", "" ); my %choices = map { $_ => 1 } @phase_choices; # # Perform phases in order, omitting those that are not in the list # for my $phase (@phases) { #--------------------------------------------------------------------------- # Copy the 'eps' table to 'episodes' #--------------------------------------------------------------------------- if ( $phase eq 'episodes' && exists( $choices{$phase} ) ) { print "Build episodes table\n" if ( $verbose > 0 ); if ( check_table( $dbh2, 'episodes' ) ) { build_episodes_table( $dbh1, $dbh2, $verbose ) unless $dry_run; } else { print "** Table 'episodes' is not empty\n"; } } #--------------------------------------------------------------------------- # Copy the 'hosts' table to 'hosts' #--------------------------------------------------------------------------- elsif ( $phase eq 'hosts' && exists( $choices{$phase} ) ) { print "Build hosts table\n" if ( $verbose > 0 ); if ( check_table( $dbh2, 'hosts' ) ) { build_hosts_table( $dbh1, $dbh2, $email_template, $verbose ) unless $dry_run; } else { print "** Table 'hosts' is not empty\n"; } } #--------------------------------------------------------------------------- # Generate the 'episodes_hosts_xref' table #--------------------------------------------------------------------------- elsif ( $phase eq 'eh_xref' && exists( $choices{$phase} ) ) { print "Build episodes_hosts_xref table\n" if ( $verbose > 0 ); if ( check_table( $dbh2, 'episodes_hosts_xref' ) ) { build_episodes_hosts_xref_table( $dbh1, $dbh2, $verbose ) unless $dry_run; } else { print "** Table 'episodes_hosts_xref' is not empty\n"; } } #--------------------------------------------------------------------------- # Copy the 'miniseries' table to 'series' #--------------------------------------------------------------------------- elsif ( $phase eq 'series' && exists( $choices{$phase} ) ) { print "Build series table\n" if ( $verbose > 0 ); if ( check_table( $dbh2, 'series' ) ) { build_series_table( $dbh1, $dbh2, $verbose ) unless $dry_run; } else { print "** Table 'series' is not empty\n"; } } #--------------------------------------------------------------------------- # Generate the 'episodes_series_xref' table #--------------------------------------------------------------------------- elsif ( $phase eq 'es_xref' && exists( $choices{$phase} ) ) { print "Build episodes_series_xref table\n" if ( $verbose > 0 ); if ( check_table( $dbh2, 'episodes_series_xref' ) ) { build_episodes_series_xref_table( $dbh1, $dbh2, $verbose ) unless $dry_run; } else { print "** Table 'episodes_series_xref' is not empty\n"; } } #--------------------------------------------------------------------------- # Collect and store the id numbers and tags from the MySQL 'eps' table, # then add them to the PostgreSQL tables. #--------------------------------------------------------------------------- elsif ( $phase eq 'tags' && exists( $choices{$phase} ) ) { print "Build tags and episodes_tags_xref tables\n" if ( $verbose > 0 ); if ( check_table( $dbh2, 'tags' ) && check_table( $dbh2, 'episodes_tags_xref' ) ) { unless ($dry_run) { %eps_tags = %{ collect_eps_tags( $dbh1, $verbose ) }; if (%eps_tags) { build_tags_table( $dbh2, $verbose, \%eps_tags ); } } } else { print "** Tables 'tags' and/or 'episodes_tags_xref' are not empty\n"; } } #--------------------------------------------------------------------------- # Copy the 'comments' table to 'comments' #--------------------------------------------------------------------------- elsif ( $phase eq 'comments' && exists( $choices{$phase} ) ) { print "Build comments table\n" if ( $verbose > 0 ); if (check_table($dbh2,'comments')) { build_comments_table( $dbh1, $dbh2, $verbose ) unless $dry_run; } else { print "** Table 'comments' is not empty\n"; } } #--------------------------------------------------------------------------- # Fill in archive-related fields the 'episodes' table from 'ia.db' #--------------------------------------------------------------------------- elsif ( $phase eq 'archived' && exists( $choices{$phase} ) ) { print "Copy archive-related fields to episodes table\n" if ( $verbose > 0 ); update_episodes_table( $dbh3, $dbh2, $verbose ) unless $dry_run; } #--------------------------------------------------------------------------- # Copy the 'assets' table in 'ia.db' to 'assets' #--------------------------------------------------------------------------- elsif ( $phase eq 'assets' && exists( $choices{$phase} ) ) { print "Build assets table\n" if ( $verbose > 0 ); if (check_table($dbh2,'assets')) { build_assets_table( $dbh3, $dbh2, $verbose ) unless $dry_run; } else { print "** Table 'assets' is not empty\n"; } } #--------------------------------------------------------------------------- # Perform the 'epilogue' actions #--------------------------------------------------------------------------- elsif ( $phase eq 'epilogue' && exists( $choices{$phase} ) ) { print "Perform epilogue actions\n" if ( $verbose > 0 ); # # Resolve the "double host" problems. We have *not* copied these # across from the MySQL 'hosts' table, so can resolve them now. # resolve_double_hosts( $dbh1, $dbh2, $email_template, $default_licence, $verbose ) unless $dry_run; # # Determine the first show date per host, assuming that's when they # were added to the database # compute_host_date_added($dbh2) unless $dry_run; } } exit; #=== FUNCTION ================================================================ # NAME: build_episodes_table # PURPOSE: Copy the data from the MariaDB 'eps' table to the Pg # 'episodes' table # PARAMETERS: $dbh1 Handle for the MariaDB database # $dbh2 Handle for the Pg database # $verbose Verbosity level # RETURNS: Nothing # DESCRIPTION: # THROWS: No exceptions # COMMENTS: None # SEE ALSO: N/A #=============================================================================== sub build_episodes_table { my ( $dbh1, $dbh2, $verbose ) = @_; my $count = 0; $count += load_twat_episodes_table( $dbh1, $dbh2, 'twat', $verbose ); $count += load_eps_table( $dbh1, $dbh2, 'hpr', $verbose ); print "Added $count records to the 'episodes' table\n" if ( $verbose > 0 ); # # Set the sequence to the correct value # alter_seq( $dbh2, 'episodes', 'episode_id', 'episode_seq' ); } #=== FUNCTION ================================================================ # NAME: load_twat_episodes_table # PURPOSE: Load the 'twat_episodes' table from the MariaDB database into # the PostgreSQL 'episodes' table. # PARAMETERS: $dbh1 Handle for the MariaDB database # $dbh2 Handle for the Pg database # $key_prefix String to prefix the 'episode_key' field with # $verbose Verbosity level # RETURNS: Number of rows added # DESCRIPTION: # THROWS: No exceptions # COMMENTS: None # SEE ALSO: N/A #=============================================================================== sub load_twat_episodes_table { my ( $dbh1, $dbh2, $key_prefix, $verbose ) = @_; my ( $sth1, $h1, $sth2, $count ); # # Query to collect the entire 'twat_episodes' table # $sth1 = $dbh1->prepare('SELECT * FROM twat_episodes ORDER BY ep_num') or die $DBI::errstr; if ( $dbh1->err ) { warn $dbh1->errstr; } # # Query to insert into the PostgreSQL 'episodes' table # $sth2 = $dbh2->prepare( q{INSERT INTO episodes (episode_key, release_date, title, notes, explicit, license) VALUES (?,date(to_timestamp(?)),?,?,TRUE, id_in_licenses('CC-BY-SA'))} ) or die $DBI::errstr; if ( $dbh2->err ) { warn $dbh2->errstr; } # # Query MariaDB for the entire 'twat_episodes' table # $sth1->execute; if ( $dbh1->err ) { die $dbh1->errstr; } # # Loop though 'twat_episodes' table rows writing them to the PostgreSQL # 'episodes' table # $count = 0; while ( $h1 = $sth1->fetchrow_hashref ) { $count++; $sth2->execute( sprintf( "%s%04d", $key_prefix, $h1->{ep_num} ), $h1->{date}, coalesce( $h1->{topic}, '[undefined]' ), coalesce( $h1->{writeup}, '
No notes
' ), ); if ( $dbh2->err ) { die $dbh2->errstr; } } print "Copied $count records from 'twat_episodes'\n" if ( $verbose > 0 ); return $count; } #=== FUNCTION ================================================================ # NAME: load_eps_table # PURPOSE: Load the 'eps' table from the MariaDB database into the # PostgreSQL 'episodes' table. # PARAMETERS: $dbh1 Handle for the MariaDB database # $dbh2 Handle for the Pg database # $key_prefix String to prefix the 'episode_key' field with # $verbose Verbosity level # RETURNS: Number of rows added # DESCRIPTION: # THROWS: No exceptions # COMMENTS: None # SEE ALSO: N/A #=============================================================================== sub load_eps_table { my ( $dbh1, $dbh2, $key_prefix, $verbose ) = @_; my ( $sth1, $h1, $sth2, $count ); # # Query to collect rows from the old dfatabase # $sth1 = $dbh1->prepare('SELECT * FROM eps ORDER BY id') or die $DBI::errstr; if ( $dbh1->err ) { warn $dbh1->errstr; } # # Query to insert rows into the new database # $sth2 = $dbh2->prepare( q{INSERT INTO episodes (episode_key, release_date, title, summary, notes, explicit, license, duration, downloads) VALUES (?,?,?,?,?,?,id_in_licenses(?),? * '1 second'::interval,?)} ) or die $DBI::errstr; if ( $dbh2->err ) { warn $dbh2->errstr; } # # Query MariaDB for the entire 'eps' table # $sth1->execute; if ( $dbh1->err ) { die $dbh1->errstr; } # # Loop though 'eps' table rows writing them to the PostgreSQL 'episodes' # table # $count = 0; while ( $h1 = $sth1->fetchrow_hashref ) { $count++; $sth2->execute( sprintf( "%s%04d", $key_prefix, $h1->{id} ), $h1->{date}, $h1->{title}, nullif( $h1->{summary}, '^\s*$' ), $h1->{notes}, $h1->{explicit}, $h1->{license}, $h1->{duration}, $h1->{downloads}, ); if ( $dbh2->err ) { die $dbh2->errstr; } } print "Copied $count records from 'eps'\n" if ( $verbose > 0 ); return $count; } #=== FUNCTION ================================================================ # NAME: build_hosts_table # PURPOSE: Copy the data from the Mariadb 'hosts' table to the Pg 'hosts' # table # PARAMETERS: $dbh1 Handle for the MariaDB table # $dbh2 Handle for the Pg database # $template Template for building the default email # $verbose Verbosity level # RETURNS: Nothing # DESCRIPTION: Copies records from one table into the other. However, some # transformations are made along the way: blank emails and # 'admin@hackerpublicradio.org' are converted to 'host_NNN@hpr', # empty profiles and GPG strings are turned into NULL values, # and licence settings are converted to the id of the licence # in the 'licenses' table (in the database). # The double host problem is an issue though. Since the email # address field is unique we get errors since the double hosts # have duplicate addresses. # THROWS: No exceptions # COMMENTS: None # SEE ALSO: N/A #=============================================================================== sub build_hosts_table { my ( $dbh1, $dbh2, $template, $verbose ) = @_; my ( $sth1, $h1, $sth2, $rv, $count ); $sth1 = $dbh1->prepare('SELECT * FROM hosts') or die $DBI::errstr; if ( $dbh1->err ) { warn $dbh1->errstr; } $sth2 = $dbh2->prepare( q{INSERT INTO hosts (host_id, host, email, profile, license, local_image, gpg, espeak_name) VALUES (?,?,?,?,id_in_licenses(?),?,?,?)} ) or die $DBI::errstr; if ( $dbh2->err ) { warn $dbh2->errstr; } $sth1->execute; if ( $dbh1->err ) { die $dbh1->errstr; } $count = 0; while ( $h1 = $sth1->fetchrow_hashref ) { # # Ignore "double host" entries # if ( $h1->{host} !~ /^(.+)\s+and\s+(.+)$/ ) { $count++; $sth2->execute( $h1->{hostid}, $h1->{host}, default_email( $h1->{email}, '^(\s*|admin@hackerpublicradio.org)$', $template, $h1->{hostid} ), nullif( $h1->{profile}, '^\s*$' ), $h1->{license}, $h1->{local_image}, nullif( $h1->{gpg}, '^\s*$' ), $h1->{espeak_name}, ); if ( $dbh2->err ) { die $dbh2->errstr; } } } print "Copied $count records\n" if ( $verbose > 0 ); # # Set the sequence to the correct value # alter_seq( $dbh2, 'hosts', 'host_id', 'host_seq' ); } #=== FUNCTION ================================================================ # NAME: build_episodes_hosts_xref_table # PURPOSE: Generates the cross reference table by examining the 'eps' and # 'hosts' tables in the MariaDB database. # PARAMETERS: $dbh1 Handle for the MariaDB table # $dbh2 Handle for the Pg database # $verbose Verbosity level # RETURNS: Nothing # DESCRIPTION: # THROWS: No exceptions # COMMENTS: None # SEE ALSO: N/A #=============================================================================== sub build_episodes_hosts_xref_table { my ( $dbh1, $dbh2, $verbose ) = @_; my $count = 0; $count += link_twat_episodes( $dbh1, $dbh2, $verbose ); $count += link_hpr_episodes( $dbh1, $dbh2, $verbose ); print "Added a total of $count links\n" if ( $verbose > 0 ); } #=== FUNCTION ================================================================ # NAME: link_twat_episodes # PURPOSE: Generate cross reference links between TwaT episodes and hosts # PARAMETERS: $dbh1 Handle for the MariaDB database # $dbh2 Handle for the Pg database # $verbose Verbosity level # RETURNS: The count of links # DESCRIPTION: # THROWS: No exceptions # COMMENTS: None # SEE ALSO: N/A #=============================================================================== sub link_twat_episodes { my ( $dbh1, $dbh2, $verbose ) = @_; my ( $sth1, $h1, $sth2, $count, $episode_key ); # # Query to find the TwaT episode and hostid from the old database. Use # LEFT JOIN so we see when there's no host match. # $sth1 = $dbh1->prepare( # q{SELECT t.ep_num,h.hostid # FROM twat_episodes t, hosts h # WHERE t.host = h.host} q{SELECT t.ep_num, h.hostid FROM twat_episodes t LEFT JOIN hosts h ON (t.host = h.host)} ) or die $DBI::errstr; if ( $dbh1->err ) { warn $dbh1->errstr; } # # Query to add a cross reference link to the new database # $sth2 = $dbh2->prepare( q{INSERT INTO episodes_hosts_xref SELECT e.episode_id, h.host_id FROM episodes e, hosts h WHERE e.episode_key = ? AND h.host_id = ?} ) or die $DBI::errstr; if ( $dbh2->err ) { warn $dbh2->errstr; } # # Collect all the TwaT episodes and host id numbers # $sth1->execute; if ( $dbh1->err ) { die $dbh1->errstr; } # # Loop through the result of the query, adding links to the new database. # If the returned hostid is NULL then we don't have this host in the # 'hosts' table and need to warn about it. # $count = 0; while ( $h1 = $sth1->fetchrow_hashref ) { $episode_key = sprintf( "twat%04d", $h1->{ep_num} ); if (defined($h1->{hostid})) { $count++; $sth2->execute( $episode_key, $h1->{hostid}, ); if ( $dbh2->err ) { die $dbh2->errstr; } } else { warn "Unable to link $episode_key to a host\n"; } } print "Added $count links to TwaT episodes\n" if ( $verbose > 0 ); return $count; } #=== FUNCTION ================================================================ # NAME: link_hpr_episodes # PURPOSE: Generate cross reference links between HPR episodes and hosts # PARAMETERS: $dbh1 Handle for the MariaDB database # $dbh2 Handle for the Pg database # $verbose Verbosity level # RETURNS: The count of links # DESCRIPTION: # THROWS: No exceptions # COMMENTS: None # SEE ALSO: N/A #=============================================================================== sub link_hpr_episodes { my ( $dbh1, $dbh2, $verbose ) = @_; my ( $sth1, $h1, $sth2, $count, $episode_key ); # # Query to find the HPR episode and hostid from the old database # $sth1 = $dbh1->prepare( q{SELECT e.id,h.hostid FROM eps e, hosts h WHERE e.hostid = h.hostid} ) or die $DBI::errstr; if ( $dbh1->err ) { warn $dbh1->errstr; } # # Query to add a cross reference link to the new database # $sth2 = $dbh2->prepare( q{INSERT INTO episodes_hosts_xref SELECT e.episode_id, h.host_id FROM episodes e, hosts h WHERE e.episode_key = ? AND h.host_id = ?} ) or die $DBI::errstr; if ( $dbh2->err ) { warn $dbh2->errstr; } # # Collect all the HPR episodes and host id numbers # $sth1->execute; if ( $dbh1->err ) { die $dbh1->errstr; } # # Loop through the result of the query, adding links to the new database # $count = 0; while ( $h1 = $sth1->fetchrow_hashref ) { $count++; $episode_key = sprintf( "hpr%04d", $h1->{id} ); $sth2->execute( $episode_key, $h1->{hostid}, ); if ( $dbh2->err ) { die $dbh2->errstr; } } print "Added $count links to HPR episodes\n" if ( $verbose > 0 ); return $count; } #=== FUNCTION ================================================================ # NAME: build_series_table # PURPOSE: Copy the data from the Mariadb 'miniseries' table to the Pg # 'series' table # PARAMETERS: $dbh1 Handle for the MariaDB database # $dbh2 Handle for the Pg database # $verbose Verbosity level # RETURNS: Nothing # DESCRIPTION: # THROWS: No exceptions # COMMENTS: None # SEE ALSO: N/A #=============================================================================== sub build_series_table { my ( $dbh1, $dbh2, $verbose ) = @_; my ( $sth1, $h1, $sth2, $count ); $sth1 = $dbh1->prepare('SELECT * FROM miniseries') or die $DBI::errstr; if ( $dbh1->err ) { warn $dbh1->errstr; } $sth2 = $dbh2->prepare('INSERT INTO series VALUES (?,?,?,?,?)') or die $DBI::errstr; if ( $dbh2->err ) { warn $dbh2->errstr; } # # Query MariaDB for the entire 'miniseries' table # $sth1->execute; if ( $dbh1->err ) { die $dbh1->errstr; } # # Loop though 'miniseries' table rows writing them to the PostgreSQL # 'series' table # $count = 0; while ( $h1 = $sth1->fetchrow_hashref ) { $count++; $sth2->execute( $h1->{id}, $h1->{name}, $h1->{description}, $h1->{private}, nullif( $h1->{image}, '^\s*$' ), # $h1->{valid}, ); if ( $dbh2->err ) { die $dbh2->errstr; } } print "Copied $count records\n" if ($verbose > 0); # # Set the sequence to the correct value # alter_seq( $dbh2, 'series', 'series_id', 'series_seq' ); } #=== FUNCTION ================================================================ # NAME: build_episodes_series_xref_table # PURPOSE: Generates the cross reference table by examining the 'eps' and # 'series' tables in the MariaDB database. # PARAMETERS: $dbh1 Handle for the MariaDB table # $dbh2 Handle for the Pg database # $verbose Verbosity level # RETURNS: Nothing # DESCRIPTION: # THROWS: No exceptions # COMMENTS: None # SEE ALSO: N/A #=============================================================================== sub build_episodes_series_xref_table { my ( $dbh1, $dbh2, $verbose ) = @_; my ( $sth1, $h1, $sth2, $count, $episode_key ); # # Query to find the HPR episode and series id from the old database # $sth1 = $dbh1->prepare( q{SELECT e.id AS epid, m.id AS msid FROM eps e, miniseries m WHERE e.series = m.id} ) or die $DBI::errstr; if ( $dbh1->err ) { warn $dbh1->errstr; } # # Query to add a cross reference link to the new database # $sth2 = $dbh2->prepare( q{INSERT INTO episodes_series_xref SELECT e.episode_id, s.series_id FROM episodes e, series s WHERE e.episode_key = ? AND s.series_id = ?} ) or die $DBI::errstr; if ( $dbh2->err ) { warn $dbh2->errstr; } # # Collect all the HPR episodes and host id numbers (no TwaT shows are in # series) # $sth1->execute; if ( $dbh1->err ) { die $dbh1->errstr; } # # Loop through the result of the query, adding links to the new database # $count = 0; while ( $h1 = $sth1->fetchrow_hashref ) { $count++; $episode_key = sprintf( "hpr%04d", $h1->{epid} ); $sth2->execute( $episode_key, $h1->{msid} ); if ( $dbh2->err ) { die $dbh2->errstr; } } print "Added a total of $count series links\n" if ( $verbose > 0 ); } #=== FUNCTION ================================================================ # NAME: build_tags_table # PURPOSE: Using the data structure built from the MariaDB database # populate the many-to-many table in the Pg database # PARAMETERS: $dbh Handle for the Pg database # $verbose Verbosity level # $tag_hash Reference to a hash of episode ids and tags # for each episode # RETURNS: Nothing # DESCRIPTION: Before being called the tags in the MariaDB database are # gathered into a hash which is passed to this function as an # argument. The hash is keyed by episode number and each value # consists of an arrayref containing the tag strings. These tags # are then processed to add to the PostgreSQL database. # For each episode (in sorted order) the tag array is processed. # If a tag already exists in the 'tags' table the tag id is # stored for later, otherwise the tag is added to the 'tags' # table. The tag id from either source is used to add to the # cross reference table 'episodes_tags_xref'. # Adding a link requires the provision of the episode key, which # in this case will be 'hprNNNN' where 'NNNN' is a 4 digit zero # padded number. This is converted to the episode id by # a PostgreSQL function. # THROWS: No exceptions # COMMENTS: None # SEE ALSO: N/A #=============================================================================== sub build_tags_table { my ( $dbh, $verbose, $tag_hash ) = @_; my ( $sth1, $h1, $sth2, $sth3, $rv); my ( $tags, $tid, $episode_key, $count1, $count2 ); # # Query to find if a tag already exists in the PostgreSQL database # $sth1 = $dbh->prepare(q{SELECT * FROM tags WHERE tag = ?}); # # Query to add a new tag # $sth2 = $dbh->prepare(q{INSERT INTO tags (tag) VALUES(?)}); # # Query to add a new joining row # $sth3 = $dbh->prepare( q{INSERT INTO episodes_tags_xref VALUES(id_in_episodes(?),?)} ); $count1 = $count2 = 0; foreach my $id ( sort { $a <=> $b } keys( %{$tag_hash} ) ) { # # The episode key is now more than a number # $episode_key = sprintf("hpr%04d", $id); # # Get the array of tags for this episode id # $tags = $tag_hash->{$id}; # # Loop through the array of tags (using an integer so we can index the # current tag) # for my $i ( 0 .. $#$tags ) { # # Look to see if this tag exists # $sth1->execute( $tags->[$i] ); if ( $dbh->err ) { warn $dbh->errstr; } # # If it's already in the table just store the id for later # otherwise add a new entry # if ( $h1 = $sth1->fetchrow_hashref ) { $tid = $h1->{tag_id}; } else { # # Add the tag to 'tags' # $count1++; $rv = $sth2->execute( $tags->[$i] ); if ( $dbh->err ) { warn $dbh->errstr; } $rv = 0 if ( $rv eq '0E0' ); # # Get the id number of the inserted tag # $tid = $dbh->last_insert_id( undef, undef, undef, undef, { sequence => 'tag_seq' } ); } $count2++; $rv = $sth3->execute( $episode_key, $tid ); if ( $dbh->err ) { warn $dbh->errstr; } $rv = 0 if ( $rv eq '0E0' ); } } if ($verbose > 0) { print "Added $count1 tags\n"; print "Added $count2 cross references\n"; } } #=== FUNCTION ================================================================ # NAME: collect_eps_tags # PURPOSE: Collects the tags from the eps.tags field # PARAMETERS: $dbh Database handle # $verbose Verbosity level # RETURNS: A reference to the hash created by collecting all the tags # DESCRIPTION: Queries the MariaDB 'eps' table for all of the rows containing # tags, returning the comma-separated list with the id number. # Each CSV list is then parsed and the result turned into a hash # keyed on the id number and containing a sorted array of tags. # If the level of verbosity is greater than 2 the tags hash is # dumped (ironically, as a CSV list!). # THROWS: No exceptions # COMMENTS: None # SEE ALSO: N/A #=============================================================================== sub collect_eps_tags { my ( $dbh, $verbose ) = @_; my ( $status, @fields, %hash ); my ( $sth, $h ); # # For parsing the field as CSV # my $csv = Text::CSV_XS->new; # # Query the MariaDB 'eps' table for all the id and tags # $sth = $dbh->prepare( q{SELECT id,tags FROM eps WHERE length(tags) > 0 ORDER BY id} ) or die $DBI::errstr; if ( $dbh->err ) { warn $dbh->errstr; } $sth->execute; if ( $dbh->err ) { warn $dbh->errstr; } # # Loop through what we got # while ( $h = $sth->fetchrow_hashref ) { # # Parse the tag list # $status = $csv->parse( $h->{tags} ); unless ($status) { # # Report any errors # print "Parse error on episode ", $h->{id}, "\n"; print $csv->error_input(), "\n"; next; } @fields = $csv->fields(); next unless (@fields); # # Trim all tags (don't alter $_ when doing it) # @fields = map { my $t = $_; $t =~ s/(^\s+|\s+$)//g; $t; } @fields; #print "$h->{id}: ",join(",",@fields),"\n"; # # Save the id and its tags, sorted for comparison # $hash{ $h->{id} } = [ sort @fields ]; } # # Dump all id numbers and tags if the verbose level is high enough # if ( $verbose >= 2 ) { print "\nTags collected from the 'eps' table\n\n"; foreach my $id ( sort { $a <=> $b } keys(%hash) ) { printf "%04d: %s\n", $id, join( ",", @{ $hash{$id} } ); } } return \%hash; } #=== FUNCTION ================================================================ # NAME: build_comments_table # PURPOSE: Copy the data from the Mariadb 'comments' table to the Pg # 'comments' table # PARAMETERS: $dbh1 Handle for the MariaDB database # $dbh2 Handle for the Pg database # $verbose Verbosity level # RETURNS: Nothing # DESCRIPTION: # THROWS: No exceptions # COMMENTS: None # SEE ALSO: N/A #=============================================================================== sub build_comments_table { my ( $dbh1, $dbh2, $verbose ) = @_; my ( $sth1, $h1, $sth2, $count ); # # Query to collect comments from the old database # $sth1 = $dbh1->prepare('SELECT * FROM comments') or die $DBI::errstr; if ( $dbh1->err ) { warn $dbh1->errstr; } # # Query to insert comment data into the new database # $sth2 = $dbh2->prepare( q{INSERT INTO comments VALUES (?,id_in_episodes(?),?,?,?,?,?)} ) or die $DBI::errstr; if ( $dbh2->err ) { warn $dbh2->errstr; } # # Query MariaDB for the entire 'comments' table # $sth1->execute; if ( $dbh1->err ) { die $dbh1->errstr; } # # Loop though 'comments' table rows writing them to the PostgreSQL # 'comments' table # $count = 0; while ( $h1 = $sth1->fetchrow_hashref ) { $count++; $sth2->execute( $h1->{id}, sprintf("hpr%04d",$h1->{eps_id}), $h1->{comment_timestamp}, nullif( $h1->{comment_author_name}, '^\s*$' ), nullif( $h1->{comment_title}, '^\s*$' ), $h1->{comment_text}, $h1->{last_changed}, ); if ( $dbh2->err ) { die $dbh2->errstr; } } print "Copied $count comments\n" if ($verbose > 0); # # Set the sequence to the correct value # alter_seq( $dbh2, 'comments', 'comment_id', 'comment_seq' ); } #=== FUNCTION ================================================================ # NAME: update_episodes_table # PURPOSE: Initialise archiv-related fields in the Pg 'episodes' table # from the SQLite 'episodes' table # PARAMETERS: $dbh1 Handle for the SQLite database # $dbh2 Handle for the Pg database # $verbose Verbosity level # RETURNS: Nothing # DESCRIPTION: # THROWS: No exceptions # COMMENTS: None # SEE ALSO: N/A #=============================================================================== sub update_episodes_table { my ( $dbh1, $dbh2, $verbose ) = @_; my ( $sth1, $h1, $sth2, $count ); # # Query to collect episodes from the SQLite database # $sth1 = $dbh1->prepare('SELECT * FROM episodes') or die $DBI::errstr; if ( $dbh1->err ) { warn $dbh1->errstr; } # # Query to insert fields into the Pg database 'episodes' table. We just # write whatever comes in, no check for existing data. # $sth2 = $dbh2->prepare( q{UPDATE episodes SET archived = ?, archive_date = ?, IA_URL = ? WHERE episode_key = ?} ) or die $DBI::errstr; if ( $dbh2->err ) { warn $dbh2->errstr; } # # Query SQLite for the entire 'episodes' table # $sth1->execute; if ( $dbh1->err ) { die $dbh1->errstr; } # # Loop though 'episodes' table rows writing selected fields to the # PostgreSQL 'episodes' table # $count = 0; while ( $h1 = $sth1->fetchrow_hashref ) { $count++; $sth2->execute( $h1->{uploaded}, $h1->{archive_date}, $h1->{IA_URL}, sprintf("hpr%04d",$h1->{id}), ); if ( $dbh2->err ) { die $dbh2->errstr; } } print "Updated $count rows in the episodes table\n" if ($verbose > 0); } #=== FUNCTION ================================================================ # NAME: build_assets_table # PURPOSE: Copy the data from the SQLite 'assets' table to the Pg # 'assets' table # PARAMETERS: $dbh1 Handle for the SQLite database # $dbh2 Handle for the Pg database # $verbose Verbosity level # RETURNS: Nothing # DESCRIPTION: # THROWS: No exceptions # COMMENTS: None # SEE ALSO: N/A #=============================================================================== sub build_assets_table { my ( $dbh1, $dbh2, $verbose ) = @_; my ( $sth1, $h1, $sth2, $count ); # # Query to collect assets from the SQLite database # $sth1 = $dbh1->prepare('SELECT * FROM assets') or die $DBI::errstr; if ( $dbh1->err ) { warn $dbh1->errstr; } # # Query to insert asset data into the new database # $sth2 = $dbh2->prepare( q{INSERT INTO assets VALUES (DEFAULT,id_in_episodes(?),?,?,?)} ) or die $DBI::errstr; if ( $dbh2->err ) { warn $dbh2->errstr; } # # Query SQLite for the entire 'assets' table # $sth1->execute; if ( $dbh1->err ) { die $dbh1->errstr; } # # Loop though 'assets' table rows writing them to the PostgreSQL # 'assets' table # $count = 0; while ( $h1 = $sth1->fetchrow_hashref ) { $count++; $sth2->execute( sprintf("hpr%04d",$h1->{episode_id}), $h1->{URL}, $h1->{filename}, $h1->{uploaded}, ); if ( $dbh2->err ) { die $dbh2->errstr; } } print "Copied $count assets\n" if ($verbose > 0); # # Set the sequence to the correct value # alter_seq( $dbh2, 'assets', 'asset_id', 'asset_seq' ); } #=== FUNCTION ================================================================ # NAME: resolve_double_hosts # PURPOSE: Turn any double host entries in the 'hosts' table into # "singletons" # PARAMETERS: $dbh1 Handle for the MariaDB database # $dbh2 Handle for the Pg database # $template Template for building the default email # $licence string version of the short CC licence name # $verbose Verbosity level # RETURNS: Nothing # DESCRIPTION: # THROWS: No exceptions # COMMENTS: None # SEE ALSO: N/A #=============================================================================== sub resolve_double_hosts { my ( $dbh1, $dbh2, $template, $licence, $verbose ) = @_; my ( $sth1, $h1, $rv1, $sth2, $h2, $rv2, $sth3, $h3, $rv3, $sth4, $h4, $rv4 ); my ( %doubles, @h, %hosts, $count, $unknown ); # # Search the 'hosts' table in the old database for "host1 and host2" # strings # $count = find_double_hosts($dbh1, \%doubles, \%hosts); # # If no doubles there's nothing to do # if ($count == 0) { print "No doubles found\n" if ($verbose > 0); return; } # # Scan the list of individual hosts we stored earlier and find them in # the 'hosts' table (assuming that the hostnames we have extracted match # exactly) # $unknown = find_hosts($dbh1, \%doubles, \%hosts); # # Allocate all unknown hosts a host id in the PostgreSQL database, and give an # unique email address. # if ( $unknown > 0 ) { register_unknown($dbh2, \%doubles, \%hosts, $template, $licence); } # # Now %doubles contains all the original names and host ids and %hosts # contains the parsed out names and their ids. We can look for shows # attributed to the double hosts and re-attribute them to the single # hosts. # link_double_twat_episodes($dbh1,$dbh2,\%doubles,$verbose); link_double_hpr_episodes($dbh1,$dbh2,\%doubles,$verbose); return; } #=== FUNCTION ================================================================ # NAME: find_double_hosts # PURPOSE: Search the 'hosts' table in the old database for "host1 and # host2" strings. # PARAMETERS: $dbh handle for the MariaDB database # $doubles hashref where details will be stored # $hosts hashref to contain single host details # RETURNS: The number of double hosts found # DESCRIPTION: # THROWS: No exceptions # COMMENTS: None # SEE ALSO: N/A #=============================================================================== sub find_double_hosts { my ($dbh, $doubles, $hosts) = @_; my ($sth, $h, @h, $count); # # Find the double hosts in the old database # $sth = $dbh->prepare( q{ SELECT hostid, host FROM hosts WHERE host regexp '[[:<:]]and[[:>:]]' ORDER BY hostid } ) or die $DBI::errstr; if ( $dbh->err ) { warn $dbh->errstr; return 0; } $sth->execute; if ( $dbh->err ) { die $dbh->errstr; } # # Loop through the doubles we found, counting them as we go # $count = 0; while ( $h = $sth->fetchrow_hashref ) { # # Each hash value is a hash containing the original id, and, in a sub-hash # the replacement ids # $doubles->{$h->{host}} = { double => $h->{hostid}, singles => {}, }; # # Parse the double host string into an array # @h = ( $h->{host} =~ /^(.+)\s+and\s+(.+)$/ ); # # Initialise the entries for %doubles and %hosts # for my $host (@h) { $doubles->{$h->{host}}->{singles}->{$host} = undef; unless ( exists( $hosts->{$host} ) ) { $hosts->{$host} = 0; } } $count++; } return $count; } #=== FUNCTION ================================================================ # NAME: find_hosts # PURPOSE: Find individual hosts in the old MariaDB database using the # names we found as double hosts # PARAMETERS: $dbh handle for the MariaDB database # $doubles hashref where details will be stored # $hosts hashref to contain single host details # RETURNS: The number of unknown hosts # DESCRIPTION: # THROWS: No exceptions # COMMENTS: None # SEE ALSO: N/A #=============================================================================== sub find_hosts { my ($dbh, $doubles, $hosts) = @_; my ($sth, $h, $rv, @h, $unknown); # # Query to find the host by name in the old database # $sth = $dbh->prepare(q{SELECT hostid FROM hosts WHERE host = ?}) or die $DBI::errstr; if ( $dbh->err ) { warn $dbh->errstr; } # # Scan the list of individual hosts we stored earlier and find them in # the 'hosts' table (assuming that the hostnames we have extracted match # exactly) # $unknown = 0; foreach my $host ( sort(keys(%{$hosts})) ) { $rv = $sth->execute($host); if ( $dbh->err ) { die $dbh->errstr; } $rv = 0 if ( $rv eq '0E0' ); if ($rv) { # # Found id for host. Save in the %doubles hash # $h = $sth->fetchrow_hashref; $hosts->{$host} = $h->{hostid}; save_hostid($doubles,$host,$h->{hostid}); } else { # # Can't find this host # $unknown++; } } return $unknown; } #=== FUNCTION ================================================================ # NAME: register_unknown # PURPOSE: Create an entry in the 'hosts' table of the new database # wherever there's a host that's unknown (has a zero id in the # %hosts hash). # PARAMETERS: $dbh handle for the MariaDB database # $doubles hashref where details will be stored # $hosts hashref to contain single host details # $email_template template for making an email address # $licence string version of the short CC licence name # RETURNS: Nothing # DESCRIPTION: # THROWS: No exceptions # COMMENTS: None # SEE ALSO: N/A #=============================================================================== sub register_unknown { my ( $dbh, $doubles, $hosts, $email_template, $licence ) = @_; my ( $sth, $h, $rv, $new_email ); # # PostgreSQL query to register an unknown host # $sth = $dbh->prepare( q{INSERT INTO hosts (host,email,license) VALUES (?,?,id_in_licenses(?))} ) or die $DBI::errstr; if ( $dbh->err ) { warn $dbh->errstr; } foreach my $host ( sort( keys( %{$hosts} ) ) ) { if ( $hosts->{$host} == 0 ) { # # Write a row to the 'hosts' table. There has to be an email, # but we can't compute one until the row has been added. # $rv = $sth->execute( $host, 'placeholder', $licence ); if ( $dbh->err ) { die $dbh->errstr; } # # Save the id number we just generated as the new host id # my $newid = $dbh->last_insert_id( undef, undef, undef, undef, { sequence => 'host_seq' } ); $hosts->{$host} = $newid; save_hostid( $doubles, $host, $newid ); printf "Created new host %s (%d)\n", $host, $newid if ( $verbose > 0 ); # # Give the new host entry a default email address # $new_email = sprintf( $email_template, $newid ); $rv = $dbh->do( 'UPDATE hosts SET email = ? WHERE host_id = ?', undef, $new_email, $newid ); if ( $dbh->err ) { warn $dbh->errstr; } $rv = 0 if ( $rv eq '0E0' ); warn "Failed to set email address $new_email for $host\n" unless ( defined($rv) ); } } } #=== FUNCTION ================================================================ # NAME: link_double_twat_episodes # PURPOSE: Find and link TwaT episodes with double hosts # PARAMETERS: $dbh1 Handle for the MariaDB database # $dbh2 Handle for the Pg database # $doubles hashref holding double host details # $verbose Verbosity level # RETURNS: Nothing # DESCRIPTION: # THROWS: No exceptions # COMMENTS: None # SEE ALSO: N/A #=============================================================================== sub link_double_twat_episodes { my ( $dbh1, $dbh2, $doubles, $verbose ) = @_; my ( $sth, $h, $rv ); # # Query to find TwaT shows with particular host names in the old database # $sth = $dbh1->prepare( q{ SELECT ep_num FROM twat_episodes WHERE host = ? } ) or die $DBI::errstr; if ( $dbh1->err ) { warn $dbh1->errstr; } # # Loop through the double hosts we collected # foreach my $double ( sort( keys(%{$doubles}) ) ) { # # Extract saved double and corresponding single details # my ( $doubleid, @newhosts ) = ( $doubles->{$double}->{double}, keys( %{ $doubles->{$double}->{singles} } ) ); # # Find TwaT shows marked as belonging to this double-host (in the old # database) # $sth->execute($double); if ( $dbh1->err ) { die $dbh1->errstr; } # # Process all the shows we found # while ( $h = $sth->fetchrow_hashref ) { my $eps_id = $h->{ep_num}; my $episode_key = sprintf("twat%04d",$eps_id); # # If one insert into the new database fails they all do # $dbh2->begin_work(); # # Add links for the single hosts to the new database # foreach my $host (@newhosts) { $rv = $dbh2->do( q{INSERT INTO episodes_hosts_xref SELECT e.episode_id, h.host_id FROM episodes e, hosts h WHERE e.episode_key = ? AND h.host = ?}, undef, $episode_key, $host); if ( $dbh2->err ) { warn $dbh2->errstr; } $rv = 0 if ( $rv eq '0E0' ); unless ( defined($rv) ) { warn "Problem adding link to episodes_hosts_xref for " . "$episode_key,$host\n"; } else { printf "Linked show %s for host %s\n", $episode_key, $host; } } # # Commit the inserts above # $dbh2->commit(); } } } #=== FUNCTION ================================================================ # NAME: link_double_hpr_episodes # PURPOSE: Find and link HPR episodes with double hosts # PARAMETERS: $dbh1 Handle for the MariaDB database # $dbh2 Handle for the Pg database # $doubles hashref holding double host details # $verbose Verbosity level # RETURNS: Nothing # DESCRIPTION: # THROWS: No exceptions # COMMENTS: None # SEE ALSO: N/A #=============================================================================== sub link_double_hpr_episodes { my ( $dbh1, $dbh2, $doubles, $verbose ) = @_; my ( $sth, $h, $rv ); # # Query to find HPR shows with particular host ids in the old database # $sth = $dbh1->prepare( q{ SELECT id AS eps_id FROM eps WHERE hostid = ? } ) or die $DBI::errstr; if ( $dbh1->err ) { warn $dbh1->errstr; } # # Loop through the double hosts we collected # foreach my $double ( sort( keys(%{$doubles}) ) ) { # # Extract saved double and corresponding single details # my ( $doubleid, @newhosts ) = ( $doubles->{$double}->{double}, keys( %{ $doubles->{$double}->{singles} } ) ); # # Find HPR shows marked as belonging to this double-host (in the old # database) # $sth->execute($doubleid); if ( $dbh1->err ) { die $dbh1->errstr; } # # Process all the shows we found # while ( $h = $sth->fetchrow_hashref ) { my $eps_id = $h->{eps_id}; my $episode_key = sprintf("hpr%04d",$eps_id); # # If one insert into the new database fails they all do # $dbh2->begin_work(); # # Add links for the single hosts to the new database # foreach my $host (@newhosts) { $rv = $dbh2->do( q{INSERT INTO episodes_hosts_xref SELECT e.episode_id, h.host_id FROM episodes e, hosts h WHERE e.episode_key = ? AND h.host = ?}, undef, $episode_key, $host); if ( $dbh2->err ) { warn $dbh2->errstr; } $rv = 0 if ( $rv eq '0E0' ); unless ( defined($rv) ) { warn "Problem adding link to episodes_hosts_xref for " . "$episode_key,$host\n"; } else { printf "Linked show %s for host %s\n", $episode_key, $host; } } # # Commit the inserts above # $dbh2->commit(); } } } #=== FUNCTION ================================================================ # NAME: save_hostid # PURPOSE: Saves the host id after searching for the key in the %doubles # hash # PARAMETERS: $doubles hashref to %doubles # $host host key # $hostid host id number # RETURNS: Nothing # DESCRIPTION: Searches the %doubles hash for particular keys in the # 'singles' sub-hash. If found saves the corresponding host id # there. # THROWS: No exceptions # COMMENTS: None # SEE ALSO: N/A #=============================================================================== sub save_hostid { my ( $doubles, $host, $hostid ) = @_; foreach my $key ( keys(%$doubles) ) { if ( exists( $doubles->{$key}->{singles}->{$host} ) ) { $doubles->{$key}->{singles}->{$host} = $hostid; } } } #=== FUNCTION ================================================================ # NAME: compute_host_date_added # PURPOSE: Determine the 'hosts.when_added' field once the database is # fully populated. # PARAMETERS: $dbh Handle for the PostgreSQL database # RETURNS: Nothing # DESCRIPTION: # THROWS: No exceptions # COMMENTS: None # SEE ALSO: N/A #=============================================================================== sub compute_host_date_added { my ($dbh) = @_; my $rv; # # Allocate when_added values where possible # $rv = $dbh->do( q{ UPDATE hosts SET when_added = sq.when_added FROM ( SELECT h.host_id,min(e.release_date) AS when_added FROM episodes e JOIN episodes_hosts_xref eh ON (e.episode_id = eh.episode_id) JOIN hosts h ON (h.host_id = eh.host_id) GROUP BY h.host_id ORDER by min(e.release_date)) AS sq WHERE hosts.host_id = sq.host_id } ); if ( $dbh->err ) { warn $dbh->errstr; } $rv = 0 if ( $rv eq '0E0' ); print "Added $rv dates to the 'when_added' column\n"; } #=== FUNCTION ================================================================ # NAME: alter_seq # PURPOSE: Ensure the PostgreSQL sequence associated with a table has the # correct value. # PARAMETERS: $dbh Handle for the PostgreSQL database # $table Table name for the query # $idname Id field name (primary key) # $sequence Sequence name # RETURNS: Nothing # DESCRIPTION: # THROWS: No exceptions # COMMENTS: None # SEE ALSO: N/A #=============================================================================== sub alter_seq { my ( $dbh, $table, $idname, $sequence ) = @_; my ( $sth, $h, $rv, $maxid ); # # Find the maximum id number in the table # $sth = $dbh->prepare("SELECT max($idname) as maxid FROM $table") or die $DBI::errstr; if ( $dbh->err ) { warn $dbh->errstr; } $sth->execute; if ( $dbh->err ) { die $dbh->errstr; } # # Save the maximum # if ( $h = $sth->fetchrow_hashref ) { $maxid = $h->{maxid}; $sth->finish; } # # Reset the sequence one more than the maximum # $maxid++; $rv = $dbh->do("ALTER SEQUENCE $sequence RESTART WITH $maxid"); if ( $dbh->err ) { warn $dbh->errstr; } $rv = 0 if ( $rv eq '0E0' ); warn "Failed to reset $sequence\n" unless (defined($rv)); return; } #=== FUNCTION ================================================================ # NAME: check_table # PURPOSE: Check that a given PostgreSQL table is empty # PARAMETERS: $dbh Handle for the PostgreSQL database # $table Name of table # RETURNS: True if empty, otherwise false # DESCRIPTION: Simply perform a query on the nominated table which counts # rows. If the table does not exist a DBI method will fail (the # execute?), so we treat this as a 'no empty' to make the caller # take error action. # THROWS: No exceptions # COMMENTS: None # SEE ALSO: N/A #=============================================================================== sub check_table { my ( $dbh, $table ) = @_; my ( $sth, $h, $count ); $sth = $dbh->prepare("SELECT count(*) AS count FROM $table") or die $DBI::errstr; if ( $dbh->err ) { warn $dbh->errstr; return 0; } $sth->execute; if ( $dbh->err ) { warn $dbh->errstr; return 0; } if ( $h = $sth->fetchrow_hashref ) { $count = $h->{count}; $sth->finish; } return $count == 0; } #=== FUNCTION ================================================================ # NAME: default_email # PURPOSE: Make a default email address for hosts with none # PARAMETERS: $email Original email address # $regex Regular expression to check the email against # $template Template for building the default # $hostid Host id number to use in the default # RETURNS: The email address to be used # DESCRIPTION: If the email address matches a regular expression then # generate a default from the template and the host id, # otherwise just return the address untouched. # THROWS: No exceptions # COMMENTS: None # SEE ALSO: N/A #=============================================================================== sub default_email { my ( $email, $regex, $template, $hostid ) = @_; return ( $email =~ $regex ? sprintf( $template, $hostid ) : $email ); } #=== FUNCTION ================================================================ # NAME: nullif # PURPOSE: Tests a value and makes it 'undef' (equivalent to NULL in the # database) if it matches a regular expression. # PARAMETERS: $value Value to test # $regex Regular expression to match against # RETURNS: 'undef' if the values match, otherwise the original value # DESCRIPTION: This is very simple, just a wrapper around the test. # THROWS: No exceptions # COMMENTS: None # SEE ALSO: N/A #=============================================================================== sub nullif { my ( $value, $regex ) = @_; return $value unless defined($value); return ( $value =~ $regex ? undef : $value ); } #=== FUNCTION ================================================================ # NAME: coalesce # PURPOSE: To find the first defined argument and return it # PARAMETERS: Arbitrary number of arguments # RETURNS: The first defined argument or undef if there are none # DESCRIPTION: Modelled on the SQL function of the same name. It takes a list # of arguments, scans it for the first one that is not undefined # and returns it. If an argument is defined and it's an arrayref # then the referenced array is returned comma-delimited. This # allows calls such as "coalesce($var,'undef')" which returns # the value of $var if it's defined, and 'undef' if not and # doesn't break anything along the way. # THROWS: No exceptions # COMMENTS: None # SEE ALSO: N/A #=============================================================================== sub coalesce { foreach (@_) { if ( defined($_) ) { if ( ref($_) eq 'ARRAY' ) { return join( ',', @{$_} ); } else { return $_; } } } return; # implicit undef } #=== FUNCTION ================================================================ # NAME: Options # PURPOSE: Processes command-line options # PARAMETERS: $optref Hash reference to hold the options # RETURNS: Undef # DESCRIPTION: # THROWS: no exceptions # COMMENTS: none # SEE ALSO: n/a #=============================================================================== sub Options { my ($optref) = @_; my @options = ( "help", "config=s", "debug=i", "dry-run!", "verbose+", "phases=s" ); if ( !GetOptions( $optref, @options ) ) { pod2usage( -msg => "$PROG version $VERSION\n", -exitval => 1 ); } return; } __END__ #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% # Application Documentation #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% #{{{ =head1 NAME copy_mysql_pg - copy the HPR database from MySQL to PostgreSQL =head1 VERSION This documentation refers to B