forked from HPR/hpr-tools
		
	
		
			
	
	
		
			709 lines
		
	
	
		
			20 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
		
		
			
		
	
	
			709 lines
		
	
	
		
			20 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
|   | #!/usr/bin/env perl | ||
|  | #=============================================================================== | ||
|  | # | ||
|  | #         FILE: fix_urls | ||
|  | # | ||
|  | #        USAGE: ./fix_urls [-help] [-doc] [-debug=N] [-dry-run] [-config=FILE] | ||
|  | #                       [-limit=N] tablename | ||
|  | # | ||
|  | #  DESCRIPTION: Scans the HPR database to find URLs which do not have the | ||
|  | #               'https:' scheme and correct them. | ||
|  | # | ||
|  | #      OPTIONS: --- | ||
|  | # REQUIREMENTS: --- | ||
|  | #         BUGS: --- | ||
|  | #        NOTES: Had to revert to MySQL because of a problem with DBD::MariaDB | ||
|  | #       AUTHOR: Dave Morriss (djm), Dave.Morriss@gmail.com | ||
|  | #      VERSION: 0.0.5 | ||
|  | #      CREATED: 2021-12-29 13:57:28 | ||
|  | #     REVISION: 2022-02-28 10:51:27 | ||
|  | # | ||
|  | #=============================================================================== | ||
|  | 
 | ||
|  | use v5.16; | ||
|  | use strict; | ||
|  | use warnings; | ||
|  | use utf8; | ||
|  | use feature qw{ postderef say signatures state }; | ||
|  | no warnings qw{ experimental::postderef experimental::signatures }; | ||
|  | 
 | ||
|  | use Getopt::Long; | ||
|  | use Pod::Usage; | ||
|  | 
 | ||
|  | use Config::General; | ||
|  | use DBI; | ||
|  | 
 | ||
|  | use SQL::Abstract; | ||
|  | 
 | ||
|  | use Log::Handler; | ||
|  | use Log::Handler::Output::File; | ||
|  | 
 | ||
|  | use Data::Dumper; | ||
|  | 
 | ||
|  | # | ||
|  | # Version number (manually incremented) | ||
|  | # | ||
|  | our $VERSION = '0.0.5'; | ||
|  | 
 | ||
|  | # | ||
|  | # Script and directory names | ||
|  | # | ||
|  | ( my $PROG = $0 ) =~ s|.*/||mx; | ||
|  | ( my $DIR  = $0 ) =~ s|/?[^/]*$||mx; | ||
|  | $DIR = '.' unless $DIR; | ||
|  | 
 | ||
|  | #------------------------------------------------------------------------------- | ||
|  | # Declarations | ||
|  | #------------------------------------------------------------------------------- | ||
|  | # | ||
|  | # Constants and other declarations | ||
|  | # | ||
|  | my $basedir    = "$ENV{HOME}/HPR/Database"; | ||
|  | my $configfile = "$basedir/.hpr_db.cfg"; | ||
|  | my $logfile    = "$basedir/${PROG}.log"; | ||
|  | 
 | ||
|  | my $dbh; | ||
|  | 
 | ||
|  |                                 #<<<  do not let perltidy change formatting | ||
|  | # | ||
|  | # The database tables we'll search and how to do what we need. | ||
|  | # | ||
|  | # main_key - the name of a table (with a hashref as the value) | ||
|  | #       index           the database field we're using to find a row | ||
|  | #       fields          fields we may need to edit (an arrayref) | ||
|  | #       logfields       fields we will log (an arrayref) | ||
|  | #       mainQ           the query that finds all rows needing work | ||
|  | #       rowQ            a query for finding a row using the index value | ||
|  | # | ||
|  | my %tables = ( | ||
|  |     'comments' => { | ||
|  |         index     => 'id', | ||
|  |         fields    => [qw{comment_title comment_text}], | ||
|  |         logfields => [qw{eps_id comment_title}], | ||
|  |         mainQ     => q{ | ||
|  |             select id | ||
|  |                 from comments | ||
|  |                 where comment_text regexp 'http://[^[:space:]]' | ||
|  |                     or comment_title regexp 'http://[^[:space:]]' | ||
|  |             order by id}, | ||
|  |         rowQ => q{select * from comments where id = ?}, | ||
|  |     }, | ||
|  |     'eps' => { | ||
|  |         index     => 'id', | ||
|  |         fields    => [qw{title summary notes}], | ||
|  |         logfields => [qw{title date}], | ||
|  |         mainQ     => q{ | ||
|  |             select id | ||
|  |                 from eps | ||
|  |                 where title regexp 'http://[^[:space:]]' | ||
|  |                     or summary regexp 'http://[^[:space:]]' | ||
|  |                     or notes regexp 'http://[^[:space:]]' | ||
|  |             order by id | ||
|  |             }, | ||
|  |         rowQ => q{select * from eps where id = ?}, | ||
|  |     }, | ||
|  |     'hosts' => { | ||
|  |         index     => 'hostid', | ||
|  |         fields    => [qw{profile}], | ||
|  |         logfields => [qw{host email}], | ||
|  |         mainQ     => q{ | ||
|  |             select hostid | ||
|  |                 from hosts | ||
|  |                 where profile regexp 'http://[^[:space:]]' | ||
|  |             order by hostid}, | ||
|  |         rowQ => q{select * from hosts where hostid = ?}, | ||
|  |     }, | ||
|  |     'miniseries' => { | ||
|  |         index     => 'id', | ||
|  |         fields    => [qw{description}], | ||
|  |         logfields => [qw{name}], | ||
|  |         mainQ     => q{ | ||
|  |             select id | ||
|  |                 from miniseries | ||
|  |                 where description regexp 'http://[^[:space:]]' | ||
|  |             order by id}, | ||
|  |         rowQ => q{select * from miniseries where id = ?}, | ||
|  |     }, | ||
|  | ); | ||
|  |                                 #>>> | ||
|  | my @table_names = keys(%tables); | ||
|  | 
 | ||
|  | # | ||
|  | # Enable Unicode mode | ||
|  | # | ||
|  | binmode STDOUT, ":encoding(UTF-8)"; | ||
|  | binmode STDERR, ":encoding(UTF-8)"; | ||
|  | 
 | ||
|  | #------------------------------------------------------------------------------- | ||
|  | # Options and arguments | ||
|  | #------------------------------------------------------------------------------- | ||
|  | my $DEF_DEBUG = 0; | ||
|  | my $DEF_LIMIT = 0; | ||
|  | 
 | ||
|  | # | ||
|  | # Process options | ||
|  | # | ||
|  | my %options; | ||
|  | Options( \%options ); | ||
|  | 
 | ||
|  | # | ||
|  | # Default help | ||
|  | # | ||
|  | pod2usage( -msg => "$PROG version $VERSION\n", -verbose => 0, -exitval => 1 ) | ||
|  |     if ( $options{'help'} ); | ||
|  | 
 | ||
|  | # | ||
|  | # Full documentation if requested with -doc | ||
|  | # | ||
|  | pod2usage( | ||
|  |     -msg       => "$PROG version $VERSION\n", | ||
|  |     -verbose   => 2, | ||
|  |     -exitval   => 1, | ||
|  |     -noperldoc => 0, | ||
|  | ) if ( $options{'doc'} ); | ||
|  | 
 | ||
|  | # | ||
|  | # Collect options | ||
|  | # | ||
|  | my $DEBUG = ( defined( $options{debug} ) ? $options{debug} : $DEF_DEBUG ); | ||
|  | 
 | ||
|  | my $cfgfile | ||
|  |     = ( defined( $options{config} ) ? $options{config} : $configfile ); | ||
|  | 
 | ||
|  | my $dry_run = ( defined( $options{'dry-run'} ) ? $options{'dry-run'} : 0 ); | ||
|  | my $verbose = ( defined( $options{'verbose'} ) ? $options{'verbose'} : 0 ); | ||
|  | 
 | ||
|  | my $limit = ( defined( $options{limit} ) ? $options{limit} : $DEF_LIMIT ); | ||
|  | $limit = abs($limit); | ||
|  | 
 | ||
|  | # | ||
|  | # Sanity check | ||
|  | # | ||
|  | die "Unable to find config file '$cfgfile'\n" unless ( -e $cfgfile ); | ||
|  | 
 | ||
|  | # | ||
|  | # Table choice | ||
|  | # | ||
|  | my $table = shift; | ||
|  | die "Database table not specified\n" unless $table; | ||
|  | die "Not a valid table name: $table\n" | ||
|  |     unless ( grep { $_ =~ /^$table$/ } @table_names ); | ||
|  | 
 | ||
|  | # | ||
|  | # Load configuration data | ||
|  | # | ||
|  | my $conf = new Config::General( | ||
|  |     -ConfigFile      => $cfgfile, | ||
|  |     -InterPolateVars => 1, | ||
|  |     -ExtendedAccess  => 1 | ||
|  | ); | ||
|  | my %config = $conf->getall(); | ||
|  | 
 | ||
|  | #------------------------------------------------------------------------------- | ||
|  | # Connect to the database | ||
|  | #------------------------------------------------------------------------------- | ||
|  | my $dbhost = $config{database}->{host} // '127.0.0.1'; | ||
|  | my $dbport = $config{database}->{port} // 3306; | ||
|  | my $dbname = $config{database}->{name}; | ||
|  | my $dbuser = $config{database}->{user}; | ||
|  | my $dbpwd  = $config{database}->{password}; | ||
|  | #$dbh = DBI->connect( "DBI:MariaDB:host=$dbhost;port=$dbport;database=$dbname", | ||
|  | #    $dbuser, $dbpwd, { AutoCommit => 1, RaiseError => 1 } ) | ||
|  | #    or die $DBI::errstr; | ||
|  | 
 | ||
|  | $dbh = DBI->connect( "dbi:mysql:host=$dbhost;port=$dbport;database=$dbname", | ||
|  |     $dbuser, $dbpwd, { AutoCommit => 1 } ) | ||
|  |     or die $DBI::errstr; | ||
|  | 
 | ||
|  | # | ||
|  | # Enable client-side UTF8 | ||
|  | # | ||
|  | $dbh->{mysql_enable_utf8} = 1; | ||
|  | 
 | ||
|  | 
 | ||
|  | #------------------------------------------------------------------------------- | ||
|  | # Set up logging keeping the default log layout except for the date | ||
|  | #------------------------------------------------------------------------------- | ||
|  | my $log = Log::Handler->new(); | ||
|  | 
 | ||
|  | $log->add( | ||
|  |     file => { | ||
|  |         timeformat => "%Y-%m-%d %H:%M:%S", | ||
|  |         filename   => $logfile, | ||
|  |         maxlevel   => 7, | ||
|  |         minlevel   => 0, | ||
|  |         utf8       => 1, | ||
|  |     } | ||
|  | ); | ||
|  | 
 | ||
|  | $log->info("Configuration file $cfgfile"); | ||
|  | $log->info("Processing table $table"); | ||
|  | $log->info("Dry-run mode") if ($dry_run); | ||
|  | 
 | ||
|  | process_table( $dbh, \%tables, $table, $dry_run, $limit ); | ||
|  | 
 | ||
|  | exit; | ||
|  | 
 | ||
|  | #===  FUNCTION  ================================================================ | ||
|  | #         NAME: process_table | ||
|  | #      PURPOSE: Processes a table to change any instances of 'http://' to | ||
|  | #               'https://' | ||
|  | #   PARAMETERS: $dbh            open database handle | ||
|  | #               $rtables        reference to the %tables hash | ||
|  | #               $table          name of the table being processed | ||
|  | #               $dry_run        Boolean showing whether this is dry run or not | ||
|  | #               $limit          number of updates to apply, 0 = no limit | ||
|  | #      RETURNS: | ||
|  | #  DESCRIPTION: | ||
|  | #       THROWS: No exceptions | ||
|  | #     COMMENTS: None | ||
|  | #     SEE ALSO: N/A | ||
|  | #=============================================================================== | ||
|  | sub process_table { | ||
|  |     my ( $dbh, $rtables, $table, $dry_run, $limit ) = @_; | ||
|  | 
 | ||
|  |     my ( $sth1,       $sth2,  $h ); | ||
|  |     my ( $work_query, @work,  $row_query, @fields, @logfields, $new, $index ); | ||
|  |     my ( $sql,        $stmt,  @bind,      %fieldvals, %where ); | ||
|  |     my ( $workcount,  $count, $updates,   $logfmt ); | ||
|  | 
 | ||
|  |     # | ||
|  |     # Prepare to build SQL | ||
|  |     # | ||
|  |     $sql = SQL::Abstract->new; | ||
|  | 
 | ||
|  |     $count   = 0; | ||
|  |     $updates = 0; | ||
|  | 
 | ||
|  |     # | ||
|  |     # Find any rows in need of work as an array/list of the index values. The | ||
|  |     # selectall_arrayref returns a reference to an array containing arrayrefs, | ||
|  |     # so the 'map' flattens that structure. | ||
|  |     # | ||
|  |     $work_query = $rtables->{$table}->{mainQ}; | ||
|  |     @work       = map { $_->[0] } @{ $dbh->selectall_arrayref($work_query) }; | ||
|  | 
 | ||
|  |     $workcount = scalar(@work); | ||
|  |     printf "Number of rows requiring work: %d\n", $workcount if $verbose; | ||
|  | 
 | ||
|  |     _debug( $DEBUG >= 1, "Number of rows requiring work: $workcount" ) | ||
|  |         unless $verbose; | ||
|  |     _debug( $DEBUG >= 2, | ||
|  |         "Rows requiring work: " . join( ",", @work ) . "\n" ); | ||
|  | 
 | ||
|  |     # | ||
|  |     # If there's nothing to do say so and leave | ||
|  |     # | ||
|  |     unless (@work) { | ||
|  |         print "Nothing to do to table '$table'!\n"; | ||
|  |         return; | ||
|  |     } | ||
|  | 
 | ||
|  |     # | ||
|  |     # Pull configuration values from the hash | ||
|  |     # | ||
|  |     $row_query = $rtables->{$table}->{rowQ}; | ||
|  |     @fields    = @{ $rtables->{$table}->{fields} }; | ||
|  |     @logfields = @{ $rtables->{$table}->{logfields} }; | ||
|  |     $index     = $rtables->{$table}->{index}; | ||
|  | 
 | ||
|  |     _debug( $DEBUG >= 3, "\$row_query = $row_query" ); | ||
|  |     _debug( $DEBUG >= 3, "\@fields = " . join( ",", @fields ) ); | ||
|  |     _debug( $DEBUG >= 3, "\$index = $index" ); | ||
|  | 
 | ||
|  |     # | ||
|  |     # Prepare for logging by making a format string for sprintf | ||
|  |     # | ||
|  |     $logfmt = 'Updated row with '; | ||
|  |     $logfmt .= join( ", ", map {"$_ = '%s'"} $index, @logfields ); | ||
|  | 
 | ||
|  |     _debug( $DEBUG >= 3, "\$logfmt = $logfmt" ); | ||
|  | 
 | ||
|  |     # | ||
|  |     # Set up query for the next eligible row | ||
|  |     # | ||
|  |     $sth1 = $dbh->prepare($row_query) or die $DBI::errstr; | ||
|  |     if ( $dbh->err ) { | ||
|  |         die $dbh->errstr; | ||
|  |     } | ||
|  | 
 | ||
|  |     # | ||
|  |     # Loop through rows needing work | ||
|  |     # | ||
|  |     foreach my $pkey (@work) { | ||
|  |         # | ||
|  |         # The row is indexed by the per-table key | ||
|  |         # | ||
|  |         $sth1->execute($pkey) or die $DBI::errstr; | ||
|  |         if ( $dbh->err ) { | ||
|  |             die $dbh->errstr; | ||
|  |         } | ||
|  | 
 | ||
|  |         # | ||
|  |         # Grab the row (there must be only one with this index) | ||
|  |         # | ||
|  |         if ( $h = $sth1->fetchrow_hashref ) { | ||
|  |             # | ||
|  |             # Set up the 'where' options for SQL::Abstract | ||
|  |             # | ||
|  |             %where = ( $index => { '=', $pkey } ); | ||
|  |             _debug( $DEBUG >= 3, Dumper( \%where ) ); | ||
|  | 
 | ||
|  |             # | ||
|  |             # Work on the fields we know might contain HTML | ||
|  |             # | ||
|  |             for my $field (@fields) { | ||
|  |                 # | ||
|  |                 # Perform the change | ||
|  |                 # | ||
|  |                 ( $new = $h->{$field} ) =~ s{\bhttp://(\S)}{https://$1}ig; | ||
|  | 
 | ||
|  |                 # | ||
|  |                 # Set up SQL::Abstract parameters | ||
|  |                 # | ||
|  |                 $fieldvals{$field} = $new; | ||
|  |             } | ||
|  | 
 | ||
|  |             # | ||
|  |             # Use SQL::Abstract to make the statement and the bind parameters | ||
|  |             # | ||
|  |             ( $stmt, @bind ) = $sql->update( $table, \%fieldvals, \%where ); | ||
|  | 
 | ||
|  |             # | ||
|  |             # Do the change or report it depending on dry-run mode | ||
|  |             # | ||
|  |             unless ($dry_run) { | ||
|  |                 $sth2 = $dbh->prepare($stmt) or die $DBI::errstr; | ||
|  |                 $sth2->execute(@bind)        or die $DBI::errstr; | ||
|  |                 $log->info( sprintf( $logfmt, $pkey, @{$h}{@logfields} ) ); | ||
|  |                 printf $logfmt. "\n", $pkey, @{$h}{@logfields} if $verbose; | ||
|  |                 $updates++; | ||
|  |             } | ||
|  |             else { | ||
|  |                 print "No change made in dry-run mode\n"; | ||
|  |                 if ($verbose) { | ||
|  |                     print "SQL: $stmt\n"; | ||
|  |                     print "Bind> ", join( "\nBind> ", @bind ), "\n"; | ||
|  |                     print '-' x 80, "\n"; | ||
|  |                 } | ||
|  |             } | ||
|  |         } | ||
|  | 
 | ||
|  |         # | ||
|  |         # Apply the limit if appropriate | ||
|  |         # | ||
|  |         $count++; | ||
|  |         unless ( $limit == 0 ) { | ||
|  |             last if ( $count >= $limit ); | ||
|  |         } | ||
|  | 
 | ||
|  |     } | ||
|  | 
 | ||
|  |     unless ($dry_run) { | ||
|  |         $log->info("Number of updates = $updates"); | ||
|  |         if ($verbose) { | ||
|  |             print "Number of updates = $updates\n"; | ||
|  |             printf "Remaining rows needing attention: %d\n", | ||
|  |                 $workcount - $updates; | ||
|  |         } | ||
|  |     } | ||
|  | 
 | ||
|  | } | ||
|  | 
 | ||
|  | #===  FUNCTION  ================================================================ | ||
|  | #         NAME: concat | ||
|  | #      PURPOSE: Reimplementation of join but with any undefined or empty | ||
|  | #               arguments removed | ||
|  | #   PARAMETERS: $sep            The string to be used to separate elements in | ||
|  | #                               the result | ||
|  | #               [variable args] Any number of arguments to be joined together | ||
|  | #                               with the separator | ||
|  | #      RETURNS: The concatenated arguments | ||
|  | #  DESCRIPTION: Giving 'join' an array that may contain undefined elements will | ||
|  | #               result in empty results in the output string and error | ||
|  | #               messages as the undefined elements are processed. Giving it | ||
|  | #               empty string elements will result in dangling separators in | ||
|  | #               the output. This routine removes the undefined and empty | ||
|  | #               elements before joining the rest. | ||
|  | #       THROWS: No exceptions | ||
|  | #     COMMENTS: None | ||
|  | #     SEE ALSO: | ||
|  | #=============================================================================== | ||
|  | sub concat { | ||
|  |     my $sep = shift; | ||
|  | 
 | ||
|  |     my @args = grep { defined($_) && length($_) > 0 } @_; | ||
|  | 
 | ||
|  |     return join( $sep, @args ); | ||
|  | } | ||
|  | 
 | ||
|  | #===  FUNCTION  ================================================================ | ||
|  | #         NAME: _debug | ||
|  | #      PURPOSE: Prints debug reports | ||
|  | #   PARAMETERS: $active         Boolean: 1 for print, 0 for no print | ||
|  | #               $message        Message to print | ||
|  | #      RETURNS: Nothing | ||
|  | #  DESCRIPTION: Outputs a message if $active is true. It removes any trailing | ||
|  | #               newline and then adds one in the 'print' to the caller doesn't | ||
|  | #               have to bother. Prepends the message with 'D> ' to show it's | ||
|  | #               a debug message. | ||
|  | #       THROWS: No exceptions | ||
|  | #     COMMENTS: None | ||
|  | #     SEE ALSO: N/A | ||
|  | #=============================================================================== | ||
|  | sub _debug { | ||
|  |     my ( $active, $message ) = @_; | ||
|  | 
 | ||
|  |     chomp($message); | ||
|  |     print "D> $message\n" if $active; | ||
|  | } | ||
|  | 
 | ||
|  | #===  FUNCTION  ================================================================ | ||
|  | #         NAME: Options | ||
|  | #      PURPOSE: Processes command-line options | ||
|  | #   PARAMETERS: $optref     Hash reference to hold the options | ||
|  | #      RETURNS: Undef | ||
|  | #  DESCRIPTION: | ||
|  | #       THROWS: no exceptions | ||
|  | #     COMMENTS: none | ||
|  | #     SEE ALSO: n/a | ||
|  | #=============================================================================== | ||
|  | sub Options { | ||
|  |     my ($optref) = @_; | ||
|  | 
 | ||
|  |     my @options = ( | ||
|  |         "help",     "doc",      "debug=i", "verbose!", | ||
|  |         "dry-run!", "config=s", "limit=i", | ||
|  |     ); | ||
|  | 
 | ||
|  |     if ( !GetOptions( $optref, @options ) ) { | ||
|  |         pod2usage( -msg => "$PROG version $VERSION\n", -exitval => 1 ); | ||
|  |     } | ||
|  | 
 | ||
|  |     return; | ||
|  | } | ||
|  | 
 | ||
|  | __END__ | ||
|  | 
 | ||
|  | #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% | ||
|  | #  Application Documentation | ||
|  | #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% | ||
|  | #{{{ | ||
|  | 
 | ||
|  | 
 | ||
|  | =head1 NAME | ||
|  | 
 | ||
|  | fix_urls - Fixes 'http://' urls in the HPR database | ||
|  | 
 | ||
|  | =head1 VERSION | ||
|  | 
 | ||
|  | This documentation refers to fix_urls version 0.0.5 | ||
|  | 
 | ||
|  | 
 | ||
|  | =head1 USAGE | ||
|  | 
 | ||
|  |     ./fix_urls [-help] [-doc] [-debug=N] [-[no]dry-run] [-[no]verbose] | ||
|  |         [-config=FILE] [-limit=N] tablename | ||
|  | 
 | ||
|  |     fix_urls -help | ||
|  |     fix_urls -doc | ||
|  | 
 | ||
|  |     fix_urls -limit=10 -dry-run comments | ||
|  |     fix_urls -limit=10 comments | ||
|  |     fix_urls -limit=10 -verbose comments | ||
|  | 
 | ||
|  |     fix_urls -config=.hpr_livedb.cfg -debug=1 -dry-run -limit=1 comments | ||
|  | 
 | ||
|  | =head1 REQUIRED ARGUMENTS | ||
|  | 
 | ||
|  | =over 4 | ||
|  | 
 | ||
|  | =item B<tablename> | ||
|  | 
 | ||
|  | The mandatory argument required by the script is the name of the table to | ||
|  | process. The choices are: | ||
|  | 
 | ||
|  |     comments | ||
|  |     eps | ||
|  |     hosts | ||
|  |     miniseries | ||
|  | 
 | ||
|  | =back | ||
|  | 
 | ||
|  | =head1 OPTIONS | ||
|  | 
 | ||
|  | =over 8 | ||
|  | 
 | ||
|  | =item B<-help> | ||
|  | 
 | ||
|  | Prints a brief help message describing the usage of the program, and then exits. | ||
|  | 
 | ||
|  | =item B<-doc> | ||
|  | 
 | ||
|  | Displays the entirety of the documentation (using a pager), and then exits. To | ||
|  | generate a PDF version use: | ||
|  | 
 | ||
|  |     pod2pdf fix_urls --out=fix_urls.pdf | ||
|  | 
 | ||
|  | =item B<-debug=N> | ||
|  | 
 | ||
|  | Selects a level of debugging. Debug information consists of a line or series | ||
|  | of lines prefixed with the characters 'D>': | ||
|  | 
 | ||
|  | =over 4 | ||
|  | 
 | ||
|  | =item B<0> | ||
|  | 
 | ||
|  | No debug output is generated: this is the default | ||
|  | 
 | ||
|  | =item B<1> | ||
|  | 
 | ||
|  | Displays the number of updates required in a table. | ||
|  | 
 | ||
|  | =item B<2> | ||
|  | 
 | ||
|  | As for level 1, and also displays the primary key values of all rows requiring | ||
|  | work in the table. | ||
|  | 
 | ||
|  | =item B<3> | ||
|  | 
 | ||
|  | As for level 2, and also displays some internal values for verification. | ||
|  | 
 | ||
|  | =back | ||
|  | 
 | ||
|  | =item B<-[no]dry-run> | ||
|  | 
 | ||
|  | Controls whether the program runs in a mode where it performs database | ||
|  | updates. When enabled the details of the updates to be performed are shown, | ||
|  | otherwise the  updates are applied. The default B<-nodry-run> allows the | ||
|  | program to perform the changes. | ||
|  | 
 | ||
|  | =item B<-[no]verbose> | ||
|  | 
 | ||
|  | Normally very little is reported by the script, although details of which rows | ||
|  | have been changed are logged. When B<-verbose> is selected more information | ||
|  | about the number of rows needing work, the updates performed (or which would | ||
|  | have been performed) and how many changes were made is reported. | ||
|  | 
 | ||
|  | =item B<-limit=N> | ||
|  | 
 | ||
|  | This option allows the number of rows in the chosen table to be limited during | ||
|  | a B<-dry-run> pass or an update pass. If omitted, or if a value of zero is | ||
|  | given, then all eligible rows are processed. | ||
|  | 
 | ||
|  | =item B<-config=FILE> | ||
|  | 
 | ||
|  | This option allows an alternative configuration file to be used. This file | ||
|  | defines the location of the database, its port, its name and the username and | ||
|  | password to be used to access it. This feature was added to allow the script | ||
|  | to access alternative databases or the live database over an SSH tunnel. | ||
|  | 
 | ||
|  | See the CONFIGURATION AND ENVIRONMENT section below for the file format. | ||
|  | 
 | ||
|  | If the option is omitted the default file is used: B<.hpr_db.cfg> | ||
|  | 
 | ||
|  | =back | ||
|  | 
 | ||
|  | =head1 DESCRIPTION | ||
|  | 
 | ||
|  | The B<fix_urls> script performs edits on fields in tables in the HPR database. | ||
|  | As written, the purpose is to change all occurrences of 'http://' to | ||
|  | 'https://', though it could be used for other tasks. It is not designed to be | ||
|  | easily changeable from one to another, but the code can be changed to do this | ||
|  | if needed. | ||
|  | 
 | ||
|  | A single table is processed in each run, and the number of rows may be limited | ||
|  | if required by using the B<-limit=N> option. | ||
|  | 
 | ||
|  | The eligible tables are defined in a hash structure B<%tables> which defines | ||
|  | the fields to be processed and the queries needed to search for all rows | ||
|  | requiring work and to get a particular row to work on. It also defines which | ||
|  | fields are to be reported in the log file. | ||
|  | 
 | ||
|  | A log file is appended to when the script is run, which has the name | ||
|  | B<fix_urls.log> in the same directory as the script. | ||
|  | 
 | ||
|  | =head1 DIAGNOSTICS | ||
|  | 
 | ||
|  | =over 4 | ||
|  | 
 | ||
|  | =item B<Unable to find config file '...'> | ||
|  | 
 | ||
|  | Type: fatal | ||
|  | 
 | ||
|  | The configuration file in the B<-config=FILE> option cannot be found. | ||
|  | 
 | ||
|  | =item B<Database table not specified> | ||
|  | 
 | ||
|  | Type: fatal | ||
|  | 
 | ||
|  | The mandatory table name argument was not provided. | ||
|  | 
 | ||
|  | =item B<Not a valid table name: ...> | ||
|  | 
 | ||
|  | Type: fatal | ||
|  | 
 | ||
|  | The mandatory table name argument specified an unknown table name. | ||
|  | 
 | ||
|  | =item B<[DBI error messages]> | ||
|  | 
 | ||
|  | Type: fatal | ||
|  | 
 | ||
|  | Generated when a database interface error has been detected, such as failure | ||
|  | to connect to the database or failure to prepare or execute a query. | ||
|  | 
 | ||
|  | =back | ||
|  | 
 | ||
|  | =head1 CONFIGURATION AND ENVIRONMENT | ||
|  | 
 | ||
|  | The script obtains the credentials it requires to open a local copy of the HPR | ||
|  | database from a configuration file. The name of the file it expects is | ||
|  | B<.hpr_db.cfg> in the directory holding the script. This configuration file | ||
|  | can be overridden using the B<-config=FILE> option as described above. | ||
|  | 
 | ||
|  | The configuration file format is as follows: | ||
|  | 
 | ||
|  |  <database> | ||
|  |      host = 127.0.0.1 | ||
|  |      port = PORT | ||
|  |      name = DATABASE | ||
|  |      user = USERNAME | ||
|  |      password = PASSWORD | ||
|  |  </database> | ||
|  | 
 | ||
|  | =head1 DEPENDENCIES | ||
|  | 
 | ||
|  |     Config::General | ||
|  |     DBI | ||
|  |     Data::Dumper | ||
|  |     Getopt::Long | ||
|  |     Log::Handler | ||
|  |     Pod::Usage | ||
|  |     SQL::Abstract | ||
|  | 
 | ||
|  | =head1 BUGS AND LIMITATIONS | ||
|  | 
 | ||
|  | There are no known bugs in this module. | ||
|  | Please report problems to <Dave Morriss>  (<Dave.Morriss@gmail.com>) | ||
|  | Patches are welcome. | ||
|  | 
 | ||
|  | =head1 AUTHOR | ||
|  | 
 | ||
|  | <Dave Morriss>  (<Dave.Morriss@gmail.com>) | ||
|  | 
 | ||
|  | =head1 LICENCE AND COPYRIGHT | ||
|  | 
 | ||
|  | Copyright (c) 2021-2020 Dave Morriss (Dave.Morriss@gmail.com). All rights reserved. | ||
|  | 
 | ||
|  | This module is free software; you can redistribute it and/or | ||
|  | modify it under the same terms as Perl itself. See perldoc perlartistic. | ||
|  | 
 | ||
|  | =cut | ||
|  | 
 | ||
|  | #}}} | ||
|  | 
 | ||
|  | # [zo to open fold, zc to close] | ||
|  | 
 | ||
|  | # vim: syntax=perl:ts=8:sw=4:et:ai:tw=78:fo=tcrqn21:fdm=marker | ||
|  | 
 |