hpr-tools/Database/query2tt2
Dave Morriss dc0f29e957 Updates since 2024-06-15
Database/query2tt2: comment and documentation updates; use of Perl's
    try/catch.

InternetArchive/.make_metadata.cfg: added comments for readability

InternetArchive/make_metadata: bug fix needed now that all shows on the HPR server have
    a directory with assets under it.

InternetArchive/repair_assets: new Bash script in development. Collects
    assets from the IA and uploads them to a new directory on the HPR
    server. Will run 'fix_asset_links' (to repair asset links for their
    new directories) once it is ready.

InternetArchive/repair_item: Bash script which was originally written to
    run on 'borg' and upload files to a new IA item when the uploads
    timed out. Now enhanced to upload missing files recovered from the
    HPR backup disk, such as transcripts.
2024-07-16 21:39:28 +01:00

625 lines
18 KiB
Perl
Executable File

#!/usr/bin/env perl
#===============================================================================
#
# FILE: query2tt2
#
# USAGE: ./query2tt2 [-help] [-debug=N] [-config=FILE] [-query=FILE]
# [-template=FILE]
# [-dbarg=ARG1 [-dbarg=ARG2] ...]
# [-define KEY1=VALUE1 [-define KEY2=VALUE2] ...
# [-define KEYn=VALUEn]] [QUERY]
#
# DESCRIPTION: Built for use with the Hacker Public Radio database, but could
# be used in any context with a MariaDB database.
# Runs a query given as the only argument (or in a file).
# Caution is needed since *any* query will be run, not just
# SELECT commands. The result of the query is output in
# a specified format defined by a template on STDOUT. The query
# can have arguments provided by '-dbarg=ARG' to be used in '?'
# placeholders in the SQL. The template can receive variables
# through the option '-define KEY=VALUE'. A configuration file
# is needed, though there is a default ('.hpr_db.cfg'), which
# accesses the local snapshot.
#
# OPTIONS: ---
# REQUIREMENTS: ---
# BUGS: ---
# NOTES: Had to revert to MySQL because of a problem with DBD::MariaDB
# AUTHOR: Dave Morriss (djm), Dave.Morriss@gmail.com
# VERSION: 0.0.5
# CREATED: 2021-06-18 13:24:49
# REVISION: 2024-06-29 18:42:49
#
#===============================================================================
use 5.010;
use strict;
use warnings;
use utf8;
use open ':encoding(UTF-8)';
# Using experimental features, some of which require warnings to be turned off
use feature qw{ say try };
no warnings qw{
experimental::try
};
use Getopt::Long;
use Pod::Usage;
use Config::General;
#use Try::Tiny;
use File::Slurper qw{ read_text };
use Hash::Merge;
use Template;
use DBI;
use Data::Dumper;
#
# Version number (manually incremented)
#
our $VERSION = '0.0.5';
#
# Script and directory names
#
( my $PROG = $0 ) =~ s|.*/||mx;
( my $DIR = $0 ) =~ s|/?[^/]*$||mx;
$DIR = '.' unless $DIR;
#-------------------------------------------------------------------------------
# Declarations
#-------------------------------------------------------------------------------
#
# Constants and other declarations
#
my $basedir = "$ENV{HOME}/HPR/Database";
my $configfile = "$basedir/.hpr_db.cfg";
my ( $dbh, $sth1 );
my ( $query, $result, @names, $document );
#
# Default template iterates through all rows in the 'result' matrix and for
# each row displays the field name (key) from array 'names', and its value.
# There's a blank line after each row.
#
my $def_template = <<'ENDTPL';
[% FOREACH row IN result -%]
[% FOREACH key IN names -%]
[% key %]: [% row.$key %]
[% END -%]
[% END -%]
ENDTPL
#-------------------------------------------------------------------------------
# There should be no need to edit anything after this point
#-------------------------------------------------------------------------------
#
# Enable Unicode mode
#
#binmode STDOUT, ":encoding(UTF-8)";
#binmode STDERR, ":encoding(UTF-8)";
#-------------------------------------------------------------------------------
# Options and arguments
#-------------------------------------------------------------------------------
my %options;
Options( \%options );
#
# Default help
#
pod2usage( -msg => "Version $VERSION\n", -exitval => 1, -verbose => 0 )
if ( $options{'help'} );
#
# Full documentation if requested with -doc
#
pod2usage(
-msg => "$PROG version $VERSION\n",
-verbose => 2,
-exitval => 1,
-noperldoc => 0,
) if ( $options{'doc'} );
#
# Collect options
#
my $DEBUG = ( $options{'debug'} ? $options{'debug'} : 0 );
my $cfgfile
= ( defined( $options{config} ) ? $options{config} : $configfile );
my $queryfile = $options{query};
my $template = $options{template};
my @dbargs = _dbargs( \%options );
my %defs = _define( \%options );
_debug( $DEBUG >= 3, '@dbargs: ' . join( ',', @dbargs ) );
_debug( $DEBUG >= 3, '%defs: ' . Dumper(\%defs) );
#-------------------------------------------------------------------------------
# Option checks and defaults
#-------------------------------------------------------------------------------
die "Unable to find configuration file $cfgfile\n" unless ( -e $cfgfile );
_debug( $DEBUG >= 3, '$cfgfile: ' . $cfgfile );
#
# Query is an argument string or is in a file
#
if ($queryfile) {
die "Unable to find query file $queryfile\n" unless ( -e $queryfile );
$query = read_text($queryfile);
}
else {
$query = shift;
pod2usage( -msg => "Please specify a SQL query\n", -exitval => 1 )
unless $query;
}
_debug( $DEBUG >= 3, '$query: ' . Dumper(\$query) );
#
# Template is the default pre-defined string or a filename
#
if ($template) {
die "Unable to find template $template\n" unless ( -e $template );
}
else {
$template = \$def_template;
}
_debug(
$DEBUG >= 3,
'$template: '
. (ref($template) eq ''
? "filename $template"
: "reference to string\n$$template")
);
#-------------------------------------------------------------------------------
# Load database configuration data
#-------------------------------------------------------------------------------
my $conf = Config::General->new(
-ConfigFile => $cfgfile,
-InterPolateVars => 1,
-ExtendedAccess => 1
);
my %config = $conf->getall();
#-------------------------------------------------------------------------------
# Connect to the database
#-------------------------------------------------------------------------------
my $dbhost = $config{database}->{host} // '127.0.0.1';
my $dbport = $config{database}->{port} // 3306;
my $dbname = $config{database}->{name};
my $dbuser = $config{database}->{user};
my $dbpwd = $config{database}->{password};
$dbh = DBI->connect( "dbi:mysql:host=$dbhost;port=$dbport;database=$dbname",
$dbuser, $dbpwd, { AutoCommit => 1 } )
or die $DBI::errstr;
#
# Enable client-side UTF8
#
$dbh->{mysql_enable_utf8} = 1;
#
# Set up the query
#
$sth1 = $dbh->prepare($query) or die $DBI::errstr;
if ( $dbh->err ) {
die $dbh->errstr;
}
#
# Perform the query
#
try {
$sth1->execute(@dbargs);
if ( $dbh->err ) {
die $dbh->errstr;
}
}
catch ($e) {
#
# The 'die' above was triggered. The error is in $_.
#
my $pcount = grep {/\?/} split( '', $query );
my $acount = scalar(@dbargs);
print STDERR "Failed to execute query.\n";
print STDERR "Placeholder/Argument mismatch: $pcount/$acount\n";
exit;
}
#
# Grab everything from the query as an arrayref of hashrefs
#
$result = $sth1->fetchall_arrayref( {} );
_debug( $DEBUG >= 3, '$result: ' . Dumper($result) );
#
# Collect field names
#
@names = @{$sth1->{NAME}};
_debug( $DEBUG >= 3, '@names: ' . Dumper(\@names) );
#
# Set up the template
#
my $tt = Template->new(
{ ABSOLUTE => 1,
ENCODING => 'utf8',
INCLUDE_PATH => $basedir,
}
);
#
# Send collected data to the template
#
my $vars = { names => \@names, result => $result, };
if (%defs) {
#
# If we have definitions add them to $vars
#
my $merge = Hash::Merge->new('LEFT_PRECEDENT');
my %merged = %{ $merge->merge( $vars, \%defs ) };
$vars = \%merged;
}
_debug( $DEBUG >= 3, '$vars: ' . Dumper($vars) );
$tt->process( $template, $vars, \$document, { binmode => ':utf8' } )
|| die $tt->error(), "\n";
print $document;
exit;
#=== FUNCTION ================================================================
# NAME: _debug
# PURPOSE: Prints debug reports
# PARAMETERS: $active Boolean: 1 for print, 0 for no print
# $message Message to print
# RETURNS: Nothing
# DESCRIPTION: Outputs a message if $active is true. It removes any trailing
# newline and then adds one in the 'print' to the caller doesn't
# have to bother. Prepends the message with 'D> ' to show it's
# a debug message.
# THROWS: No exceptions
# COMMENTS: None
# SEE ALSO: N/A
#===============================================================================
sub _debug {
my ( $active, $message ) = @_;
chomp($message);
print STDERR "D> $message\n" if $active;
}
#=== FUNCTION ================================================================
# NAME: _dbargs
# PURPOSE: Collects database arguments for the main query
# PARAMETERS: $opts hash reference holding the options
# RETURNS: An array holding all of the arguments
# DESCRIPTION: If there are -dbargs options they will be an array in the hash
# returned by Getopt::Long. We return the array to the caller.
# THROWS: No exceptions
# COMMENTS: None
# SEE ALSO: N/A
#===============================================================================
sub _dbargs {
my ($opts) = @_;
my @args;
if ( defined( $opts->{dbargs} ) ) {
@args = @{ $opts->{dbargs} };
}
return (@args);
}
#=== FUNCTION ================================================================
# NAME: _define
# PURPOSE: Handles multiple instances of the option '-define x=42'
# PARAMETERS: $opts hash reference holding the options
# RETURNS: A hash containing all of the named items (e.g. { 'x' => 42 })
# DESCRIPTION: If there are -define options they will be a hashref in the hash
# returned by Getopt::Long. We return the internal hash to the
# caller. Doesn't handle the issue that we don't want the keys
# 'names' and 'result', though perhaps it should.
# THROWS: No exceptions
# COMMENTS: None
# SEE ALSO:
#===============================================================================
sub _define {
my ($opts) = @_;
my %defs;
if ( defined( $opts->{define} ) ) {
%defs = %{ $opts->{define} };
}
return (%defs);
}
#=== FUNCTION ================================================================
# NAME: Options
# PURPOSE: Processes command-line options
# PARAMETERS: $optref Hash reference to hold the options
# RETURNS: Undef
# DESCRIPTION:
# THROWS: no exceptions
# COMMENTS: none
# SEE ALSO: n/a
#===============================================================================
sub Options {
my ($optref) = @_;
my @options = (
"help", "doc", "debug=i", "config=s",
"query=s", "template=s", "dbargs=s@", "define=s%",
);
if ( !GetOptions( $optref, @options ) ) {
pod2usage( -msg => "Version $VERSION\n", -exitval => 1 );
}
return;
}
__END__
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Application Documentation
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
#{{{
=head1 NAME
query2tt2 - A script for formatting a report from database query using a template
=head1 VERSION
This documentation refers to query2tt2 version 0.0.5
=head1 USAGE
query2tt2 [-help] [-doc] [-debug=N] [-config=FILE] [-query=FILE]
[-template=FILE] [-dbargs=ARG1 [-dbarg=ARG2] ...]
[define KEY1=VALUE [define key2=VALUE2] ...] [QUERY]
query2tt2 -help
query2tt2 -query=tag_query_580-589.sql
query2tt2 -config=.hpr_livedb.cfg -template=query2tt2_taglist.tpl \
'select id,summary,tags from eps where id between 580 AND 589 AND (length(summary) = 0 or length(tags) = 0) ORDER BY id'
query2tt2 -config=.hpr_livedb.cfg -query=hosts_showcount.sql \
-dbargs '2021-01-01' -dbargs '2021-12-31' \
-def year=2021 -template=~/HPR/Community_News/hosts_list.tpl
=head1 OPTIONS
=over 4
=item B<-help>
Prints a brief help message describing the usage of the program, and then exits.
=item B<-doc>
Displays the entirety of the documentation (using a pager), and then exits. To
generate a PDF version use:
pod2pdf query2tt2 --out=query2tt2.pdf
=item B<-debug=N>
Selects a level of debugging. Debug information consists of a line or series
of lines prefixed with the characters 'D>':
=over 4
=item B<0>
No debug output is generated: this is the default
=item B<3>
Prints all data structures from options or from the database
=back
(The debug levels need work!)
=item B<-config=FILE>
This option allows an alternative configuration file to be used. This file
defines the location of the database, its port, its name and the username and
password to be used to access it. This feature was added to allow the script
to access alternative databases or the live HPR database over an SSH tunnel.
See the CONFIGURATION AND ENVIRONMENT section below for the file format.
If the option is omitted the default file is used: B<.hpr_db.cfg>
=item B<-query=FILE>
The script needs an SQL query to be applied to the database. This may be
supplied as a file, in which case this option gives the name of the file.
Alternatively the query can be given as a delimited string on the command
line.
If neither method is used the script aborts with an error message.
=item B<-dbarg=ARG> [ B<-dbarg=ARG> ... ]
The query can have place holders ('?') in it and the corresponding values for
these placeholders can be passed to the script through the B<-dbarg=ARG>
option. The option can be repeated as many times as required and the order of
B<ARG> values is preserved.
=item B<-template=FILE>
The results of the query are fed to the Template Toolkit system for
reformatting. This option provides the name of the template definition file.
If this option is omitted then the script uses a very simple internal template
which is roughly equivalent to the effect in MySQL/MariaDB of ending a query
with I<\G>.
See below in the B<DESCRIPTION> section for the constraints imposed on the
contents of the template.
Output from the template is written to STDOUT.
=item B<-define KEY1=VALUE1> [ B<-define KEY2=VALUE2> ... B<-define KEYn=VALUEn> ]
The Template Toolkit (TT2) template may receive values from the command line
using this option. The argument to the B<-define> option is a B<key=value>
pair. Keys should be unique otherwise they will overwrite one another. They
should also not be 'names' or 'result' because these keys are used internally
(for the data from the database). See below for more details. The keys will
become TT2 variables and the values will be assigned to them.
=back
=head1 DESCRIPTION
The purpose of the script is to run a query against the HPR database (a local
copy or the live one on the server over an SSH tunnel). The database choice is
made via a configuration file. The default file points to the local database,
but the alternative (discussed later) accesses the live database.
The data returned from the query is then passed through a Template Toolkit
template so that it can be formatted. There are many ways in which this can be
done. A default template is built into the script which displays the data in
a very simple form.
A knowledge of the Template Toolkit package is required to write templates.
The template receives two data structures:
=over 4
=item B<names>
This is an array of the field (column) names used in the query in the order
they are referenced. This is to help with writing out fields in the same order
as the query, if this is required.
=item B<result>
This is an array of hashes returned from the query. Relational databases
return sets which are effectively tables or matrices of information. Perl
represents this structure as an array of hashes where each array element
corresponds to a row in the returned table, and each hash contains the fields
or columns. Perl does not guarantee hash key ordering, so the B<names> array
(above) is provided to ensure order is preserved.
=back
=head1 DIAGNOSTICS
=over 4
=item B<Unable to find configuration file ...>
The nominated (or default) configuration file could not be found.
=item B<Unable to find query file ...>
The nominated query file could not be found.
=item B<Couldn't open ...: ...>
The nominated query file could not be opened.
=item B<Unable to find template file ...>
The nominated template file could not be found.
=item B<various database errors>
An error has occurred while performing a database operation.
=item B<Failed to execute query.>
There is a mismatch between the number of placeholders in the query ('?'
characters) and the number of arguments provided through the B<-dbargs=ARG>
option. The script will attempt to analyse whether there are too many or too
few arguments
=item B<Template Toolkit error>
An error has occurred while processing the template.
=back
=head1 CONFIGURATION AND ENVIRONMENT
The script obtains the credentials it requires to open the MariaDB database
from a configuration file. The name of the file it expects is B<.hpr_db.cfg>
in the directory holding the script. This configuration file can be overridden
using the B<-config=FILE> option as described above.
The configuration file format is as follows:
<database>
host = 127.0.0.1
port = PORT
name = DATABASE
user = USERNAME
password = PASSWORD
</database>
=head1 DEPENDENCIES
Config::General
DBI
Data::Dumper
File::Slurper
Getopt::Long
Hash::Merge
Pod::Usage
Template
=head1 BUGS AND LIMITATIONS
There are no known bugs in this module.
Please report problems to Dave Morriss (Dave.Morriss@gmail.com)
Patches are welcome.
=head1 AUTHOR
Dave Morriss (Dave.Morriss@gmail.com)
=head1 LICENCE AND COPYRIGHT
Copyright (c) 2021, 2022, 2024 Dave Morriss (Dave.Morriss@gmail.com). All
rights reserved.
This module is free software; you can redistribute it and/or
modify it under the same terms as Perl itself. See perldoc perlartistic.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
=cut
#}}}
# [zo to open fold, zc to close]
# vim: syntax=perl:ts=8:sw=4:et:ai:tw=78:fo=tcrqn21:fdm=marker