Updated query2json, and tidied query2csv

This commit is contained in:
Dave Morriss 2025-05-07 10:06:01 +01:00
parent 118ee00677
commit 586c8e537e
2 changed files with 529 additions and 66 deletions

View File

@ -49,7 +49,7 @@ use Data::Dumper;
our $VERSION = '0.0.4';
#
# Script and directory names
# Script name
#
( my $PROG = $0 ) =~ s|.*/||mx;
@ -141,6 +141,9 @@ _debug( $DEBUG >= 3, '$query: ' . Dumper(\$query) );
$pcount = grep {/\?/} split( '', $query );
$acount = scalar(@dbargs);
#
# Check the placeholder and argument counts are the same
#
if ( $pcount ne $acount) {
say STDERR "Query placeholder vs argument mismatch";
say STDERR "Placeholders = $pcount, Arguments = $acount";
@ -543,8 +546,8 @@ MySQL/MariaDB format:
<database>
dbtype = MySQL
host = 127.0.0.1
name = hpr_hpr
user = hpradmin
name = DBNAME
user = DBUSER
password = PASSWORD
</database>

View File

@ -3,29 +3,40 @@
#
# FILE: query2json
#
# USAGE: ./query2json query
# USAGE: ./query2json [-help] [-documentation|-man] [-debug=N]
# [-config=FILE] [-query=FILE]
# [-dbarg=ARG1 [-dbarg=ARG2] ...] [QUERY]
#
# DESCRIPTION: Runs a query given as the only argument. Caution is needed
# since *any* query will be run. The result of the query is
# output in JSON form on STDOUT.
# DESCRIPTION: Runs a query given as the only argument, or provided in
# a file. Caution is needed since *any* query will be run. The
# result of the query is output in JSON form on STDOUT or to
# a file. If the query contains '?' placeholders they can be
# filled with -dbarg=ARG options.
#
# OPTIONS: ---
# REQUIREMENTS: ---
# BUGS: ---
# NOTES: Had to revert to MySQL because of a problem with DBD::MariaDB
# NOTES: ---
# AUTHOR: Dave Morriss (djm), Dave.Morriss@gmail.com
# VERSION: 0.0.2
# VERSION: 0.0.3
# CREATED: 2021-06-18 13:24:49
# REVISION: 2023-01-05 16:17:24
# REVISION: 2025-05-07 09:50:34
#
#===============================================================================
use 5.010;
use strict;
use warnings;
use v5.40;
use utf8;
use open ':std', ':encoding(UTF-8)';
use feature qw{ say try };
use Cwd qw( getcwd abs_path ); # Detecting where the script lives
use Getopt::Long;
use Pod::Usage;
use Config::General;
use File::Slurper qw{ read_text };
use JSON;
use DBI;
@ -34,14 +45,12 @@ use Data::Dumper;
#
# Version number (manually incremented)
#
our $VERSION = '0.0.2';
our $VERSION = '0.0.3';
#
# Script and directory names
# Script name
#
( my $PROG = $0 ) =~ s|.*/||mx;
( my $DIR = $0 ) =~ s|/?[^/]*$||mx;
$DIR = '.' unless $DIR;
#-------------------------------------------------------------------------------
# Declarations
@ -49,58 +58,137 @@ $DIR = '.' unless $DIR;
#
# Constants and other declarations
#
my $basedir = "$ENV{HOME}/HPR/Database";
my $configfile = "$basedir/.hpr_livedb.cfg";
#
# Make a variable to hold the working directory where the script is located
#
( my $basedir = abs_path($0) ) =~ s|/?[^/]*$||mx;
my $configfile = "$basedir/.hpr_sqlite.cfg";
my ( $dbh, $sth1, $aref1 );
my ( $query, $result, $json );
#
# Enable Unicode mode
#
binmode STDOUT, ":encoding(UTF-8)";
binmode STDERR, ":encoding(UTF-8)";
#
# Load database configuration data
#
my $conf = Config::General->new(
-ConfigFile => $configfile,
-InterPolateVars => 1,
-ExtendedAccess => 1
);
my %config = $conf->getall();
my ( $pcount, $acount );
#-------------------------------------------------------------------------------
# Options and arguments
#-------------------------------------------------------------------------------
$query = shift;
die "Usage: $PROG query\n" unless $query;
my %options;
Options( \%options );
#
# Default help
#
pod2usage( -msg => "Version $VERSION\n", -exitval => 1, -verbose => 0 )
if ( $options{'help'} );
#
# Full documentation if requested with -doc
#
pod2usage(
-msg => "$PROG version $VERSION\n",
-verbose => 2,
-exitval => 1,
-noperldoc => 0,
) if ( $options{'documentation'} );
#
# Collect options
#
my $DEBUG = ( $options{'debug'} ? $options{'debug'} : 0 );
my $cfgfile
= ( defined( $options{config} ) ? $options{config} : $configfile );
my $queryfile = $options{query};
my $outfile = $options{output};
_debug( $DEBUG >= 3, '$outfile: ' . $outfile ) if ($outfile);
my @dbargs = _dbargs( \%options );
_debug( $DEBUG >= 3, '@dbargs: ' . join( ',', @dbargs ) );
#-------------------------------------------------------------------------------
# Option checks and defaults
#-------------------------------------------------------------------------------
die "Unable to find configuration file $cfgfile\n" unless ( -e $cfgfile );
_debug( $DEBUG >= 3, '$cfgfile: ' . $cfgfile );
#
# Query is an argument string or is in a file
#
if ($queryfile) {
die "Unable to find query file $queryfile\n" unless ( -e $queryfile );
$query = read_text($queryfile);
}
else {
$query = shift;
pod2usage(
-msg => "Please specify a SQL query\n",
-exitval => 1,
-verbose => 0
)
unless $query;
}
_debug( $DEBUG >= 3, '$query: ' . Dumper(\$query) );
#
# Count placeholders in the query and the arguments provided
#
$pcount = grep {/\?/} split( '', $query );
$acount = scalar(@dbargs);
#
# Check the placeholder and argument counts are the same
#
if ( $pcount ne $acount) {
say STDERR "Query placeholder vs argument mismatch";
say STDERR "Placeholders = $pcount, Arguments = $acount";
pod2usage(
-msg => "Wrong number of DB arguments\n",
-exitvalue => 1,
-verbose => 0
);
}
#-------------------------------------------------------------------------------
# Open the output file (or STDOUT)
#-------------------------------------------------------------------------------
my $outfh;
if ($outfile) {
open( $outfh, ">:encoding(UTF-8)", $outfile )
or die "Unable to open $outfile for writing: $!";
}
else {
open( $outfh, ">&", \*STDOUT )
or die "Unable to initialise for writing: $!";
}
#-------------------------------------------------------------------------------
# Load database configuration data; allow environment variables
#-------------------------------------------------------------------------------
my $conf = Config::General->new(
-ConfigFile => $configfile,
-InterPolateVars => 1,
-InterPolateEnv => 1,
-ExtendedAccess => 1
);
my %config = $conf->getall();
#
# Set defaults
#
$config{database}->{dbtype} //= 'SQLite';
$config{database}->{host} //= '127.0.0.1';
$config{database}->{port} //= 3306;
#-------------------------------------------------------------------------------
# Connect to the database
#-------------------------------------------------------------------------------
my $dbhost = $config{database}->{host} // '127.0.0.1';
my $dbport = $config{database}->{port} // 3306;
my $dbname = $config{database}->{name};
my $dbuser = $config{database}->{user};
my $dbpwd = $config{database}->{password};
#$dbh = DBI->connect( "DBI:MariaDB:host=$dbhost;port=$dbport;database=$dbname",
# $dbuser, $dbpwd, { AutoCommit => 1 } )
# or die $DBI::errstr;
$dbh = db_connect(\%config);
$dbh = DBI->connect( "dbi:mysql:host=$dbhost;port=$dbport;database=$dbname",
$dbuser, $dbpwd, { AutoCommit => 1 } )
or die $DBI::errstr;
#
# Enable client-side UTF8
#
$dbh->{mysql_enable_utf8} = 1;
#
# Set up the query
#
#-------------------------------------------------------------------------------
# Set up and perform the query
#-------------------------------------------------------------------------------
$sth1 = $dbh->prepare($query) or die $DBI::errstr;
if ( $dbh->err ) {
warn $dbh->errstr;
@ -109,26 +197,398 @@ if ( $dbh->err ) {
#
# Perform the query
#
$sth1->execute;
if ( $dbh->err ) {
warn $dbh->errstr;
try {
$sth1->execute(@dbargs);
if ( $dbh->err ) {
warn $dbh->errstr;
}
}
catch ($e) {
#
# The 'die' above was triggered. The error is in $_.
#
say STDERR "Failed to execute query.";
exit 1;
}
#
# Grab everything as an arrayref of hashrefs
#
$result = $sth1->fetchall_arrayref( {} );
#
# Prepare for JSON, forcing object key sorting (expensive)
#
$json = JSON->new->utf8->canonical;
#
# Grab everything as an arrayref of hashrefs
#
$result = $sth1->fetchall_arrayref( {} );
#
# Encode the Perl structure to JSON
#
print $json->encode($result), "\n";
say $outfh $json->encode($result);
close($outfh);
exit;
#=== FUNCTION ================================================================
# NAME: db_connect
# PURPOSE: Connects to a database using configuration settings including
# the database type
# PARAMETERS: $cfg Config::General object
# RETURNS: Database handle
# DESCRIPTION: The 'dbtype' field in the configuration file gets a default,
# but the 'name' field is mandatory. Depending on the
# (lowercase) type name a different form of database connect is
# performed after '$dbtype' is set to the format the DBD driver
# needs. The database handle is returned (unless there's an
# error).
# THROWS: No exceptions
# COMMENTS: None
# SEE ALSO: N/A
#===============================================================================
sub db_connect {
my ($cfg) = @_;
my ( $dbh, $dbtype, $dbname );
$dbtype = $config{database}->{dbtype};
$dbname = $config{database}->{name};
die "Database name is mandatory\n" unless $dbname;
#
# Connect according to the database type
#
if ($dbtype =~ /sqlite/i) {
#
# The name for the SQLite driver is 'DBD:SQLite'
#
$dbtype = 'SQLite';
$dbh = DBI->connect( "DBI:$dbtype:dbname=$dbname",
"", "", { AutoCommit => 1, sqlite_unicode => 1, } )
or die $DBI::errstr;
}
elsif ($dbtype =~ /mysql/i) {
#
# The name for the MySQL driver is 'DBD:mysql'
#
$dbtype = 'mysql';
my $dbhost = $config{database}->{host};
my $dbport = $config{database}->{port};
my $dbuser = $config{database}->{user};
my $dbpwd = $config{database}->{password};
$dbh = DBI->connect( "DBI:$dbtype:host=$dbhost;port=$dbport;database=$dbname",
$dbuser, $dbpwd, { AutoCommit => 1 } )
or die $DBI::errstr;
#
# Enable client-side UTF8
#
$dbh->{mysql_enable_utf8} = 1;
# $dbh->trace('2|SQL');
}
elsif ($dbtype =~ /pg/i) {
#
# The name for the PostgreSQL driver is 'DBD:Pg'
#
$dbtype = 'Pg';
die "The PostgreSQL database type is not catered for yet.\n";
}
else {
die "Unknown database type: $dbtype\n";
}
return $dbh;
}
#=== FUNCTION ================================================================
# NAME: _debug
# PURPOSE: Prints debug reports
# PARAMETERS: $active Boolean: 1 for print, 0 for no print
# $message Message to print
# RETURNS: Nothing
# DESCRIPTION: Outputs a message if $active is true. It removes any trailing
# newline and then adds one in the 'print' to the caller doesn't
# have to bother. Prepends the message with 'D> ' to show it's
# a debug message.
# THROWS: No exceptions
# COMMENTS: None
# SEE ALSO: N/A
#===============================================================================
sub _debug {
my ( $active, $message ) = @_;
chomp($message);
print STDERR "D> $message\n" if $active;
}
#=== FUNCTION ================================================================
# NAME: _dbargs
# PURPOSE: Collects database arguments for the main query
# PARAMETERS: $opts hash reference holding the options
# RETURNS: An array holding all of the arguments
# DESCRIPTION: If there are -dbargs options they will be an array in the hash
# returned by Getopt::Long. We return the array to the caller.
# THROWS: No exceptions
# COMMENTS: None
# SEE ALSO: N/A
#===============================================================================
sub _dbargs {
my ($opts) = @_;
my @args;
if ( defined( $opts->{dbargs} ) ) {
@args = @{ $opts->{dbargs} };
}
return (@args);
}
#=== FUNCTION ================================================================
# NAME: Options
# PURPOSE: Processes command-line options
# PARAMETERS: $optref Hash reference to hold the options
# RETURNS: Undef
# DESCRIPTION:
# THROWS: no exceptions
# COMMENTS: none
# SEE ALSO: n/a
#===============================================================================
sub Options {
my ($optref) = @_;
my @options = (
"help", "documentation|man", "debug=i", "config=s",
"output=s", "query=s", "dbargs=s@",
);
if ( !GetOptions( $optref, @options ) ) {
pod2usage( -msg => "Version $VERSION\n", -exitval => 1 );
}
return;
}
__END__
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Application Documentation
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
#{{{
=head1 NAME
query2json - A script for generating CSV from database query
=head1 VERSION
This documentation refers to query2json version 0.0.3
=head1 USAGE
query2json [-help] [-documentation|-man] [-debug=N] [-config=FILE]
[-query=FILE] [-output=FILE] [-[no]header] [QUERY]
=head1 OPTIONS
=over 4
=item B<-help>
Prints a brief help message describing the usage of the program, and then exits.
=item B<-documentation> or B<-man>
Displays the entirety of the documentation (using a pager), and then exits. To
generate a PDF version use:
pod2pdf query2json --out=query2json.pdf
=item B<-debug=N>
Selects a level of debugging. Debug information consists of a line or series
of lines prefixed with the characters 'D>':
=over 4
=item B<0>
No debug output is generated: this is the default
=item B<3>
Prints all data structures from options or from the database
=back
(The debug levels need work!)
=item B<-config=FILE>
This option allows an alternative configuration file to be used. This file
defines the location of the database, its port, its name and the username and
password to be used to access it. This feature was added to allow the script
to access alternative databases.
See the CONFIGURATION AND ENVIRONMENT section below for the file format.
If the option is omitted the default file is used: B<.hpr_sqlite.cfg>
=item B<-output=FILE>
This option defines an output file to receive the result of the query. If the
option is omitted the notes are written to STDOUT, allowing them to be
redirected if required.
=item B<-query=FILE>
The script needs an SQL query to be applied to the database. This may be
supplied as a file, in which case this option gives the name of the file.
Alternatively the query can be given as a delimited string on the command
line.
If neither method is used the script aborts with an error message.
=item B<-dbarg=ARG> [ B<-dbarg=ARG> ... ]
The query can have place holders ('?') in it and the corresponding values for
these placeholders can be passed to the script through the B<-dbarg=ARG>
option. The option can be repeated as many times as required and the order of
B<ARG> values is preserved.
=item B<-[no-]header>
This option allows a header to be added to the CSV output with the names of
the database columns in CSV format. By default this is not produced.
=back
=head1 DESCRIPTION
The purpose of the script is to run a query against a local database. It will
query SQLite or MySQL/MariaDB databases. The database choice is made via
a configuration file using B<dbtype = TYPE>. The default file points to the
local SQLite copy of the HPR database, but the alternative (discussed later)
can access the equivalent MySQL database.
The data returned from the query is then passed through a JSON conversion
library and the results output to the terminal or to a file.
=head1 DIAGNOSTICS
=over 4
=item B<Unable to find configuration file ...>
The nominated (or default) configuration file could not be found.
=item B<Unable to find query file ...>
The nominated query file could not be found.
=item B<Couldn't open ...: ...>
The nominated query file could not be opened.
=item B<various database errors>
An error has occurred while performing a database operation.
=item B<Failed to execute query.>
There is a mismatch between the number of placeholders in the query ('?'
characters) and the number of arguments provided through the B<-dbargs=ARG>
option. The script will attempt to analyse whether there are too many or too
few arguments
=back
=head1 CONFIGURATION AND ENVIRONMENT
The script obtains the credentials it requires to open the SQLite or MariaDB database
from a configuration file. No credentials are required for the SQLite format.
The name of the MySQL database or the SQLite file it expects is specified as
a simple name or as an absolute file path. This configuration file can be overridden
using the B<-config=FILE> option as described above.
The configuration file formats are as follows:
SQLite format:
<database>
dbtype = SQLite
name = /home/cendjm/HPR/Community_News/hpr.db
</database>
MySQL/MariaDB format:
<database>
dbtype = MySQL
host = 127.0.0.1
name = DBNAME
user = DBUSER
password = PASSWORD
</database>
=head1 EXAMPLES
query2json -help
# Find tags fields shorter than 5 characters between shows 1 and 2000
query2json -config=.hpr_sqlite.cfg \
'SELECT id,summary,tags FROM eps WHERE id BETWEEN 1 AND 2000
AND length(tags) < 5 ORDER BY id'
# Find hosts who have released HPR shows during 2025. The database
# arguments are the dates of the start and end of the year
year=2025
query2json -query=hosts_showcount.sqlite.sql \
-dbargs "${year}-01-01" -dbargs "${year}-12-31"
=head1 DEPENDENCIES
Config::General
Cwd
Data::Dumper
DBI
File::Slurper
Getopt::Long
Pod::Usage
Text::CSV_XS
=head1 BUGS AND LIMITATIONS
There are no known bugs in this module.
Please report problems to Dave Morriss (Dave.Morriss@gmail.com)
Patches are welcome.
=head1 AUTHOR
Dave Morriss (Dave.Morriss@gmail.com)
=head1 LICENCE AND COPYRIGHT
Copyright (c) 2021, 2022, 2024, 2025 Dave Morriss (Dave.Morriss@gmail.com).
All rights reserved.
This module is free software; you can redistribute it and/or
modify it under the same terms as Perl itself. See perldoc perlartistic.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
=cut
#}}}
# [zo to open fold, zc to close]
# vim: syntax=perl:ts=8:sw=4:et:ai:tw=78:fo=tcrqn21:fdm=marker