Database/query2csv, Database/query2json, Database/query2tt2: if the query contained comments with placeholders they would be added to the count and would usually cause the script to request '-dbarg=ARG' values be added. Now the SQL has all comments stripped to avoid this.
659 lines
19 KiB
Perl
Executable File
659 lines
19 KiB
Perl
Executable File
#!/usr/bin/env perl
|
|
#===============================================================================
|
|
#
|
|
# FILE: query2csv
|
|
#
|
|
# USAGE: ./query2csv [-help] [-documentation|-man] [-debug=N]
|
|
# [-config=FILE] [-query=FILE]
|
|
# [-dbarg=ARG1 [-dbarg=ARG2] ...] [-[no-]header] [QUERY]
|
|
#
|
|
# DESCRIPTION: Runs a query given as the only argument, or provided in
|
|
# a file. Caution is needed since *any* query will be run. The
|
|
# result of the query is output in CSV form on STDOUT or to
|
|
# a file. The -header option allows a CSV header of column names
|
|
# to be added (default not added). If the query contains '?'
|
|
# placeholders they can be filled with -dbarg=ARG options.
|
|
#
|
|
# OPTIONS: ---
|
|
# REQUIREMENTS: ---
|
|
# BUGS: ---
|
|
# NOTES: ---
|
|
# AUTHOR: Dave Morriss (djm), Dave.Morriss@gmail.com
|
|
# VERSION: 0.0.5
|
|
# CREATED: 2015-07-11 15:53:01
|
|
# REVISION: 2025-05-25 18:26:13
|
|
#
|
|
#===============================================================================
|
|
|
|
use v5.40;
|
|
use utf8;
|
|
use open ':std', ':encoding(UTF-8)';
|
|
|
|
use feature qw{ say try };
|
|
|
|
use Cwd qw( getcwd abs_path ); # Detecting where the script lives
|
|
|
|
use Getopt::Long;
|
|
use Pod::Usage;
|
|
|
|
use Config::General;
|
|
use File::Slurper qw{ read_text };
|
|
use Text::CSV_XS;
|
|
use DBI;
|
|
|
|
use Data::Dumper;
|
|
|
|
#
|
|
# Version number (manually incremented)
|
|
#
|
|
our $VERSION = '0.0.5';
|
|
|
|
#
|
|
# Script name
|
|
#
|
|
( my $PROG = $0 ) =~ s|.*/||mx;
|
|
|
|
#-------------------------------------------------------------------------------
|
|
# Declarations
|
|
#-------------------------------------------------------------------------------
|
|
#
|
|
# Constants and other declarations
|
|
#
|
|
#
|
|
# Make a variable to hold the working directory where the script is located
|
|
#
|
|
( my $basedir = abs_path($0) ) =~ s|/?[^/]*$||mx;
|
|
|
|
my $configfile = "$basedir/.hpr_sqlite.cfg";
|
|
|
|
my ( $dbh, $sth1, $aref1 );
|
|
my ( $query, @names, $csv );
|
|
my ( $pcount, $acount );
|
|
|
|
#-------------------------------------------------------------------------------
|
|
# Options and arguments
|
|
#-------------------------------------------------------------------------------
|
|
my %options;
|
|
Options( \%options );
|
|
|
|
#
|
|
# Default help
|
|
#
|
|
pod2usage( -msg => "Version $VERSION\n", -exitval => 1, -verbose => 0 )
|
|
if ( $options{'help'} );
|
|
|
|
#
|
|
# Full documentation if requested with -doc
|
|
#
|
|
pod2usage(
|
|
-msg => "$PROG version $VERSION\n",
|
|
-verbose => 2,
|
|
-exitval => 1,
|
|
-noperldoc => 0,
|
|
) if ( $options{'documentation'} );
|
|
|
|
|
|
#
|
|
# Collect options
|
|
#
|
|
my $DEBUG = ( $options{'debug'} ? $options{'debug'} : 0 );
|
|
|
|
my $cfgfile
|
|
= ( defined( $options{config} ) ? $options{config} : $configfile );
|
|
|
|
my $queryfile = $options{query};
|
|
|
|
my $header = ( defined( $options{header} ) ? $options{header} : 0 );
|
|
|
|
my $outfile = $options{output};
|
|
_debug( $DEBUG >= 3, '$outfile: ' . $outfile ) if ($outfile);
|
|
|
|
my @dbargs = _dbargs( \%options );
|
|
_debug( $DEBUG >= 3, '@dbargs: ' . join( ',', @dbargs ) );
|
|
|
|
#-------------------------------------------------------------------------------
|
|
# Option checks and defaults
|
|
#-------------------------------------------------------------------------------
|
|
die "Unable to find configuration file $cfgfile\n" unless ( -e $cfgfile );
|
|
_debug( $DEBUG >= 3, '$cfgfile: ' . $cfgfile );
|
|
|
|
#
|
|
# Query is an argument string or is in a file
|
|
#
|
|
if ($queryfile) {
|
|
die "Unable to find query file $queryfile\n" unless ( -e $queryfile );
|
|
$query = read_text($queryfile);
|
|
}
|
|
else {
|
|
$query = shift;
|
|
pod2usage(
|
|
-msg => "Please specify a SQL query\n",
|
|
-exitval => 1,
|
|
-verbose => 0
|
|
)
|
|
unless $query;
|
|
}
|
|
_debug( $DEBUG >= 3, '$query: ' . Dumper(\$query) );
|
|
|
|
#
|
|
# Strip SQL comments
|
|
#
|
|
$query = strip_sql_comments($query);
|
|
|
|
#
|
|
# Count placeholders in the query and the arguments provided. First remove all
|
|
# comments which may contain '?' characters, then count any that are left.
|
|
#
|
|
#$query = join("\n", grep {!/^--/} split( "\n", $query ) );
|
|
$pcount = grep {/\?/} split( '', $query );
|
|
$acount = scalar(@dbargs);
|
|
|
|
#
|
|
# Check the placeholder and argument counts are the same
|
|
#
|
|
if ( $pcount ne $acount) {
|
|
say STDERR "Query placeholder vs argument mismatch";
|
|
say STDERR "Placeholders = $pcount, Arguments = $acount";
|
|
pod2usage(
|
|
-msg => "Wrong number of DB arguments\n",
|
|
-exitvalue => 1,
|
|
-verbose => 0
|
|
);
|
|
}
|
|
|
|
#-------------------------------------------------------------------------------
|
|
# Open the output file (or STDOUT)
|
|
#-------------------------------------------------------------------------------
|
|
my $outfh;
|
|
if ($outfile) {
|
|
open( $outfh, ">:encoding(UTF-8)", $outfile )
|
|
or die "Unable to open $outfile for writing: $!";
|
|
}
|
|
else {
|
|
open( $outfh, ">&", \*STDOUT )
|
|
or die "Unable to initialise for writing: $!";
|
|
}
|
|
|
|
#-------------------------------------------------------------------------------
|
|
# Load database configuration data; allow environment variables
|
|
#-------------------------------------------------------------------------------
|
|
my $conf = Config::General->new(
|
|
-ConfigFile => $cfgfile,
|
|
-InterPolateVars => 1,
|
|
-InterPolateEnv => 1,
|
|
-ExtendedAccess => 1
|
|
);
|
|
my %config = $conf->getall();
|
|
|
|
#
|
|
# Set defaults in case values have been omitted
|
|
#
|
|
$config{database}->{dbtype} //= 'SQLite';
|
|
$config{database}->{host} //= '127.0.0.1';
|
|
$config{database}->{port} //= 3306;
|
|
|
|
#-------------------------------------------------------------------------------
|
|
# Connect to the database
|
|
#-------------------------------------------------------------------------------
|
|
$dbh = db_connect(\%config);
|
|
|
|
#-------------------------------------------------------------------------------
|
|
# Set up and perform the query
|
|
#-------------------------------------------------------------------------------
|
|
$sth1 = $dbh->prepare($query) or die $DBI::errstr;
|
|
if ( $dbh->err ) {
|
|
warn $dbh->errstr;
|
|
}
|
|
|
|
#
|
|
# Perform the query
|
|
#
|
|
try {
|
|
$sth1->execute(@dbargs);
|
|
if ( $dbh->err ) {
|
|
warn $dbh->errstr;
|
|
}
|
|
}
|
|
catch ($e) {
|
|
#
|
|
# The 'die' above was triggered. The error is in $_.
|
|
#
|
|
say STDERR "Failed to execute query.";
|
|
exit 1;
|
|
}
|
|
|
|
#
|
|
# Prepare to make CSV. Not sure if always quoting is the best idea though
|
|
#
|
|
$csv = Text::CSV_XS->new(
|
|
# { always_quote => 1 }
|
|
);
|
|
|
|
#
|
|
# Collect field names and output them at the start of the CSV if requested
|
|
#
|
|
if ($header) {
|
|
@names = @{$sth1->{NAME}};
|
|
_debug( $DEBUG >= 3, '@names: ' . Dumper(\@names) );
|
|
|
|
$csv->combine(@names);
|
|
say $outfh $csv->string();
|
|
}
|
|
|
|
#
|
|
# Loop through the returned rows making and printing CSV. Each row is returned
|
|
# as an arrayref to make it easy to join everything.
|
|
#
|
|
while ( $aref1 = $sth1->fetchrow_arrayref ) {
|
|
$csv->combine(@$aref1);
|
|
say $outfh $csv->string();
|
|
}
|
|
close($outfh);
|
|
|
|
exit;
|
|
|
|
#=== FUNCTION ================================================================
|
|
# NAME: db_connect
|
|
# PURPOSE: Connects to a database using configuration settings including
|
|
# the database type
|
|
# PARAMETERS: $cfg Config::General object
|
|
# RETURNS: Database handle
|
|
# DESCRIPTION: The 'dbtype' field in the configuration file gets a default,
|
|
# but the 'name' field is mandatory. Depending on the
|
|
# (lowercase) type name a different form of database connect is
|
|
# performed after '$dbtype' is set to the format the DBD driver
|
|
# needs. The database handle is returned (unless there's an
|
|
# error).
|
|
# THROWS: No exceptions
|
|
# COMMENTS: None
|
|
# SEE ALSO: N/A
|
|
#===============================================================================
|
|
sub db_connect { #{{{
|
|
my ($cfg) = @_;
|
|
|
|
my ( $dbh, $dbtype, $dbname );
|
|
|
|
$dbtype = $cfg->{database}->{dbtype};
|
|
$dbname = $cfg->{database}->{name};
|
|
die "Database name is mandatory\n" unless $dbname;
|
|
|
|
#
|
|
# Connect according to the database type
|
|
#
|
|
if ($dbtype =~ /sqlite/i) {
|
|
#
|
|
# The name for the SQLite driver is 'DBD:SQLite'
|
|
#
|
|
$dbtype = 'SQLite';
|
|
_debug( $DEBUG >= 3, '$dbtype: ' . $dbtype, '$dbname: ' . $dbname );
|
|
|
|
$dbh = DBI->connect( "DBI:$dbtype:dbname=$dbname",
|
|
"", "", { AutoCommit => 1, sqlite_unicode => 1, } )
|
|
or die $DBI::errstr;
|
|
}
|
|
elsif ($dbtype =~ /mysql/i) {
|
|
#
|
|
# The name for the MySQL driver is 'DBD:mysql'
|
|
#
|
|
$dbtype = 'mysql';
|
|
|
|
my $dbhost = $cfg->{database}->{host};
|
|
my $dbport = $cfg->{database}->{port};
|
|
my $dbuser = $cfg->{database}->{user};
|
|
my $dbpwd = $cfg->{database}->{password};
|
|
|
|
$dbh = DBI->connect( "DBI:$dbtype:host=$dbhost;port=$dbport;database=$dbname",
|
|
$dbuser, $dbpwd, { AutoCommit => 1 } )
|
|
or die $DBI::errstr;
|
|
|
|
#
|
|
# Enable client-side UTF8
|
|
#
|
|
$dbh->{mysql_enable_utf8} = 1;
|
|
|
|
# $dbh->trace('2|SQL');
|
|
}
|
|
elsif ($dbtype =~ /pg/i) {
|
|
#
|
|
# The name for the PostgreSQL driver is 'DBD:Pg'
|
|
#
|
|
$dbtype = 'Pg';
|
|
|
|
die "The PostgreSQL database type is not catered for yet.\n";
|
|
}
|
|
else {
|
|
die "Unknown database type: $dbtype\n";
|
|
}
|
|
|
|
return $dbh;
|
|
} #}}}
|
|
|
|
#=== FUNCTION ================================================================
|
|
# NAME: strip_sql_comments
|
|
# PURPOSE: Given a query as a scalar, strips all SQL comments
|
|
# PARAMETERS: $query string containing a query
|
|
# RETURNS: Stripped string
|
|
# DESCRIPTION: Two types of comments might exist in the query: the C-style
|
|
# and the SQL style. The string is treated as a single string
|
|
# even though it's multi-line, and any C-style comments are
|
|
# removed. Then the string is treated as multi-line and each
|
|
# line is scanned for SQL comments (which end at the end of the
|
|
# line), and these are stripped. Blank lines are skipped too to
|
|
# compress the output a little.
|
|
# THROWS: No exceptions
|
|
# COMMENTS: None
|
|
# SEE ALSO: N/A
|
|
#===============================================================================
|
|
sub strip_sql_comments { #{{{
|
|
my ($query) = @_;
|
|
|
|
my $result;
|
|
|
|
#
|
|
# Strip C-style comments
|
|
#
|
|
$query =~ s/\/\*.*?\*\///sg;
|
|
|
|
#
|
|
# Strip SQL line-oriented comments
|
|
#
|
|
foreach my $line (split(/\n/,$query)) {
|
|
next if $line =~ /^\s*$/;
|
|
$line =~ s/--.*$//;
|
|
$result .= "$line\n";
|
|
}
|
|
|
|
return $result;
|
|
} #}}}
|
|
|
|
#=== FUNCTION ================================================================
|
|
# NAME: _debug
|
|
# PURPOSE: Prints debug reports
|
|
# PARAMETERS: $active Boolean: 1 for print, 0 for no print
|
|
# @messages Messages to print
|
|
# RETURNS: Nothing
|
|
# DESCRIPTION: Outputs messages if $active is true. It removes any trailing
|
|
# newlines and then adds one to each line so the caller doesn't
|
|
# have to bother. Prepends 'D> ' to each message to show it's
|
|
# a debug message.
|
|
# THROWS: No exceptions
|
|
# COMMENTS: None
|
|
# SEE ALSO: N/A
|
|
#===============================================================================
|
|
sub _debug { #{{{
|
|
my ( $active, @messages ) = @_;
|
|
|
|
if ($active) {
|
|
chomp(@messages);
|
|
say STDERR "D> ", join( "\nD> ", @messages );
|
|
}
|
|
} #}}}
|
|
|
|
#=== FUNCTION ================================================================
|
|
# NAME: _dbargs
|
|
# PURPOSE: Collects database arguments for the main query
|
|
# PARAMETERS: $opts hash reference holding the options
|
|
# RETURNS: An array holding all of the arguments
|
|
# DESCRIPTION: If there are -dbargs options they will be an array in the hash
|
|
# returned by Getopt::Long. We return the array to the caller.
|
|
# THROWS: No exceptions
|
|
# COMMENTS: None
|
|
# SEE ALSO: N/A
|
|
#===============================================================================
|
|
sub _dbargs { #{{{
|
|
my ($opts) = @_;
|
|
|
|
my @args;
|
|
|
|
if ( defined( $opts->{dbarg} ) ) {
|
|
@args = @{ $opts->{dbarg} };
|
|
}
|
|
|
|
return (@args);
|
|
} #}}}
|
|
|
|
#=== FUNCTION ================================================================
|
|
# NAME: Options
|
|
# PURPOSE: Processes command-line options
|
|
# PARAMETERS: $optref Hash reference to hold the options
|
|
# RETURNS: Undef
|
|
# DESCRIPTION:
|
|
# THROWS: no exceptions
|
|
# COMMENTS: none
|
|
# SEE ALSO: n/a
|
|
#===============================================================================
|
|
sub Options { #{{{
|
|
my ($optref) = @_;
|
|
|
|
my @options = (
|
|
"help", "documentation|man", "debug=i", "config=s",
|
|
"output=s", "query=s", "dbarg=s@", "header!",
|
|
);
|
|
|
|
if ( !GetOptions( $optref, @options ) ) {
|
|
pod2usage( -msg => "Version $VERSION\n", -exitval => 1 );
|
|
}
|
|
|
|
return;
|
|
} #}}}
|
|
|
|
__END__
|
|
|
|
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
# Application Documentation
|
|
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
#{{{
|
|
|
|
=head1 NAME
|
|
|
|
query2csv - A script for generating CSV from database query
|
|
|
|
=head1 VERSION
|
|
|
|
This documentation refers to query2csv version 0.0.5
|
|
|
|
=head1 USAGE
|
|
|
|
query2csv [-help] [-documentation|-man] [-debug=N] [-config=FILE]
|
|
[-query=FILE] [-output=FILE] [-[no]header] [QUERY]
|
|
|
|
=head1 OPTIONS
|
|
|
|
=over 4
|
|
|
|
=item B<-help>
|
|
|
|
Prints a brief help message describing the usage of the program, and then exits.
|
|
|
|
=item B<-documentation> or B<-man>
|
|
|
|
Displays the entirety of the documentation (using a pager), and then exits. To
|
|
generate a PDF version use:
|
|
|
|
pod2pdf query2csv --out=query2csv.pdf
|
|
|
|
=item B<-debug=N>
|
|
|
|
Selects a level of debugging. Debug information consists of a line or series
|
|
of lines prefixed with the characters 'D>':
|
|
|
|
=over 4
|
|
|
|
=item B<0>
|
|
|
|
No debug output is generated: this is the default
|
|
|
|
=item B<3>
|
|
|
|
Prints all data structures from options or from the database
|
|
|
|
=back
|
|
|
|
(The debug levels need work!)
|
|
|
|
=item B<-config=FILE>
|
|
|
|
This option allows an alternative configuration file to be used. This file
|
|
defines the location of the database, its port, its name and the username and
|
|
password to be used to access it. This feature was added to allow the script
|
|
to access alternative databases.
|
|
|
|
See the CONFIGURATION AND ENVIRONMENT section below for the file format.
|
|
|
|
If the option is omitted the default file is used: B<.hpr_sqlite.cfg>
|
|
|
|
=item B<-output=FILE>
|
|
|
|
This option defines an output file to receive the result of the query. If the
|
|
option is omitted the notes are written to STDOUT, allowing them to be
|
|
redirected if required.
|
|
|
|
=item B<-query=FILE>
|
|
|
|
The script needs an SQL query to be applied to the database. This may be
|
|
supplied as a file, in which case this option gives the name of the file.
|
|
|
|
Alternatively the query can be given as a delimited string on the command
|
|
line.
|
|
|
|
If neither method is used the script aborts with an error message.
|
|
|
|
=item B<-dbarg=ARG> [ B<-dbarg=ARG> ... ]
|
|
|
|
The query can have place holders ('?') in it and the corresponding values for
|
|
these placeholders can be passed to the script through the B<-dbarg=ARG>
|
|
option. The option can be repeated as many times as required and the order of
|
|
B<ARG> values is preserved.
|
|
|
|
=item B<-[no-]header>
|
|
|
|
This option allows a header to be added to the CSV output with the names of
|
|
the database columns in CSV format. By default this is not produced.
|
|
|
|
=back
|
|
|
|
=head1 DESCRIPTION
|
|
|
|
The purpose of the script is to run a query against a local database. It will
|
|
query SQLite or MySQL/MariaDB databases. The database choice is made via
|
|
a configuration file using B<dbtype = TYPE>. The default file points to the
|
|
local SQLite copy of the HPR database, but the alternative (discussed later)
|
|
can access the equivalent MySQL database.
|
|
|
|
The data returned from the query is then passed through a CSV conversion
|
|
library and the results output to the terminal or to a file.
|
|
|
|
=head1 DIAGNOSTICS
|
|
|
|
=over 4
|
|
|
|
=item B<Unable to find configuration file ...>
|
|
|
|
The nominated (or default) configuration file could not be found.
|
|
|
|
=item B<Unable to find query file ...>
|
|
|
|
The nominated query file could not be found.
|
|
|
|
=item B<Couldn't open ...: ...>
|
|
|
|
The nominated query file could not be opened.
|
|
|
|
=item B<various database errors>
|
|
|
|
An error has occurred while performing a database operation.
|
|
|
|
=item B<Failed to execute query.>
|
|
|
|
There is a mismatch between the number of placeholders in the query ('?'
|
|
characters) and the number of arguments provided through the B<-dbargs=ARG>
|
|
option. The script will attempt to analyse whether there are too many or too
|
|
few arguments
|
|
|
|
=back
|
|
|
|
=head1 CONFIGURATION AND ENVIRONMENT
|
|
|
|
The script obtains the credentials it requires to open the SQLite or MariaDB database
|
|
from a configuration file. No credentials are required for the SQLite format.
|
|
The name of the MySQL database or the SQLite file it expects is specified as
|
|
a simple name or as an absolute file path. This configuration file can be overridden
|
|
using the B<-config=FILE> option as described above.
|
|
|
|
The configuration file formats are as follows:
|
|
|
|
SQLite format:
|
|
|
|
<database>
|
|
dbtype = SQLite
|
|
name = /home/cendjm/HPR/Community_News/hpr.db
|
|
</database>
|
|
|
|
MySQL/MariaDB format:
|
|
|
|
<database>
|
|
dbtype = MySQL
|
|
host = 127.0.0.1
|
|
name = DBNAME
|
|
user = DBUSER
|
|
password = PASSWORD
|
|
</database>
|
|
|
|
=head1 EXAMPLES
|
|
|
|
query2csv -help
|
|
|
|
# Find tags fields shorter than 5 characters between shows 1 and 2000
|
|
query2csv -config=.hpr_sqlite.cfg \
|
|
'SELECT id,summary,tags FROM eps WHERE id BETWEEN 1 AND 2000
|
|
AND length(tags) < 5 ORDER BY id'
|
|
|
|
# Find hosts who have released HPR shows during 2025. The database
|
|
# arguments are the dates of the start and end of the year
|
|
year=2025
|
|
query2csv -query=hosts_showcount.sqlite.sql \
|
|
-dbargs "${year}-01-01" -dbargs "${year}-12-31"
|
|
|
|
=head1 DEPENDENCIES
|
|
|
|
Config::General
|
|
Cwd
|
|
Data::Dumper
|
|
DBI
|
|
File::Slurper
|
|
Getopt::Long
|
|
Pod::Usage
|
|
Text::CSV_XS
|
|
|
|
=head1 BUGS AND LIMITATIONS
|
|
|
|
There are no known bugs in this module.
|
|
Please report problems to Dave Morriss (Dave.Morriss@gmail.com)
|
|
Patches are welcome.
|
|
|
|
=head1 AUTHOR
|
|
|
|
Dave Morriss (Dave.Morriss@gmail.com)
|
|
|
|
|
|
=head1 LICENCE AND COPYRIGHT
|
|
|
|
Copyright (c) 2021, 2022, 2024, 2025 Dave Morriss (Dave.Morriss@gmail.com).
|
|
All rights reserved.
|
|
|
|
This module is free software; you can redistribute it and/or
|
|
modify it under the same terms as Perl itself. See perldoc perlartistic.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
|
|
|
=cut
|
|
|
|
#}}}
|
|
|
|
# [zo to open fold, zc to close]
|
|
|
|
# vim: syntax=perl:ts=8:sw=4:et:ai:tw=78:fo=tcrqn21:fdm=marker
|
|
|