#!/usr/bin/env perl
#===============================================================================
#
#         FILE: report_missing_tags
#
#        USAGE: ./report_missing_tags [-help] [-out=FILE]
#               [-sort=FIELD1[,FIELD2...]] [-action-csv=FILE] [-tags-csv=FILE]
#               [-json=FILE] [-config=FILE]
#
#  DESCRIPTION: Generate a report of shows which are missing tags and
#               summaries in the HPR database.
#
#      OPTIONS: ---
# REQUIREMENTS: ---
#         BUGS: ---
#        NOTES: ---
#       AUTHOR: Dave Morriss (djm), Dave.Morriss@gmail.com
#      VERSION: 0.1.4
#      CREATED: 2015-08-03 21:20:53
#     REVISION: 2022-08-03 23:00:42
#
#===============================================================================

use 5.010;
use strict;
use warnings;
use utf8;

use Getopt::Long;
use Pod::Usage;

use Config::General;

use Template;
use Template::Filters;
Template::Filters->use_html_entities;    # Use HTML::Entities in the template

use Text::CSV_XS;

use JSON;

use DBI;

use Data::Dumper;

#
# Version number (manually incremented)
#
our $VERSION = '0.1.4';

#
# Script and directory names
#
( my $PROG = $0 ) =~ s|.*/||mx;
( my $DIR  = $0 ) =~ s|/?[^/]*$||mx;
$DIR = '.' unless $DIR;

#-------------------------------------------------------------------------------
# Declarations
#-------------------------------------------------------------------------------
#
# Constants and other declarations
#
my $basedir    = "$ENV{HOME}/HPR/Database";
my $configfile = "$basedir/.hpr_db.cfg";
my $template   = "$basedir/$PROG.tpl";

my ( $dbh, $sth1, $h1 );
my ( $s_count, $t_count, $st_count, $missing, $showsbyhosts );
my @order_by;

#
# Enable Unicode mode
#
binmode STDOUT, ":encoding(UTF-8)";
binmode STDERR, ":encoding(UTF-8)";

#-------------------------------------------------------------------------------
# Options and arguments
#-------------------------------------------------------------------------------
my $DEF_DEBUG = 0;

#
# Process options
#
my %options;
Options( \%options );

#
# Default help
#
pod2usage( -msg => "$PROG version $VERSION\n", -exitval => 1 )
    if ( $options{'help'} );

#
# Collect options
#
my $DEBUG = ( defined( $options{debug} ) ? $options{debug} : $DEF_DEBUG );
my $cfgfile
    = ( defined( $options{config} ) ? $options{config} : $configfile );

my $outfile = $options{out};

if ( exists( $options{sort} ) ) {
    @order_by = getMultiOpts( $options{sort}, ',', 1,
        [ 'id', 'date', 'title', 'hostid', 'host' ] );
}
else {
    @order_by = qw{ id };
}
_debug( $DEBUG > 1, '@order_by: ' . join( "/", @order_by ) );

#
# Handle the optional Action CSV output file
#
my $acsvfile = $options{'action-csv'};
my $acsvfh;
if ($acsvfile) {
    open( $acsvfh, ">:encoding(UTF-8)", $acsvfile )
        or die "Unable to open $acsvfile for writing: $!";
}

#
# Handle the optional Tags CSV output file
#
my $tcsvfile = $options{'tags-csv'};
my $tcsvfh;
if ($tcsvfile) {
    open( $tcsvfh, ">:encoding(UTF-8)", $tcsvfile )
        or die "Unable to open $tcsvfile for writing: $!";
}

#
# Handle the optional JSON output file
#
my $jsonfile = $options{json};
my $jsonfh;
if ($jsonfile) {
    open( $jsonfh, ">:encoding(UTF-8)", $jsonfile )
        or die "Unable to open $jsonfile for writing: $!";
}

#-------------------------------------------------------------------------------
# Open the output file (or STDOUT) - we may need the date to do it
#-------------------------------------------------------------------------------
my $outfh;
if ($outfile) {
    open( $outfh, ">:encoding(UTF-8)", $outfile )
        or die "Unable to open $outfile for writing: $!";
}
else {
    open( $outfh, ">&", \*STDOUT )
        or die "Unable to initialise for writing: $!";
}

#
# Sanity check
#
die "Unable to find $cfgfile\n" unless ( -e $cfgfile );

#
# Load configuration data
#
my $conf = new Config::General(
    -ConfigFile      => $cfgfile,
    -InterPolateVars => 1,
    -ExtendedAccess  => 1
);
my %config = $conf->getall();

#-------------------------------------------------------------------------------
# Connect to the database
#-------------------------------------------------------------------------------
my $dbhost = $config{database}->{host} // '127.0.0.1';
my $dbport = $config{database}->{port} // 3306;
my $dbname = $config{database}->{name};
my $dbuser = $config{database}->{user};
my $dbpwd  = $config{database}->{password};
$dbh = DBI->connect( "dbi:mysql:host=$dbhost;port=$dbport;database=$dbname",
    $dbuser, $dbpwd, { AutoCommit => 1 } )
    or die $DBI::errstr;

#
# Enable client-side UTF8
#
$dbh->{mysql_enable_utf8} = 1;

#-------------------------------------------------------------------------------
# Get the counts of missing items
#-------------------------------------------------------------------------------
$sth1 = $dbh->prepare(
    q{
    SELECT
        (SELECT count(*) FROM eps
         WHERE length(summary) = 0 AND DATEDIFF(date,CURDATE()) <= 0) AS s_count,
        (SELECT count(*) FROM eps
         WHERE length(tags) = 0 AND DATEDIFF(date,CURDATE()) <= 0) AS t_count,
        (SELECT count(*) FROM eps
         WHERE length(summary) = 0 AND length(tags) = 0
         AND DATEDIFF(date,CURDATE()) <= 0) AS st_count
}
) or die $DBI::errstr;

$sth1->execute;
if ( $dbh->err ) {
    warn $dbh->errstr;
}

#
# Fetch the data from the query and use hashref slicing to extract
#
$h1 = $sth1->fetchrow_hashref();
( $s_count, $t_count, $st_count )
    = @{$h1}{ 's_count', 't_count', 'st_count' };

_debug( $DEBUG > 1, '$s_count, $t_count, $st_count: ' .
    Dumper(\$s_count, \$t_count, \$st_count));

#-------------------------------------------------------------------------------
#  Create the main query with selected sort arguments
#-------------------------------------------------------------------------------
my $sql = q{
    SELECT
        e.id,
        date_format(e.date,'00:00:00 %d/%m/%Y') AS date,
        e.title,
        h.host,
        e.hostid AS hostid,
        if(length(e.summary) = 0,0,1) AS summary,
        if(length(e.tags) = 0,0,1) AS tags
    FROM eps e JOIN hosts h ON e.hostid = h.hostid
    WHERE (length(e.summary) = 0
        OR length(e.tags) = 0)
        AND DATEDIFF(e.date,CURDATE()) <= 0
};

$sql .= 'ORDER BY ' . join( ",", @order_by );

#-------------------------------------------------------------------------------
# Perform the main query and grab the results as an array of hashes
#-------------------------------------------------------------------------------
$sth1 = $dbh->prepare($sql) or die $DBI::errstr;
if ( $dbh->err ) {
    die $dbh->errstr;
}

$sth1->execute;
if ( $dbh->err ) {
    die $dbh->errstr;
}

#
# Grab the data as an arrayref of hashrefs
#
$missing = $sth1->fetchall_arrayref( {} );

#
# If asked for an action CSV file deal with it here
#
if ($acsvfile) {
    foreach my $row (@{$missing}) {
        printf $acsvfh "%s,%s,%s\n", $row->{id},$row->{summary},$row->{tags};
    }
    close($acsvfh);
}

#-------------------------------------------------------------------------------
# Perform a query relating hosts to shows. Each host who has shows in the
# database without tags or a summary is returned with the list of shows
# needing attention.
#-------------------------------------------------------------------------------
$sth1 = $dbh->prepare(
    q{
    SELECT
        e.hostid,
        h.host,
        group_concat(e.id ORDER BY e.id) AS shows
    FROM eps e
    JOIN hosts h ON e.hostid = h.hostid
    WHERE (length(e.summary) = 0
        OR length(e.tags) = 0)
        AND DATEDIFF(e.date,CURDATE()) <= 0
    GROUP BY e.hostid
    ORDER BY h.host
    }
) or die $DBI::errstr;

$sth1->execute;
if ( $dbh->err ) {
    warn $dbh->errstr;
}

#
# Grab the data as an arrayref of hashrefs
#
$showsbyhosts = $sth1->fetchall_arrayref( {} );

#-------------------------------------------------------------------------------
# Perform a scan of episodes for tags and accumulate them in a hash
#-------------------------------------------------------------------------------
$sth1 = $dbh->prepare(
    q{SELECT id,title,tags FROM eps WHERE length(tags) > 0}
) or die $DBI::errstr;

$sth1->execute;
if ( $dbh->err ) {
    warn $dbh->errstr;
}

my ( $status, @fields, %tag_ids, $lastkey, @tagindex, %showtitles );

my $csv = Text::CSV_XS->new(
    { binary => 1, auto_diag => 1, allow_loose_quotes => 1 } );

#
# Loop through the episodes returned by the query
#
while ( $h1 = $sth1->fetchrow_hashref ) {
    #
    # Stash the show title with the show number
    #
    $showtitles{$h1->{id}} = $h1->{title};

    #
    # Parse the tag list for the current episode
    #
    $status = $csv->parse( $h1->{tags} );
    unless ($status) {
        #
        # Report any errors
        #
        print "Parse error on episode ", $h1->{id}, "\n";
        print $csv->error_input(), "\n";
        next;
    }
    @fields = $csv->fields();

    #
    # Not sure why there are no tags but if not ignore this episode
    #
    next unless (@fields);

    #
    # Trim and lowercase all tags
    #
    @fields = map {
        my $t = $_;
        $t =~ s/(^\s+|\s+$)//g;
        lc($t)
    } @fields;

    #
    # Loop through the tags. For each tag add the associated episode id to the
    # %tag_ids hash. The key to this hash is the lower case tag and the value
    # is an array of episode numbers.
    #
    foreach my $tag (@fields) {
        if ( defined( $tag_ids{$tag} ) ) {
            #
            # Add to the existing array
            #
            push( @{$tag_ids{$tag}}, $h1->{id} );
        }
        else {
            #
            # Create the episode array
            #
            $tag_ids{$tag} = [$h1->{id}];
        }
    }
}

#
# Dumps the whole tags table. Warning!
#
_debug( $DEBUG > 2, '%tag_ids: ' . Dumper( \%tag_ids ) );

#-------------------------------------------------------------------------------
# Make an alphabetic index of the tags
#-------------------------------------------------------------------------------
$lastkey = '';
foreach my $tag ( sort( keys(%tag_ids) ) ) {
    if (substr($tag,0,1) ne $lastkey) {
        $lastkey = substr($tag,0,1);
        push(@tagindex,$tag);
    }
}

_debug( $DEBUG > 1, '@tagindex: ' . Dumper( \@tagindex ) );

#-------------------------------------------------------------------------------
# Output tags and show numbers in CSV form if requested
#-------------------------------------------------------------------------------
if ($tcsvfile) {
    my @line;
    foreach my $tag ( sort( keys(%tag_ids) ) ) {
        push(@line,$tag);

        foreach my $show (@{$tag_ids{$tag}}) {
            push(@line,$show);
        }

        print $tcsvfh join(",",@line), "\n";
        @line = ();
    }
    close($tcsvfh);
}


#-------------------------------------------------------------------------------
# Output a JSON report of the TSU (Tag and Summary Update) project if
# requested. Now no longer needed since the TSU project is finished.
#-------------------------------------------------------------------------------
if ($jsonfile) {
    my $jvars = {
        without_summaries => $s_count,
        without_tags      => $t_count,
        without_either    => $st_count,
        need_work         => $s_count + $t_count - $st_count,
    };

    my $json = JSON->new->utf8;
    print $jsonfh $json->encode($jvars);
    close($jsonfh);
}

#-------------------------------------------------------------------------------
# Fill and print the template
#-------------------------------------------------------------------------------
my $tt = Template->new(
    {   ABSOLUTE     => 1,
        ENCODING     => 'utf8',
        INCLUDE_PATH => $basedir,
        OUTPUT_PATH  => '.',
    }
);
my $vars = {
    title    => 'Shows without a summary and/or tags',
    order_by => \@order_by,
    s_count  => $s_count,
    t_count  => $t_count,
    st_count => $st_count,
    shows    => $missing,
    byhost   => $showsbyhosts,
    tag_ids  => \%tag_ids,
    tagindex => \@tagindex,
    titles   => \%showtitles,
};
my $document;
$tt->process( $template, $vars, \$document, { binmode => ':utf8' } )
    || die $tt->error(), "\n";

print $outfh $document;
close($outfh);

$dbh->disconnect;

exit;

#===  FUNCTION  ================================================================
#         NAME: _debug
#      PURPOSE: Prints debug reports
#   PARAMETERS: $active         Boolean: 1 for print, 0 for no print
#               $message        Message to print
#      RETURNS: Nothing
#  DESCRIPTION: Outputs a message if $active is true. It removes any trailing
#               newline and then adds one in the 'print' to the caller doesn't
#               have to bother. Prepends the message with 'D> ' to show it's
#               a debug message.
#       THROWS: No exceptions
#     COMMENTS: None
#     SEE ALSO: N/A
#===============================================================================
sub _debug {
    my ( $active, $message ) = @_;

    chomp($message);
    print "D> $message\n" if $active;
}

#===  FUNCTION  ================================================================
#         NAME: getMultiOpts
#      PURPOSE: For use with a Getopt::Long option which is defined as "opt=s@"
#   PARAMETERS: $rargs          reference to an array of arguments from
#                               GetOpt::Long
#               $delim          delimiter to be used to split arguments
#               $lc             1 -> lowercase the arguments, 0 -> leave alone
#               $rallowed       reference to an array of allowed values (use
#                               undef if anything is allowed)
#      RETURNS: A list containing all of the individual arguments
#  DESCRIPTION: With "opt=s@" in the option definition the script will only
#               accept the repetition of the option with different values. You
#               can present "-opt=a,b,c" but no special parsing is done on
#               this. This function extends option parsing to cater for such
#               lists. The list is made unique and care is taken to keep the
#               original order.
#       THROWS: No exceptions
#     COMMENTS: None
#     SEE ALSO: N/A
#===============================================================================
sub getMultiOpts {
    my ( $rargs, $delim, $lc, $rallowed ) = @_;

    #
    # Defaults
    #
    $delim = ',' unless $delim;
    $lc    = 0   unless $lc;

    #
    # Walk through the array of arguments and check them, lower case them,
    # save them or split them
    #
    my @args;
    foreach my $arg (@$rargs) {
        $arg = lc($arg) if $lc;
        if ( $arg =~ /$delim/ ) {
            push( @args, split( /$delim/, $arg ) );
        }
        else {
            push( @args, $arg );
        }
    }

    #
    # Remove the elements that aren't allowed
    #
    if ($rallowed) {
        my @new;
        foreach my $elem (@args) {
            push( @new, $elem ) if grep( /^$elem$/, @$rallowed );
        }
        @args = @new;
    }

    #
    # De-duplicate the result (without losing the original order)
    #
    my @arr1;
    foreach my $elem (@args) {
        push( @arr1, $elem ) if !grep( /^$elem$/, @arr1 );
    }
    @args = @arr1;

    #
    # Return the array as a list
    #
    return @args;
}

#===  FUNCTION  ================================================================
#         NAME: Options
#      PURPOSE: Processes command-line options
#   PARAMETERS: $optref     Hash reference to hold the options
#      RETURNS: Undef
#  DESCRIPTION:
#       THROWS: no exceptions
#     COMMENTS: none
#     SEE ALSO: n/a
#===============================================================================
sub Options {
    my ($optref) = @_;

    my @options = (
        "help",         "debug=i",    "out=s",  "sort=s@",
        "action-csv=s", "tags-csv=s", "json=s", "config=s",
    );

    if ( !GetOptions( $optref, @options ) ) {
        pod2usage( -msg => "$PROG version $VERSION\n", -exitval => 1 );
    }

    return;
}

__END__

#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
#  Application Documentation
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
#{{{

=head1 NAME

report_missing_tags - Report missing HPR summaries and/or tags

=head1 VERSION

This documentation refers to B<report_missing_tags> version 0.1.4

=head1 USAGE

    ./report_missing_tags [-help] [-debug=N] [-out=FILE]
        [-sort=FIELD1[,FIELD2...]] [-action-csv=FILE] [-tags-csv=FILE]
        [-json=FILE] [-config=FILE]

    Examples:

    ./report_missing_tags
    ./report_missing_tags -help
    ./report_missing_tags -out=missing_tags.html
    ./report_missing_tags -out=missing_tags.html -action-csv=tag_summary_actions.csv
    ./report_missing_tags -out=missing_tags.html -tags-csv=tag_summary_actions.csv
    ./report_missing_tags -out=missing_tags.html -json=tag_data.json
    ./report_missing_tags -out=missing_tags.html -config=$HOME/HPR/.hpr_livedb.cfg

=head1 OPTIONS

=over 8

=item B<-help>

Prints a brief help message describing the usage of the program, and then exits.

=item B<-debug=N>

Causes certain debugging information to be displayed.

    0   (the default) no debug output
    1   N/A
    2   reports the chosen sort order and the counts of work yet to be done
        (now obsolete). Also dumps:
            - @tagindex an array containing tags for the index
    3   dumps data structures:
            - %tag_ids: the data used to build the entire tag list (warning!)

=item B<-out=FILE>

This option defines an output file to receive the report. If the option is
omitted the report is written to STDOUT, allowing it to be redirected if
required.

=item B<-sort=FIELD1[,FIELD2...]>

Changes the sort order of the report. The default is to sort by the I<id>
column, but other columns may be used, one or more.

=item B<-action-csv=FILE>

This optionally defines a file into which simple CSV data is written showing
the actions needed for shows missing tags or summarise. The CSV rows consist
of: the show number, a 0/1 value for the summary, and a 0/1 value for the
tags. This is for the use of other tools that need to know if it's OK to
present a show in a list for editing in order to add new summary and/or tags.
See the way B<make_tsu_blank> and B<edit_tsu_blank> work.

=item B<-tags-csv=FILE>

This optionally defines a file to contain CSV for tags and shows which iuse
them.

=item B<-json=FILE>

This optionally defines file into which some of the tag counts are written in
JSON format. The items written are:

    {
    "without_tags": 341,
    "without_summaries": 363,
    "need_work": 398,
    "without_either": 306
    }

=item B<-config=FILE>

This option allows an alternative configuration file to be used. This file
defines the location of the database, its port, its name and the username and
password to be used to access it. This feature was added to allow the script
to access alternative databases or the live database over an SSH tunnel.

See the CONFIGURATION AND ENVIRONMENT section below for the file format.

If the option is omitted the default file is used: B<.hpr_db.cfg>

=back

=head1 DESCRIPTION

A full description of the application and its features.
May include numerous subsections (i.e. =head2, =head3, etc.)


=head1 DIAGNOSTICS

A list of every error and warning message that the application can generate
(even the ones that will "never happen"), with a full explanation of each
problem, one or more likely causes, and any suggested remedies. If the
application generates exit status codes (e.g. under Unix) then list the exit
status associated with each error.


=head1 CONFIGURATION AND ENVIRONMENT

A full explanation of any configuration system(s) used by the application,
including the names and locations of any configuration files, and the
meaning of any environment variables or properties that can be set. These
descriptions must also include details of any configuration language used


=head1 DEPENDENCIES

A list of all the other modules that this module relies upon, including any
restrictions on versions, and an indication whether these required modules are
part of the standard Perl distribution, part of the module's distribution,
or must be installed separately.


=head1 INCOMPATIBILITIES

A list of any modules that this module cannot be used in conjunction with.
This may be due to name conflicts in the interface, or competition for
system or program resources, or due to internal limitations of Perl
(for example, many modules that use source code filters are mutually
incompatible).


=head1 BUGS AND LIMITATIONS

A list of known problems with the module, together with some indication
whether they are likely to be fixed in an upcoming release.

Also a list of restrictions on the features the module does provide:
data types that cannot be handled, performance issues and the circumstances
in which they may arise, practical limitations on the size of data sets,
special cases that are not (yet) handled, etc.

The initial template usually just has:

There are no known bugs in this module.
Please report problems to <Maintainer name(s)>  (<contact address>)
Patches are welcome.

=head1 AUTHOR

Dave Morriss (Dave.Morriss@gmail.com)

=head1 LICENCE AND COPYRIGHT

Copyright (c) 2015 Dave Morriss (Dave.Morriss@gmail.com). All rights reserved.

This module is free software; you can redistribute it and/or
modify it under the same terms as Perl itself. See perldoc perlartistic.

=cut

#}}}

# [zo to open fold, zc to close]

# vim: syntax=perl:ts=8:sw=4:et:ai:tw=78:fo=tcrqn21:fdm=marker
