hpr-tools/Community_News/make_shownotes

#!/usr/bin/env perl
#===============================================================================
#
#         FILE: make_shownotes
#
#        USAGE: ./make_shownotes [-help] [-documentation] [-debug=N]
#               [-from=DATE] [-[no]comments] [-[no]markcomments] [-[no]ctext]
#               [-lastrecording=DATETIME] [-[no]silent] [-out=FILE]
#               [-episode=[N|auto]] [-[no]overwrite] [-mailnotes[=FILE]]
#               [-anyotherbusiness=FILE] [-template=FILE] [-config=FILE]
#               [-interlock=PASSWORD]
#
#  DESCRIPTION: Builds shownotes for a Community News show from the HPR
#               database using a template.  Writes the result to STDOUT or to
#               a file. Also writes to the database if requested.
#
#      OPTIONS: ---
# REQUIREMENTS: ---
#         BUGS: ---
#        NOTES: To view the entire documentation use:
#
#               ./make_shownotes -documentation
#
#               To create a PDF version of the documentation:
#
#               pod2pdf make_shownotes --out=make_shownotes.pdf
#
#               Where pod2pdf comes from App::pod2pdf
#
#       AUTHOR: Dave Morriss (djm), Dave.Morriss@gmail.com
#      VERSION: 0.2.2
#      CREATED: 2014-04-24 16:08:30
#     REVISION: 2023-06-30 23:52:06
#
#===============================================================================

use 5.010;
use strict;
use warnings;
use utf8;

use Carp;
use Getopt::Long;
use Pod::Usage;

use Config::General;

use Date::Parse;
use Date::Calc qw{:all};
use DateTime;
use DateTime::Duration;

use Template;
use Template::Filters;
Template::Filters->use_html_entities;    # Use HTML::Entities in the template

use HTML::Entities;

use DBI;

use Data::Dumper;

#
# Version number (manually incremented)
#
our $VERSION = '0.2.2';

#
# Various constants
#
( my $PROG = $0 ) =~ s|.*/||mx;

#-------------------------------------------------------------------------------
# Declarations
#-------------------------------------------------------------------------------
#
# Constants and other declarations
#
my $basedir    = "$ENV{HOME}/HPR/Community_News";
my $configfile = "$basedir/.hpr_db.cfg";
my $bpfile     = "$basedir/shownote_template.tpl";

my $title_template = 'HPR Community News for %s %s';

#
# Needed to allow an older episode to have its notes regenerated. This is an
# 'apg'-generated password which is just hard to remember and requires some
# thought to use. The idea is to prevent older shownote rewriting by accident.
#
my $interlock_password = 'lumRacboikac';
my $interlock_enabled = 0;

my ( $dbh, $sth1, $h1 );
my ( @startdate, $hosts, $shows );
my ( @dc_lr, $dt_lr, @dc_lm, $dt_lm );
my ( $t_days, $missed_comments, $missed_count );
my ( $comments, $comment_count, $past_count, $ignore_count );
my ( %past, %current );

#
# The normal recording time (UTC). Any change should be copied in the POD
# documentation below.
# TODO: Should this be in a configuration file?
#
my @deftime = (15, 00, 00);

#
# Earliest comment release time
#
my @releasetime = (19, 00, 00);

#
# Enable Unicode mode
#
binmode STDOUT, ":encoding(UTF-8)";
binmode STDERR, ":encoding(UTF-8)";

#-------------------------------------------------------------------------------
# Options and arguments
#-------------------------------------------------------------------------------
#
# Option defaults
#
my $DEFDEBUG = 0;

#
# Process options
#
my %options;
Options( \%options );

#
# Default help is just the USAGE section
#
pod2usage( -msg => "$PROG version $VERSION\n", -verbose => 0, -exitval => 1 )
    if ( $options{'help'} );

#
# Full documentation if requested with -documentation
#
pod2usage( -msg => "$PROG version $VERSION\n", -verbose => 2, -exitval => 1 )
    if ( $options{'documentation'} );

#
# Collect options
#
my $DEBUG = ( defined( $options{debug} ) ? $options{debug} : $DEFDEBUG );
my $cfgfile
    = ( defined( $options{config} ) ? $options{config} : $configfile );
my $silent = ( defined( $options{silent} ) ? $options{silent} : 0 );
my $show_comments
    = ( defined( $options{comments} ) ? $options{comments} : 0 );
my $mark_comments
    = ( defined( $options{markcomments} ) ? $options{markcomments} : 0 );
my $ctext         = ( defined( $options{ctext} ) ? $options{ctext} : 0 );
my $lastrecording = $options{lastrecording};
my $outfile       = $options{out};
my $episode       = $options{episode};
my $overwrite = ( defined( $options{overwrite} ) ? $options{overwrite} : 0 );
my $template
    = ( defined( $options{template} ) ? $options{template} : $bpfile );
my $mailnotes = $options{mailnotes};
my $aobfile   = $options{anyotherbusiness};
my $interlock = $options{interlock};

#
# Sanity checks
#
die "Unable to find configuration file $cfgfile\n" unless ( -e $cfgfile );
if ( defined($episode) ) {
    if ( $episode =~ /^\d+$/ ) {
        die "Episode number must be greater than zero\n"
            unless ( $episode > 0 );
    }
    else {
        die "Episode must be a number or 'auto'\n"
            unless ( $episode eq 'auto' );
    }
}

die "Error: Unable to find template $template\n" unless -r $template;

#
# We accept '-mailnotes' meaning we want to use a default set of mail notes,
# or '-mailnotes=FILE' which means the notes are in a file for inclusion. If
# the option is omitted then we don't include mail notes (and the template is
# expected to do the right thing).
#
if (defined($mailnotes)) {
    if ($mailnotes =~ /^$/) {
        #
        # The default mail inclusion is provided in a named BLOCK directive in
        # the template. The name is hard-wired here
        #
        # FIXME: there's a dependency between the script and the template here
        # which is inflexible.
        #
        $mailnotes = 'default_mail';
    }
    else {
        die "Error: Unable to find includefile '$mailnotes'\n" unless -r $mailnotes;
    }
}

#
# The -anyotherbusiness=FILE or -aob=FILE options provide an HTML file to be
# added to the end of the notes.
#
if (defined($aobfile)) {
    die "Error: Unable to find includefile '$aobfile'\n" unless -r $aobfile;
}

#
# Use the date provided or the default
#
if ( defined( $options{from} ) ) {
    #
    # Parse and perform rudimentary validation on the -from option
    #
#    my @parsed = strptime( $options{from} );
#    die "Invalid -from=DATE option '$options{from}'\n"
#        unless ( defined( $parsed[3] )
#        && defined( $parsed[4] )
#        && defined( $parsed[5] ) );
#
#    $parsed[5] += 1900;
#    $parsed[4] += 1;
#    @startdate = @parsed[ 5, 4, 3 ];
    @startdate = parse_to_dc($options{from}, undef);
}
else {
    #
    # Default to the current date
    #
    @startdate = Today();
}

#
# If -interlock=PASSWORD was provided is the password valid?
#
if ( defined($interlock) ) {
    $interlock_enabled = $interlock eq $interlock_password;
    emit( $silent, "Interlock ",
        ( $interlock_enabled ? "accepted" : "rejected" ), "\n" );
}

#-------------------------------------------------------------------------------
# Configuration file - load data
#-------------------------------------------------------------------------------
emit( $silent, "Configuration file: ", $cfgfile, "\n" );
my $conf = Config::General->new(
    -ConfigFile      => $cfgfile,
    -InterPolateVars => 1,
    -ExtendedAccess  => 1,
);
my %config = $conf->getall();

#-------------------------------------------------------------------------------
# Date setup
#-------------------------------------------------------------------------------
#
# Transfer Date::Calc values into a hash for initialising a DateTime object.
# Force the day to 1
#
my ( @sd, $dt );
@sd = ( @startdate, 0, 0, 0 );
$sd[2] = 1;
$dt = dc_to_dt(\@sd);

emit( $silent, "Start of month: ", $dt->ymd, "\n" );

#-------------------------------------------------------------------------------
# Connect to the database
#-------------------------------------------------------------------------------
my $dbhost = $config{database}->{host} // '127.0.0.1';
my $dbport = $config{database}->{port} // 3306;
my $dbname = $config{database}->{name};
my $dbuser = $config{database}->{user};
my $dbpwd  = $config{database}->{password};
$dbh = DBI->connect( "dbi:mysql:host=$dbhost;port=$dbport;database=$dbname",
    $dbuser, $dbpwd, { AutoCommit => 1 } )
    or croak $DBI::errstr;

#
# Enable client-side UTF8
#
$dbh->{mysql_enable_utf8} = 1;

#
# Set the local timezone to UTC for this connection
#
$dbh->do("set time_zone = '+00:00'") or carp $dbh->errstr;

#-------------------------------------------------------------------------------
# Open the output file (or STDOUT) - we may need the date to do it
#-------------------------------------------------------------------------------
my $outfh;
if ($outfile) {
    $outfile
        = sprintf( $outfile, sprintf( "%d-%02d", $dt->year, $dt->month ) )
        if ( $outfile =~ /%s/ );
    emit( $silent, "Output: ", $outfile, "\n" );

    open( $outfh, ">:encoding(UTF-8)", $outfile )
        or croak "Unable to open $outfile for writing: $!";
}
else {
    open( $outfh, ">&", \*STDOUT )
        or croak "Unable to initialise for writing: $!";
}

#-------------------------------------------------------------------------------
# Check the episode specification if given
#-------------------------------------------------------------------------------
if ( defined($episode) ) {
    my $title = sprintf( $title_template, $dt->month_name, $dt->year );

    emit( $silent, "\n" );

    #
    # Is it a number?
    #
    if ( $episode =~ /^\d+$/ ) {
        emit( $silent, "Writing to numbered episode option selected\n" );

        #
        # Does the number exist in the database?
        #
        $sth1 = $dbh->prepare(q{SELECT * FROM eps WHERE id = ?});
        $sth1->execute($episode);
        if ( $dbh->err ) {
            carp $dbh->errstr;
        }
        if ( $h1 = $sth1->fetchrow_hashref() ) {
            #
            # Episode exists, do more checks
            #
            emit( $silent, "Found episode $episode\n" );
            emit( $silent, "Title: ", $h1->{title}, "\n" );
            die "Error: wrong show selected\n"
                unless ( $h1->{title} eq $title );

            unless (validate_date($h1->{date}) || $interlock_enabled) {
                die "Error: show $episode has a date in the past\n";
            }

            unless ($overwrite) {
                die "Error: show $episode already has notes\n"
                    unless length( $h1->{notes} ) == 0;
            }
        }
        else {
            die "Error: episode $episode does not exist in the database\n";
        }
    }
    else {
        #
        # The required episode is 'auto' (we already checked). Now we actually
        # find the episode number corresponding to the month we're processing.
        # We do this by searching for the relevant title in the database.
        #
        emit( $silent, "Searching for show title: '$title'\n" );
        $sth1 = $dbh->prepare(q{SELECT * FROM eps WHERE title = ?});
        $sth1->execute($title);
        if ( $dbh->err ) {
            carp $dbh->errstr;
        }
        if ( $h1 = $sth1->fetchrow_hashref() ) {
            #
            # Found the episode by title
            #
            $episode = $h1->{id};
            emit( $silent, "Found episode $episode\n" );
            emit( $silent, "Title: ", $h1->{title}, "\n" );

            unless (validate_date($h1->{date}) || $interlock_enabled) {
                die "Error: show $episode has a date in the past\n";
            }

            unless ($overwrite) {
                die "Error: show $episode already has notes\n"
                    unless length( $h1->{notes} ) == 0;
            }
        }
        else {
            die 'Error: Unable to find an episode '
                . "for the selected month's notes\n";
        }
    }
}

#-------------------------------------------------------------------------------
# If asked (-comments -markcomments) compute the last recording date
#-------------------------------------------------------------------------------
if ($show_comments && $mark_comments) {
    #
    # We're marking comments so need to find the date of the recording of the
    # last show.
    #
    # It's possible to specify date and time of the last recording via option
    # '-lastrecording=DATETIME' (in case we recorded early or something), but
    # it needs to be parsed.
    #
    if ( defined( $options{lastrecording} ) ) {
        #
        # Parse and perform rudimentary validation on the -lastrecording option
        #
        @dc_lr = parse_to_dc( $options{lastrecording}, \@deftime );

    }
    else {
        #
        # Otherwise we assume the recording was on a Saturday and compute when
        # that was. We get back the date and time of the recording as
        # a Date::Calc object.
        #
        @dc_lr = find_last_recording( \@startdate, \@deftime );
    }

    #
    # Convert the D::C datetime to a DateTime object
    #
    $dt_lr = dc_to_dt(\@dc_lr);

    # Report it for checking (since this algorithm is complex)
    emit(
        $silent,
        sprintf("* %-100s *\n",
            ( defined( $options{lastrecording} ) ? 'Given' : 'Found' ) .
            ' last recording date on ' .
            $dt_lr->datetime . ' time zone ' . $dt_lr->strftime('%Z') .
            ' (' . $dt_lr->epoch . ')'
        )
    );

    #
    # Also, we need to know the last month for comment marking
    #
    @dc_lm = find_last_month(\@startdate);
    $dt_lm = dc_to_dt(\@dc_lm);

    # Report it for checking (since this algorithm is complex)
    emit(
        $silent,
        sprintf("* %-100s *\n",
            'Last month computed to be ' .
            $dt_lm->datetime . ' time zone ' . $dt_lm->strftime('%Z') .
            ' (' . $dt_lm->epoch . ')'
        )
    );

    #
    # Work out if the the recording date was before the end of the last
    # reviewed month.
    #
    $t_days = trailing_days(\@dc_lr, \@dc_lm);
    emit(
        $silent,
        sprintf("* %-100s *\n",
            'Recording was in the reviewed month and not on the ' .
            'last day, so comments may have been missed'
        )
    ) if $t_days;

    _debug( $DEBUG > 2, '@dc_lr = (' . join(',',@dc_lr) .')' );
    _debug( $DEBUG > 2, '$dt_lr->ymd = ' . $dt_lr->ymd );
    _debug( $DEBUG > 2, '$dt_lr->hms = ' . $dt_lr->hms );
    _debug( $DEBUG > 2, '@dc_lm = (' . join(',',@dc_lm) .')' );
}
else {
    #
    # We now need a default for $dt_lr in all cases because the query has been
    # changed.
    #
    @dc_lr = find_last_recording( \@startdate, \@deftime );
    $dt_lr = dc_to_dt(\@dc_lr);
}

#-------------------------------------------------------------------------------
# Data collection
#-------------------------------------------------------------------------------
#
# Prepare to get any new hosts for the required month. We let MySQL compute
# the end of the month. Order by date of first show.
#
$sth1 = $dbh->prepare(
    q{SELECT ho.host, ho.hostid, md.mindate
        FROM hosts ho
        JOIN (SELECT hostid, MIN(date) mindate FROM eps GROUP BY hostid) AS md
            ON ho.hostid = md.hostid
        WHERE md.mindate >= ? AND md.mindate < last_day(?) + interval 1 day
        ORDER BY mindate}
);
$sth1->execute( $dt->ymd, $dt->ymd );
if ( $dbh->err ) {
    carp $dbh->errstr;
}

#
# Grab the data as an arrayref of hashrefs
#
$hosts = $sth1->fetchall_arrayref( {} );

#
# Prepare to get the episodes for the required month. We let MySQL compute the
# end of the month. We include every column here just in case they'll be
# useful in the template, though this requires some aliasing.
# 2015-04-05 The date field has been reformatted so that the 'date' plugin in
# the form is happy with it.
#
$sth1 = $dbh->prepare(
    q{SELECT    eps.id AS eps_id,
--              eps.type,
--              date_format(eps.date,'%a %Y-%m-%d') AS date,
                date_format(eps.date,'00:00:00 %d/%m/%Y') AS date,
--              eps.date,
                eps.title,
                sec_to_time(eps.duration) as length,
                eps.summary,
                eps.notes,
--              eps.host AS eps_host,
                eps.hostid AS eps_hostid,
                eps.series,
                eps.explicit,
                eps.license AS eps_license,
                eps.tags,
                eps.version,
                eps.valid AS eps_valid,
                ho.hostid AS ho_hostid,
                ho.host AS ho_host,
                ho.email,
                ho.profile, -- was website,
                ho.license AS ho_license,
--              ho.repeat,
                ho.valid AS ho_valid
            FROM eps
            JOIN hosts ho ON eps.hostid = ho.hostid
        WHERE eps.date >= ?
        AND eps.date < last_day(?) + interval 1 day
        ORDER BY id}
);
$sth1->execute( $dt->ymd, $dt->ymd );
if ( $dbh->err ) {
    carp $dbh->errstr;
}

#
# Grab the data as an arrayref of hashrefs
#
$shows = $sth1->fetchall_arrayref( {} );

#
# Collect the comments if requested
#
if ($show_comments) {

    #
    # Grab the comments for the selected period. These have weird \' sequences
    # in the author, title and text, which doesn't seem right, so we strip
    # them.  Note that the end date in date ranges seems only to work as
    # 'last_day(?) + interval 1 day' presumably because without it the date is
    # interpreted as midnight the previous day (e.g. 2014-06-30 is early on
    # this day, not early on 2014-07-01 whereas adding 1 day gets this right).
    #
    # The logic here was rewritten 2015-03-04 and consists of:
    # - the sub-select collects the id numbers of all comments that have
    #   occurred in the selected period
    # - because all comments relating to a given show have the same id number
    #   this identifies all comment groups (comments relating to a given show)
    #   with members in the period
    # - the main query then selects all comments in these groups and joins all
    #   the required tables to them to get the other data we need; it computes
    #   the boolean 'in_range' to indicate whether a comment within a group
    #   should be displayed; it returns *all* comments so we can number them
    # - there might have been a way to generate a comment sequence number
    #   within the SQL but it was decided not to and to do this in the script
    #
    # Update 2015-03-26: the sort order of the final result used
    # 'comment_identifier_id' which didn't seem to work reliably. Changed this
    # to 'episode' which seems to work fine. The use of the former was only
    # arrived at by guesswork. The guess was obviously wrong.
    #
    # Update 2015-06-04: the test for whether a comment is approved was
    # failing because it was in the wrong place. Also, the sub-select seemed
    # wrong, and running through EXPLAIN EXTENDED showed some flaws.
    # Redesigned this and the whole query to be more in keeping with the
    # algorithm sketched out above. This seems to have solved the problems.
    #
    # Update 2015-08-17: Still not right; somehow the 'in_range' check got
    # mangled and the process of counting comments where a recent one was made
    # to an old show was messed up. Dropped the two queries with a UNION and
    # went back to the check for comment_identifier_id in a sub-query. This
    # time the sub-query is cleverer and returns identifiers where the
    # `comment_timestamp` is in the target month *and* the episode date is in
    # the target month or before the start of the target month. This works for
    # July 2015 (where there's a comment made about an August show) and August
    # 2015 (where there's a comment made about a July show).
    #
    # Update 2023-05-02: Issues with comments received on the day of
    # recording. This matter has been addresed earlier in the year but not all
    # that well. The way this is all handled is not very clever so tried
    # adjusting the main comment query to return such comments since we had
    # one on 2023-04-29 which wasn't in the report.
    #
    #{{{
    #
    # Graveyard of old failed queries...
    # ==================================
#    $sth1 = $dbh->prepare(
#        q{
#        SELECT
#            cc.comment_identifier_id,
#            eps.id AS episode,
#--          ci.identifier_url,
#            substr(ci.identifier_url,1,locate('/eps.php?id=',ci.identifier_url)+15)
#                AS identifier_url,
#            eps.title,
#            eps.date,
#            ho.host,
#            ho.hostid,
#            from_unixtime(cc.comment_timestamp,'%Y-%m-%dT%TZ') AS timestamp,
#            replace(cc.comment_author_name,'\\\\','') AS comment_author_name,
#            replace(cc.comment_title,'\\\\','') AS comment_title,
#            replace(cc.comment_text,'\\\\','') AS comment_text,
#            cc.comment_timestamp,
#            (CASE WHEN (cc.comment_timestamp >= unix_timestamp(?)
#                    AND cc.comment_timestamp < unix_timestamp(last_day(?) + interval 1 day)) THEN
#                1 ELSE 0 END) AS in_range
#        FROM c5t_comment cc
#        JOIN c5t_identifier ci ON ci.identifier_id = cc.comment_identifier_id
#        JOIN eps ON eps.id =
#            substr(ci.identifier_url,locate('/eps.php?id=',ci.identifier_url)+12)
#        JOIN hosts ho ON eps.hostid = ho.hostid
#        WHERE cc.comment_status = 0
#        AND cc.comment_identifier_id IN (
#            SELECT DISTINCT cc2.comment_identifier_id FROM c5t_comment cc2
#                WHERE (
#                    cc2.comment_timestamp >= unix_timestamp(?)
#                        AND cc2.comment_timestamp < unix_timestamp(last_day(?) + interval 1 day)
#                    )
#            )
#        AND (eps.date < (last_day(?) + interval 1 day))
#        ORDER BY eps.id ASC, comment_timestamp ASC
#        }
#    );
#
# This one is nuts...
#    $sth1 = $dbh->prepare( q{
#        (
#            SELECT
#                cc.comment_identifier_id,
#                eps.id AS episode,
#                substr(ci.identifier_url,1,locate('/eps.php?id=',ci.identifier_url)+15)
#                    AS identifier_url,
#                eps.title,
#                eps.date,
#                ho.host,
#                ho.hostid,
#                from_unixtime(cc.comment_timestamp,'%Y-%m-%dT%TZ') AS timestamp,
#                replace(cc.comment_author_name,'\\\\','') AS comment_author_name,
#                replace(cc.comment_title,'\\\\','') AS comment_title,
#                replace(cc.comment_text,'\\\\','') AS comment_text,
#                cc.comment_timestamp,
#                (CASE WHEN (cc.comment_timestamp >= unix_timestamp(?)
#                        AND cc.comment_timestamp < unix_timestamp(last_day(?) + interval 1 day)) THEN
#                    1 ELSE 0 END) AS in_range
#            FROM c5t_comment cc
#            JOIN c5t_identifier ci ON ci.identifier_id = cc.comment_identifier_id
#            JOIN eps ON eps.id =
#                substr(ci.identifier_url,locate('/eps.php?id=',ci.identifier_url)+12,4)
#            JOIN hosts ho ON eps.hostid = ho.hostid
#            WHERE eps.date >= ? AND eps.date < (last_day(?) + interval 1 day)
#        )
#        UNION
#        (
#            SELECT
#                cc.comment_identifier_id,
#                eps.id AS episode,
#                substr(ci.identifier_url,1,locate('/eps.php?id=',ci.identifier_url)+15)
#                    AS identifier_url,
#                eps.title,
#                eps.date,
#                ho.host,
#                ho.hostid,
#                from_unixtime(cc.comment_timestamp,'%Y-%m-%dT%TZ') AS timestamp,
#                replace(cc.comment_author_name,'\\\\','') AS comment_author_name,
#                replace(cc.comment_title,'\\\\','') AS comment_title,
#                replace(cc.comment_text,'\\\\','') AS comment_text,
#                cc.comment_timestamp,
#                (CASE WHEN (cc.comment_timestamp >= unix_timestamp(?)
#                        AND cc.comment_timestamp < unix_timestamp(last_day(?) + interval 1 day)) THEN
#                    1 ELSE 0 END) AS in_range
#            FROM c5t_comment cc
#            JOIN c5t_identifier ci ON ci.identifier_id = cc.comment_identifier_id
#            JOIN eps ON eps.id =
#                substr(ci.identifier_url,locate('/eps.php?id=',ci.identifier_url)+12,4)
#            JOIN hosts ho ON eps.hostid = ho.hostid
#            WHERE (cc.comment_timestamp >= unix_timestamp(?)
#                AND cc.comment_timestamp < unix_timestamp(last_day(?) + interval 1 day))
#            AND eps.date < (last_day(?) + interval 1 day)
#        )
#        ORDER BY episode ASC, comment_timestamp ASC
#    });
#
# This one worked fine - after much messing around admittedly:
#    $sth1 = $dbh->prepare( q{
#        SELECT
#            cc.comment_identifier_id,
#            eps.id AS episode,
#            substr(ci.identifier_url,1,locate('/eps.php?id=',ci.identifier_url)+15)
#                AS identifier_url,
#            eps.title,
#            eps.date,
#            ho.host,
#            ho.hostid,
#            from_unixtime(cc.comment_timestamp,'%Y-%m-%dT%TZ') AS timestamp,
#            replace(cc.comment_author_name,'\\\\','') AS comment_author_name,
#            replace(cc.comment_title,'\\\\','') AS comment_title,
#            replace(cc.comment_text,'\\\\','') AS comment_text,
#            cc.comment_timestamp,
#            (CASE WHEN
#                (
#                    (cc.comment_timestamp >= unix_timestamp(?)
#                        AND cc.comment_timestamp < unix_timestamp(last_day(?) + interval 1 day))
#                    AND eps.date < (last_day(?) + interval 1 day)
#                    OR (cc.comment_timestamp < unix_timestamp(last_day(?) + interval 1 day)
#                        AND (eps.date >= ? AND eps.date < (last_day(?) + interval 1 day)))
#                )
#                THEN 1 ELSE 0 END) AS in_range
#        FROM c5t_comment cc
#        JOIN c5t_identifier ci ON ci.identifier_id = cc.comment_identifier_id
#        JOIN eps ON eps.id =
#            substr(ci.identifier_url,locate('/eps.php?id=',ci.identifier_url)+12,4)
#        JOIN hosts ho ON eps.hostid = ho.hostid
#        WHERE cc.comment_status = 0
#        AND cc.comment_identifier_id IN
#        (
#            SELECT DISTINCT
#                cc.comment_identifier_id
#            FROM c5t_comment cc
#            JOIN c5t_identifier ci ON ci.identifier_id = cc.comment_identifier_id
#            JOIN eps ON eps.id =
#                substr(ci.identifier_url,locate('/eps.php?id=',ci.identifier_url)+12,4)
#            WHERE
#                (eps.date >= ? AND eps.date < (last_day(?) + interval 1 day))
#                OR (
#                    ( (cc.comment_timestamp >= unix_timestamp(?)
#                        AND cc.comment_timestamp < unix_timestamp(last_day(?) + interval 1 day)) )
#                    AND (eps.date >= ? AND eps.date < (last_day(?) + interval 1 day)
#                        OR (eps.date < ?))
#                )
#        )
#        ORDER BY episode ASC, comment_timestamp ASC
#    });
    #}}}

    #-------------------------------------------------------------------------------
    # Main comment query
    #-------------------------------------------------------------------------------
    $sth1 = $dbh->prepare( q{
        SELECT
            eps.id AS episode,
            concat('https://hackerpublicradio.org/eps/hpr',
                lpad(eps_id,4,0),'/index.html') AS identifier_url,
            eps.title,
            eps.summary,
            eps.date,
            ho.host,
            ho.hostid,
            date_format(co.comment_timestamp,'%Y-%m-%dT%TZ') AS timestamp,
            co.comment_author_name,
            co.comment_title,
            co.comment_text,
            unix_timestamp(co.comment_timestamp) AS comment_timestamp_ut,
            unix_timestamp(
                cast(concat(date(co.comment_timestamp),' ',?) AS DATETIME)
            ) AS comment_released_ut,
            (CASE WHEN
                (
                    (co.comment_timestamp >= ?
                        AND co.comment_timestamp < (last_day(?) + interval 1 day))
                    OR (co.comment_timestamp >= ?)
                    AND eps.date < (last_day(?) + interval 1 day)
                    OR (co.comment_timestamp < (last_day(?) + interval 1 day)
                        AND (eps.date >= ? AND eps.date < (last_day(?) + interval 1 day)))
                )
                THEN 1 ELSE 0 END) AS in_range
        FROM comments co
        JOIN eps ON eps.id = co.eps_id
        JOIN hosts ho ON eps.hostid = ho.hostid
        WHERE eps.id IN
        (
            SELECT DISTINCT
                eps.id
            FROM eps
            JOIN comments co ON (eps.id = co.eps_id)
            WHERE
                (eps.date >= ? AND eps.date < (last_day(?) + interval 1 day))
                OR (co.comment_timestamp >= ?)
                OR (
                    ( (co.comment_timestamp >= ?
                            AND co.comment_timestamp < (last_day(?) + interval 1 day)) )
                    AND (eps.date >= ? AND eps.date < (last_day(?) + interval 1 day)
                        OR (eps.date < ?))
                )
        )
        ORDER BY episode ASC, comment_timestamp ASC
    });

    $sth1->execute(
        $dt_lr->hms,
        ( $dt->ymd ) x 2,
        $dt_lr->ymd,
        ( $dt->ymd ) x 6,
        $dt_lr->ymd,
        ( $dt->ymd ) x 5
    );
    if ( $dbh->err ) {
        carp $dbh->errstr;
    }

    #
    # Grab the data as an arrayref of hashrefs
    #
    $comments = $sth1->fetchall_arrayref( {} );

    #-------------------------------------------------------------------------------
    # Post-process the results of the query
    #-------------------------------------------------------------------------------
    # The comment structure needs some further work because it contains all
    # comments for a given show but no indexes we can use (because the query
    # didn't generate any - this is a lot easier in PostgreSQL (and possibly
    # SQLite)). We also need to know how many comments we have within the
    # target period, which is usually a smaller number than the number of
    # comments we got back. The query has marked the ones we want to count
    # (and display) using 'in_range'.
    #
    my ( $ep, $lastep, $index ) = ( 0, 0, 0 );
    $comment_count = $past_count = $ignore_count = 0;
    for my $row (@$comments) {
        #
        # Give each comment an index whether in_range or not. The indexes
        # start from 1 when the episode number changes otherwise they
        # increment.
        #
        $ep = $row->{episode};
        if ( $ep == $lastep ) {
            $index++;
        }
        else {
            $index = 1;
        }
        #print "$ep $index ", $row->{in_range}, "\n";
        _debug(
            $DEBUG > 2,
            sprintf(
                "Index generation: episode=%s, index=%s, in_range=%s",
                $ep, $index, $row->{in_range}
            )
        );
        $lastep = $ep;

        #
        # Save the index for the template
        #
        $row->{index} = $index;

        #
        # Count the valid ones so the template doesn't have to to give a total
        # for this month
        #
        $comment_count++ if $row->{in_range};

        #
        # Make the comment text cleaner by removing carriage returns (not sure
        # why they are there in the first place)
        #
        $row->{comment_text} =~ s/\r+//g;

        #
        # Dates from the database look like '2016-08-01', we compare them to
        # the month we're processing to see if the comments are attached to
        # old shows.
        # FIXME: test for 'in_range' before setting 'past'
        #
        if ( validate_date($row->{date},$dt->ymd) ) {
            $row->{past} = 0;
        }
        else {
            $row->{past} = 1;
            $past_count++ if $row->{in_range};
        }
    }

    #
    # Now prune all of the comments which are not in_range to give the
    # template an easier job
    #
    @$comments = grep { $_->{in_range} } @$comments;

    # Explanation of the resulting structure {{{
    #
    # Restructure the comments into two hashes keyed by episode number where
    # each comment to that episode is stored in sequence in an array. The two
    # hashes %past and %current hold comments for shows in the past and for
    # the current month. These can be dealt with separately in the template,
    # making the logic therein somewhat simpler and clearer.
    #
    # The hash (of arrays of hashes) can be visualised thus:
    # %past = {
    #          '2457' => [
    #                      {
    #                        'past' => 1,
    #                        'hostid' => 111,
    #                        'identifier_url' => 'https://hackerpublicradio.org/eps.php?id=2457',
    #                        'comment_timestamp_ut' => 1556192523,
    #                        'date' => '2018-01-02',
    #                        'comment_text' => [snip],
    #                        'comment_title' => 'aren\'t you forgetting a hub?',
    #                        'timestamp' => '2019-04-25T11:42:03Z',
    #                        'in_range' => 1,
    #                        'index' => 1,
    #                        'host' => 'knightwise',
    #                        'title' => 'Getting ready for my new Macbook Pro',
    #                        'comment_author_name' => 'Bart',
    #                        'episode' => 2457,
    #                        'ignore' => 0
    #                      }
    #                    ]
    #          }
    #}}}

    #
    # Also, as we go, mark and count the comments in the %past hash which were
    # likely to have been read in the last show, so by this means simplify
    # what the template has to do.
    #
    for my $row (@$comments) {
        my $ep = $row->{episode};
        if ($row->{past}) {
            if ( $show_comments && $mark_comments ) {
               #if ( $row->{comment_timestamp_ut} <= $dt_lr->epoch
                if ( $row->{comment_released_ut} <= $dt_lr->epoch
                    && substr( $row->{date}, 0, 7 ) eq
                    substr( $dt_lm->ymd, 0, 7 ) )
                {
                    $row->{ignore} = 1;
                    $ignore_count++;
                }
                else {
                    $row->{ignore} = 0;
                }
            }
            else {
                $row->{ignore} = 0;
            }

            if (exists($past{$ep})) {
                push(@{$past{$ep}},$row);
            }
            else {
                $past{$ep} = [$row];
            }
        }
        else {
            if (exists($current{$ep})) {
                push(@{$current{$ep}},$row);
            }
            else {
                $current{$ep} = [$row];
            }
        }
    }

    _debug ($DEBUG > 2,
        '%past: ' . Dumper(\%past),
        '%current: ' . Dumper(\%current)
    );

    #-------------------------------------------------------------------------------
    # Make another data structure of missed coments *if* $t_days is true
    #-------------------------------------------------------------------------------
    # If $t_days is true then there might be comments from the previous month
    # that weren't covered in the recording. So we add them to the notes just
    # for the recording. Trouble is, if they exist they aren't in the comments
    # we have gathered, so we'll have to go and search for them with this
    # special query.
    #
    if ($t_days) {
        $sth1 = $dbh->prepare( q{
            SELECT
                eps.id AS episode,
                concat('https://hackerpublicradio.org/eps/hpr',
                    lpad(eps_id,4,0),'/index.html') AS identifier_url,
                eps.title,
                eps.summary,
                eps.date,
                ho.host,
                ho.hostid,
                date_format(co.comment_timestamp,'%Y-%m-%dT%TZ') AS timestamp,
                co.comment_author_name,
                co.comment_title,
                co.comment_text,
                unix_timestamp(co.comment_timestamp) AS comment_timestamp_ut
            FROM comments co
            JOIN eps ON eps.id = co.eps_id
            JOIN hosts ho ON eps.hostid = ho.hostid
            WHERE
                co.comment_timestamp >= ?
                AND co.comment_timestamp < (last_day(?)+ interval 1 day)
            ORDER BY episode ASC, comment_timestamp ASC
        });

        #
        # Need the date and time of the last recording and the start of the
        # last month we reviewed to perform the query.
        #
        # $sth1->execute( $dt_lr->datetime . 'Z', $dt_lm->ymd );
        $sth1->execute( $dt_lr->ymd, $dt_lm->ymd );
        if ( $dbh->err ) {
            carp $dbh->errstr;
        }

        #
        # Grab the data as an arrayref of hashrefs
        #
        $missed_comments = $sth1->fetchall_arrayref( {} );
        $missed_count = (
            defined($missed_comments)
            ? scalar(@$missed_comments)
            : 0
        );

        #
        # Make the comment text cleaner by removing carriage returns (not sure
        # why they are there in the first place)
        #
        for my $ch (@$missed_comments) {
            $ch->{comment_text} =~ s/\r+//g;
        }

        _debug ($DEBUG > 2,
            '@missed_comments: ' . Dumper($missed_comments)
        );

        #
        # After a change in design around 2023-05-02 there may be duplicates
        # in the %past hash and the @$missed_comments array. We need to hide
        # the former for now. They will not be hidden when $t_days is false
        # because we're not bothered about missed comments!
        #
        if ( $past_count > 0 ) {
            my @missed_episodes = map { $_->{episode} } @$missed_comments;

            _debug( $DEBUG > 2,
                '@missed_episodes: ' . Dumper( \@missed_episodes ) );

            delete( @past{@missed_episodes} );

            my $old_pc = $past_count;
            $past_count = scalar( keys(%past) );
            $comment_count -= ($old_pc - $past_count);

            _debug(
                $DEBUG > 2,
                '%past (edited): ' . Dumper( \%past ),
                "\$past_count: $past_count",
                "\$comment_count: $comment_count"
            );
        }
    }
}

#-------------------------------------------------------------------------------
# Fill and print the template
#-------------------------------------------------------------------------------
my $tt = Template->new(
    {   ABSOLUTE     => 1,
        ENCODING     => 'utf8',
        INCLUDE_PATH => $basedir,
        FILTERS      => {
            # For HTML->ASCII in comments_only.tpl, decode HTML entities
            'decode_entities' => \&my_decode_entities,
        },
    }
);

my $vars = {
    review_month    => $dt->month_name,
    review_year     => $dt->year,
    hosts           => $hosts,
    shows           => $shows,
    comment_count   => $comment_count,
    past_count      => $past_count,
    ignore_count    => $ignore_count,
    missed_count    => $missed_count,
    missed_comments => $missed_comments,
    comments        => $comments,                                # legacy
    past            => \%past,
    current         => \%current,
    skip_comments   => ( $show_comments ? 0 : 1 ),
    mark_comments   => $mark_comments,
    ctext           => $ctext,
    last_recording  => ( $mark_comments ? $dt_lr->epoch : 0 ),
    last_month      => (
        $mark_comments ? sprintf( "%d-%02d", $dt_lm->year, $dt_lm->month ) : 0
    ),
    includefile     => $mailnotes,
    aob             => defined($aobfile),
    aobfile         => $aobfile,
};

#print Dumper($vars),"\n";

my $document;
$tt->process( $template, $vars, \$document, { binmode => ':utf8' } )
    || die $tt->error(), "\n";
print $outfh $document;

#
# The episode number tests passed earlier on, so add the notes to the database
# if so requested
#
if ($episode) {
    emit( $silent,
        "Writing shownotes to the database for episode $episode\n" );
    $sth1 = $dbh->prepare(q{UPDATE eps SET notes = ? WHERE id = ?});
    $sth1->execute( $document, $episode );
    if ( $dbh->err ) {
        carp $dbh->errstr;
    }
}

$dbh->disconnect;

exit;

#===  FUNCTION  ================================================================
#         NAME: parse_to_dc
#      PURPOSE: Parse a textual date (and optional time) to a Date::Calc
#               datetime
#   PARAMETERS: $datetime       Datetime as a string
#               $deftime        Arrayref default time (as a Date::Calc array
#                               or undef)
#      RETURNS: A Date::Calc date (and possibly time) as a list
#  DESCRIPTION: The $datetime argument is parsed ewith Date::Parse. The year
#               and month need to be adjusted. If a default time has been
#               supplied then the parsed time is checked and the default time
#               used if nothing was found, otherwise the parsed time is used
#               and a full 6-component time returned.
#               If the default time us undefined this means we don't care
#               about the time and so we just return the parsed date as
#               a 3-component list.
#       THROWS: No exceptions
#     COMMENTS: None
#     SEE ALSO: N/A
#===============================================================================
sub parse_to_dc {
    my ( $datetime, $deftime ) = @_;

    # What strptime returns:
    #  0   1   2   3    4      5     6
    # ($ss,$mm,$hh,$day,$month,$year,$zone)
    #
    my @parsed = strptime($datetime);
    die "Invalid DATE or DATETIME '$datetime'\n"
        unless ( defined( $parsed[3] )
        && defined( $parsed[4] )
        && defined( $parsed[5] ) );

    $parsed[5] += 1900;
    $parsed[4] += 1;

    if ( defined($deftime) ) {
        #
        # If no time was supplied add a default one
        #
        unless ( defined( $parsed[2] )
            && defined( $parsed[1] )
            && defined( $parsed[0] ) )
        {
            @parsed[ 2, 1, 0 ] = @$deftime;
        }

        #
        # Return a list
        #
        return ( @parsed[ 5, 4, 3, 2, 1, 0 ] );
    }
    else {
        return ( @parsed[ 5, 4, 3 ] );
    }

}

#===  FUNCTION  ================================================================
#         NAME: dc_to_dt
#      PURPOSE: Converts a Date::Calc datetime into a DateTime equivalent
#   PARAMETERS: $refdt          Reference to an array holding a Date::Calc
#                               date and time
#      RETURNS: Returns a DateTime object converted from the input
#  DESCRIPTION:
#       THROWS: No exceptions
#     COMMENTS: None
#     SEE ALSO: N/A
#===============================================================================
sub dc_to_dt {
    my ($refdt) = @_;

    #
    # Check we got a 6-element array
    #
    if (scalar(@$refdt) != 6) {
        print  "Invalid Date::Calc date and time (@$refdt)\n";
        die "Aborted\n";
    }

    #
    # Convert to DateTime to get access to formatting stuff
    #
    my ( %dtargs, $dt );
    @dtargs{ 'year', 'month', 'day', 'hour', 'minute', 'second', 'time_zone' }
        = ( @$refdt, 'UTC' );
    $dt = DateTime->new(%dtargs);

    #
    # Return the date as a DateTime object
    #
    return $dt;
}

#===  FUNCTION  ================================================================
#         NAME: find_last_recording
#      PURPOSE: Finds the recording date of the Community News episode
#               relating to the target month
#   PARAMETERS: $refdate        Reference to an array holding a Date::Calc
#                               date - the first of the selected month
#               $reftime        Reference to an array holding a Date::Calc
#                               time (UTC)
#      RETURNS: A Date::Calc object containing the date and time of the last
#               Community News recording in the UTC time zone
#  DESCRIPTION: We want to find the date of the last Community News recording
#               to determine whether a given comment preceded it. The scenario
#               is that we're using these notes while making such a recording
#               and want to know if comments occured before the last one. If
#               they did we should have read them during that show and don't
#               need to do so now. We want to pass the date generated here to
#               the template so it can compare it with comment dates. To
#               complete the story, the template will mark such comments, but
#               we'll turn of the marking before the notes are released - they
#               are just for use by the people recording the episode.
#       THROWS: No exceptions
#     COMMENTS: None
#     SEE ALSO: N/A
#===============================================================================
sub find_last_recording {
    my ($refdate, $reftime) = @_;

    my $monday = 1;                    # Day of week number 1-7, Monday-Sunday

    #
    # Using the given date (the requested month), ensure it's the first day of
    # the month
    #
    my @lastmonth = @$refdate;
    $lastmonth[2] = 1;

    #
    # Work out the recording date in the target month
    #
    @lastmonth = Nth_Weekday_of_Month_Year( @$refdate[ 0, 1 ], $monday, 1 );
    @lastmonth = Add_Delta_Days( @lastmonth, -2 );

    #
    # Return the date as a DateTime object
    #
    return (@lastmonth,@$reftime);
}

#===  FUNCTION  ================================================================
#         NAME: find_last_month
#      PURPOSE: Finds the previous month for working out marks
#   PARAMETERS: $refdate        Reference to an array holding a Date::Calc
#                               date - the first of the selected month
#      RETURNS: A DateTime object containing the first day of the last month.
#  DESCRIPTION: We need the details of the last month because if we're marking
#               comments we have already read out in the previous Community
#               News show, but don't want to mark comments to shows before
#               that month, even if the comments fell within the target month.
#               This is a complex edge condition that wasn't appreciated in the
#               first implementation.
#       THROWS: No exceptions
#     COMMENTS: None
#     SEE ALSO: N/A
#===============================================================================
sub find_last_month {
    my ($refdate) = @_;

    my $monday = 1;                    # Day of week number 1-7, Monday-Sunday
    my @starttime = ( 00, 00, 00 );    # UTC

    #
    # Using the given date (the requested month for the notes), ensure it's
    # the first day of the month
    #
    my @lastmonth = @$refdate;
    $lastmonth[2] = 1;

    #
    # Subtract one day to enter the previous month and force the first day of
    # the resulting month
    #
    @lastmonth = Add_Delta_Days( @lastmonth, -1 );
    $lastmonth[2] = 1;

    #
    # Return the date as a DateTime object
    #
    return (@lastmonth,@starttime);
}

#===  FUNCTION  ================================================================
#         NAME: trailing_days
#      PURPOSE: Determines if the last month had 'trailing' days - those after
#               the recording date - during which unread comments could have
#               been posted.
#   PARAMETERS: $dc_lr          reference to an array containing a Date::Calc
#                               date of the last recording
#               $dc_lm          reference to an array containing a Date::Calc
#                               date of the first day of last month
#      RETURNS: A true/false result - 1 if there were trailing days,
#               0 otherwise
#  DESCRIPTION: If the recording of a Community News show was during the month
#               being reviewed (e.g. March 2019; recording on 2019-03-30,
#               release on 2019-04-01) then a comment could have been made
#               after (or during!) the recording and probably would not have
#               been read during the show. We want to spot such a case and
#               highlight it in the *next* recording!
#               Yes, I know this is obsessive, but it needs a solution!!!
#       THROWS: No exceptions
#     COMMENTS: None
#     SEE ALSO: N/A
#===============================================================================
sub trailing_days {
    my ( $dc_lr, $dc_lm ) = @_;

    my $offset;

    #
    # Are the last month and the recording month the same?
    #
    if ( $dc_lm->[1] eq $dc_lr->[1] ) {
        #
        # Compute the offset as Delta_Days((First day of last month + days in
        # month), recording date). A positive offset (not sure if we'd get
        # a negative one) means there's some of the month still to go.
        #
        $offset
            = Delta_Days( @$dc_lr[0..2],
            Add_Delta_Days( @$dc_lm[0..2], Days_in_Month( @$dc_lm[ 0, 1 ] ) ) );
        return 1 if $offset gt 0;
    }

    return 0;
}

#===  FUNCTION  ================================================================
#         NAME: validate_date
#      PURPOSE: Checks the date found in the database ($date) is before the
#               reference date ($refdate)
#   PARAMETERS: $date           Textual date from the database
#               $refdate        Optional textual date to compare with. If
#                               omitted then we use 'Today'
#      RETURNS: True (1) if the $date is later than the $refdate, false (0)
#               otherwise
#  DESCRIPTION: We need to check that the script is not being used to change
#               the notes for a Community News show in the past. This is
#               because sometimes the generated notes are edited after they
#               have been created to add other elements, and we do not want to
#               accidentally destroy such changes. We just compute the
#               difference between today and the date of the target episode.
#               If the difference in days is greater than 0 then it's OK.
#               We also want to be able to use this routine to check whether
#               comments relate to old shows or this month's.
#       THROWS: No exceptions
#     COMMENTS: None
#     SEE ALSO: N/A
#===============================================================================
sub validate_date {
    my ($date, $refdate) = @_;

    my @refdate;

    unless (defined($date)) {
        warn "check_date: invalid argument\n";
        return 0;
    }

    my @date = ($date =~ /^(\d{4})-(\d{2})-(\d{2})$/);
    if (defined($refdate)) {
        @refdate = ($refdate =~ /^(\d{4})-(\d{2})-(\d{2})$/);
    }
    else {
        @refdate = Today();
    }

    return (Delta_Days(@refdate,@date) >= 0);

}

#===  FUNCTION  ================================================================
#         NAME: my_decode_entities
#      PURPOSE: Call 'HTML::Entities::decode_entities' as a filter in a template
#   PARAMETERS: $text           The text string to process
#      RETURNS: The text string with all HTML entities decoded to Unicode
#  DESCRIPTION: This is a local filter to be called in a template. The name
#               it's called by is defined in the 'FILTERS' definition in the
#               call to Template::new. Maybe this function is redundant and
#               the real 'decode_entities' could have been called directly,
#               but this is slightly easier to understand (I think).
#       THROWS: No exceptions
#     COMMENTS: None
#     SEE ALSO: N/A
#===============================================================================
sub my_decode_entities {
    my $text = shift;

    decode_entities($text);
    return $text;
}

#===  FUNCTION  ================================================================
#         NAME: emit
#      PURPOSE: Print text on STDERR unless silent mode has been selected
#   PARAMETERS: - Boolean indicating whether to be silent or not
#               - list of arguments to 'print'
#      RETURNS: Nothing
#  DESCRIPTION: This is a wrapper around 'print' to determine whether to send
#               a message to STDERR depending on a boolean. We need this to be
#               able to make the script silent when the -silent option is
#               selected
#       THROWS: No exceptions
#     COMMENTS: None
#     SEE ALSO: N/A
#===============================================================================
sub emit {
    unless (shift) {
        print STDERR @_;
    }
}

#===  FUNCTION  ================================================================
#         NAME: _debug
#      PURPOSE: Prints debug reports
#   PARAMETERS: $active         Boolean: 1 for print, 0 for no print
#               @messages       List of messages to print
#      RETURNS: Nothing
#  DESCRIPTION: Outputs messages to STDERR if $active is true. For each
#               message it removes any trailing newline and then adds one in
#               the 'print' so the caller doesn't have to bother. Prepends
#               each message with 'D> ' to show it's a debug message.
#       THROWS: No exceptions
#     COMMENTS: None
#     SEE ALSO: N/A
#===============================================================================
sub _debug {
    my ( $active, @messages ) = @_;

    return unless $active;
    foreach my $msg (@messages) {
        chomp($msg);
        print STDERR "D> $msg\n";
    }
}

#===  FUNCTION  ================================================================
#         NAME: Options
#      PURPOSE: Processes command-line options
#   PARAMETERS: $optref     Hash reference to hold the options
#      RETURNS: Undef
#  DESCRIPTION:
#       THROWS: no exceptions
#     COMMENTS: none
#     SEE ALSO: n/a
#===============================================================================
sub Options {
    my ($optref) = @_;

    my @options = (
        "help",                   "documentation|man",
        "debug=i",                "from=s",
        "out=s",                  "template=s",
        "comments!",              "markcomments|mc!",
        "ctext!",                 "lastrecording|lr=s",
        "silent!",                "episode=s",
        "overwrite!",             "mailnotes:s",
        "anyotherbusiness|aob=s", "config=s",
        "interlock=s"
    );

    if ( !GetOptions( $optref, @options ) ) {
        pod2usage( -msg => "$PROG version $VERSION\n", -exitval => 1 );
    }

    return;
}

__END__

#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
#  Application Documentation
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
#{{{

=head1 NAME

make_shownotes - Make HTML show notes for the Hacker Public Radio Community News show

=head1 VERSION

This documentation refers to B<make_shownotes> version 0.2.2


=head1 USAGE

    make_shownotes [-help] [-documentation] [-from=DATE] [-[no]comments]
        [-[no]markcomments] [-[no]ctext] [-lastrecording=DATETIME]
        [-[no]silent] [-out=FILE] [-episode=[N|auto]] [-[no]overwrite]
        [-mailnotes[=FILE]] [-anyotherbusiness=FILE] [-template=FILE]
        [-config=FILE] [-interlock=PASSWORD]

=head1 OPTIONS

=over 8

=item B<-help>

Displays a brief help message describing the usage of the program, and then exits.

=item B<-documentation>

Displays the entirety of the documentation (using a pager), and then exits. To
generate a PDF version use:

    pod2pdf make_shownotes --out=make_shownotes.pdf

=item B<-from=DATE>

This option is used to indicate the month for which the shownotes are to be
generated. The script is able to parse a variety of date formats, but it is
recommended that ISO8601 YYYY-MM-DD format be used (for example 2014-06-30).

The day part of the date must be present but is ignored and only the month and
year parts are used.

If this option is omitted the current month is used.

=item B<-[no]comments>

This option controls whether the comments pertaining to the selected month are
included in the output. If the option is omitted then no comments are included
(B<-nocomments>).

=item B<-[no]markcomments> or B<-[no]mc>

This option controls whether certain comments are marked in the HTML. The
default is B<-nomarkcomments>. The option can be abbreviated to B<-mc> and
B<-nomc>.

The scenario is that we want to use the notes the script generates while
making a Community News recording and we also want them to be the show notes
in the database once the show has been released.

Certain comments relating to shows earlier than this month were already
discussed last month, because they were made before that show was recorded. We
don't want to read them again during this show, so a means of marking them is
needed.

The script determines the date of the last recording (or it can be specified
with the B<-lastrecording=DATETIME> option, or its abbreviation
B<-lr=DATETIME>) and passes it to the template. The template can then compare
this date with the dates of the relevant comments and take action to highlight
those we don't want to re-read. It is up to the template to do what is
necessary to highlight them.

The idea is that we will turn off the marking before the notes are released
- they are just for use by the people recording the episode.

Another action is taken during the processing of comments when this option is
on. On some months of the year the recording is made during the month itself
because the first Monday of the next month is in the first few days of that
month. For example, in March 2019 the date of recording is the 30th, and the
show is released on April 1st. Between the recording and the release of the
show there is time during which more comments could be submitted.

Such comments should be in the notes for March (and these can be regenerated
to make sure this is so) but they will not have been read on the March
recording. The B<make_shownotes> script detects this problem and, if
B<-markcomments> is set (and comments enabled) will show a list of any
eligible comments in a red highlighted box. This is so that the volunteers
recording the show can ensure they read comments that have slipped through
this loophole. The display shows the entire comment including the contents,
but disappears when the notes are refreshed with B<-nomarkcomments> (the
default).

In this mode the preamble warning about comments to be ignored used to be
included, but now it is skipped if there are no such comments. This means one
switch can serve two purposes.

=item B<-lastrecording=DATETIME> or B<-lr=DATETIME>

As mentioned for B<-markcomments>, the date of the last recording can be
computed in the assumption that it's on the Saturday before the first Monday
of the month at 15:00. However, on rare occasions it may be necessary to
record on an earlier date and time, which cannot be computed. This value can
be defined with this option.

The format can be an ISO 8601 date followed by a 24-hour time, such as
'2020-01-25 15:00'. If the time is omitted it defaults to 15:00.

=item B<-[no]ctext>

This option controls whether the comment text itself is listed with comments.
This is controlled by the template, but the current default template only
shows the text in the B<Past shows> section of the output. The default
state is B<-noctext> in which the comment texts are not written.

=item B<-[no]silent>

This option controls whether the script reports details of its progress
to STDERR. If the option is omitted the report is generated (B<-nosilent>).

The script reports: the month it is working on, the name of the output file
(if appropriate) and details of the process of writing notes to the database
(if the B<-episode=[N|auto]> option is selected).

=item B<-mailnotes[=FILE]>

If desired, the show notes may include a section about recent discussions on
the HPR mailing list. Obviously, this text will change every month, so this
option provides a way in which an external file can be included in the show
notes.

The filename may be omitted which is a way in which a B<BLOCK> directive can
be placed in the template and used rather than the file. The B<BLOCK> must be
named B<default_mail> because this is the name the script uses in this
circumstance. See B<shownote_template8.tpl> for an example of its use.

The template must contain instructions to include the file or block. The file
name is stored in a variable 'B<includefile>' in the template. Directives of
the following form may be added to achive this:

    [%- IF includefile.defined %]
    Constant header, preamble, etc
    [%- INCLUDE $includefile %]
    Other constant text or tags
    [%- END %]

The first directive causes the whole block to be ignored if there is no
B<-mailnotes> option. The use of the B<INCLUDE> directive means that the
included file may contain Template directives itself if desired.

See existing templates for examples of how this is done.

=item B<-anyotherbusiness=FILE> or B<-aob=FILE>

If desired the shownotes may contain an 'Any other business' section. This is
implemented in a template thus:

    [% IF aob == 1 -%]
    <h2>Any other business</h2>
    [% INCLUDE $aobfile -%]
    [%- END %]

The template variable B<aob> is set to 1 if a (valid) file has been provided,
and the name of the file is in B<aobfile>.

The included file is assumed to be HTML.

=item B<-out=FILE>

This option defines an output file to receive the show notes. If the option is
omitted the notes are written to STDOUT, allowing them to be redirected if
required.

The output file name may contain the characters 'B<%s>'. This denotes the point
at which the year and month in the format B<YYYY-MM> are inserted. For example
if the script is being run for July 2014 the option:

    -out=shownotes_%s.html

will cause the generation of the file:

    shownotes_2014-07.html

=item B<-episode=[N|auto]>

This option provides a means of specifying an episode number in the database to
receive the show notes.

It either takes a number, or it takes the string 'B<auto>' which makes the
script find the correct show number.

First the episode number has to have been reserved in the database. This is
done by running the script 'B<reserve_cnews>'. This makes a reservation with
the title "HPR Community News for <monthname> <year>". Normally Community News
slots are reserved several months in advance.

Close to the date of the Community News show recording this script can be run
to write show notes to the database. For example:

    ./make_shownotes -from=1-Dec-2014 -out=/dev/null \
        -comm -tem=shownote_template5.tpl -ep=auto

This will search for the episode with the title "HPR Community News for
December 2014" and will add notes if the field is empty. Note that it is
necessary to direct the output to /dev/null since the script needs to write
a copy of the notes to STDOUT or to a file. In this case we request comments
to be added to the notes, and we use the template file
B<shownote_template5.tpl> which generates an HTML snippet suitable for the
database.

The writing of the notes to the database will fail if the field is not empty.
See the B<-overwrite> option for how to force the notes to be written.

If the B<-episode=[N|auto]> option is omitted no attempt is made to write to
the database.

=item B<-[no]overwrite>

This option is only relevant in conjunction with the B<-episode=[N|auto]>
option. If B<-overwrite> is chosen the new show notes will overwrite any notes
already in the database. If B<-nooverwrite> is selected, or the option is
omitted, no over writing will take place - it will only be possible to write
notes to the database if the field is empty.

=item B<-template=FILE>

This option defines the template used to generate the notes. The template is
written using the B<Template> toolkit language.

If the option is omitted then the script uses the file
B<shownote_template.tpl> in the same directory as the script. If this file
does not exist then the script will exit with an error message.

For convenience B<shownote_template.tpl> is a soft link which points to the
file which is the current default. This allows the development of versions
without changing the usual way this script is run.

=item B<-config=FILE>

This option allows an alternative configuration file to be used. This file
defines the location of the database, its port, its name and the username and
password to be used to access it. This feature was added to allow the script
to access alternative databases or the live database over an SSH tunnel.

See the CONFIGURATION AND ENVIRONMENT section below for the file format.

If the option is omitted the default file is used: B<.hpr_db.cfg>

=item B<-interlock=PASSWORD>

This option was added to handle the case where the notes for a Community News
episode have been posted after the show was recorded, but, since the recording
date was not the last day of the month further comments could be added after
upload. Logically these comments belong in the previous month's shownotes, so
we'd need to add them retrospecively.

Up until the addition of this option the script would not allow the
regeneration of the notes. This option requires a password to enable the
feature, but the password is in a constant inside the script. This means that
it's difficult to run in this mode by accident, but not particulary difficult
if it's really needed.

Take care not to run in this mode if the notes have been edited after they
were generated!

=back

=head1 DESCRIPTION

=head2 Overview

This script generates notes for the next Hacker Public Radio I<Community News>
show. It does this by collecting various details of activity from the HPR
database and passing them to a template. The default template is called
B<shownote_template.tpl> and this generates HTML, but any suitable textual
format could be generated if required, by using a different template.

=head2 Data Gathering

Four types of information are collected by the script:

=over 4

=item -

Details of new hosts who have released new shows in the selected month

=item -

Details of shows which have been released in the selected month

=item -

Details of topics on the mailing list in the past month can be included. This
is only done if the B<-mailnotes=FILE> option is used. This option must
reference a file of HTML, which may contain Template directives if required.

=item -

Comments which have been submitted to the HPR website in the selected month.
These need to be related to shows in the current period or in the past.
Comments made about shows which have not yet been released (but are visible on
the website) are not included even though they are made in the current month.

Comments are only gathered if the B<-comments> option is selected.

=back

=head2 Report Generation

The four components listed above are formatted in the following way by the
default template.

=over 4

=item B<New Hosts>

These are formatted as a list of links to the hostid with the host's name.

=item B<Shows>

These are formatted into an HTML table containing the show number, title and
host name. The show title is a link to the show page on the HPR website. The
host name is a link to the host page on the website.

=item B<Mailing list discussions>

If there have been significant topics on the mailing list in the month in
question then these can be summarised in this section. This is done by
preparing an external HTML file and referring to it with the
B<-mailnotes=FILE> option. If this is done then the file is included into the
template.

See the explanation of the B<-mailnotes> option for more details.

=item B<Comments>

These are formatted with <article> tags separated by horizontal lines.
A <header> shows the author name and title and a <footer> displays a link to
the show and the show's host and the show title is also included. The body of
the article contains the comment text with line breaks.

=back

=head2 Variable, Field and Hash names

If you wish to write your own template refer to the following lists for the
names of items. Also refer to the default template B<shownote_template.tpl>
for the techniques used there. (Note that B<shownote_template.tpl> is a link
to the current default template, such as B<shownote_template8.tpl>).

The hash and field names available to the template are as follows

=over 4

=item B<Global variables>

 Variable Name          Details
 -------------          -------
 review_month           The month name of the report date
 review_year            The year of the report date
 comment_count          The number of comments in total
 past_count             The number of comments on old shows
 skip_comments          Set when -comments is omitted
 mark_comments          Set when -markcomments is used
 ctext                  Set when the comment bodies in the 'Past shows'
                        section are to be shown
 last_recording         The date the last recording was made
                         (computed if -markcomments is selected) in
                         Unixtime format
 last_month             The month prior to the month for which the notes are
                        being generated (computed if -markcomments is
                        selected) in 'YYYY-MM' format

=item B<New Hosts>

The name of the hash in the template is B<hosts>. The hash might be empty if
there are no new hosts in the month. See the default template for how to
handle this.

 Field Name     Details
 ----------     -------
 host           Name of host
 hostid         Host id number

=item B<Show Details>

The name of the hash in the template is B<shows>. Note that there are more
fields available than are used in the default template. Note also that certain
field names are aliases to avoid clashes (e.g. eps_hostid and ho_hostid).

 Field Name     Details
 ----------     -------
 eps_id         Episode number
 date           Episode date
 title          Episode title
 length         Episode duration
 summary        Episode summary
 notes          Episode show notes
 eps_hostid     The numerical host id from the 'eps' table
 series         The series number from the 'eps' table
 explicit       The explicit marker for the show
 eps_license    The license for the show
 tags           The show's tags as a comma-delimited string
 version        ?Obsolete?
 eps_valid      The valid value from the 'eps' table
 ho_hostid      The host id number form the 'hosts' table
 ho_host        The host name
 email          The hosts's email address (true address - caution)
 profile        The host's profile
 ho_license     The default license for the host
 ho_valid       The valid value from the 'hosts' table

=item B<Mailing List Notes>

The variable B<includefile> contains the path to the file (which may only be
located in the same directory as the script).

=item B<Comment Details>

Two hashes are created for comments. The hash named B<past> contains comments
to shows before the current month, and B<current> contains comments to this
month's shows. Note that these hashes are only populated if the B<-comments>
option is provided. Both hashes have the same structure.

 Field Name             Details
 ----------             -------
 episode                Episode number
 identifier_url         Full show URL
 title                  Episode title
 date                   Episode date
 host                   Host name
 hostid                 Host id number
 timestamp              Comment timestamp in ISO8601 format
 comment_author_name    Name of the commenter
 comment_title          Title of comment
 comment_text           Text of the comment
 comment_timestamp_ut   Comment timestamp in Unixtime format
 in_range               Boolean (0/1) denoting whether the comment was made
                        in the target month
 index                  The numerical index of the comment for a given show

The purpose of the B<in_range> value is to denote whether a comment was made
in the target month. This is used in the script to split the comments into the
B<past> and B<current> hashes. It is therefore of little use in the template,
but is retained in case it might be useful. The B<index> value can be used in
the template to refer to the comment, make linking URLs etc. It is generated
by the script (unfortunately it couldn't be done in the SQL).

=back

=head2 Filters

A filter called B<decode_entities> is available to the template. The reason
for creating this was when the HTML of a comment is being listed as text
(Unicode actually). Since comment text is stored in the database as HTML with
entities when appropriate this is needed to prevent the plain text showing
I<&amp;> and the like verbatim. It is currently used in B<comments_only.tpl>.

=head1 DIAGNOSTICS

=over 4

=item B<Unable to find configuration file ...>

The nominated configuration file in B<-config=FILE> (or the default file)
cannot be found.

=item B<Episode number must be greater than zero>

The B<-episode=N> option must use a positive number.

=item B<Episode must be a number or 'auto'>

The B<-episode=> option must be followed by a number or the word 'auto'

=item B<Error: Unable to find includefile ...>

The include file referred to in the error message is missing.

=item B<Error: Unable to find template ...>

The template file referred to in the error message is missing.

=item B<Invalid -from=DATE option '...'>

The date provided through the B<-from=DATE> option is invalid. Use an ISO8601
date in the format YYYY-MM-DD.

=item B<Unable to open ... for writing: ...>

The file specified in the B<-out=FILE> option cannot be written to. This may
be because you do not have permission to write to the file or directory.
Further information about why this failed should be included in the message.

=item B<Unable to initialise for writing: ...>

The script was unable to open STDOUT for writing the report. Further
information about why this failed should be included in the message.

=item B<Error: wrong show selected>

The B<-episode=N> option has been selected and the script is checking the
numbered show but has not found a Community News title.

=item B<Error: show ... has a date in the past>

The B<-episode=> option has been selected and a Community News show entry has
been found in the database. However, this entry is for today's show or is in
the past, which is not permitted. It is possible to override this restriction
by using the B<-interlock=PASSWORD> option. See the relevant documentation for
details.

=item B<Error: show ... already has notes>

The B<-episode=> option has been selected and a Community News show entry has
been found in the database. However, this entry already has notes associated
with it and the B<-overwrite> option has not been specified.

=item B<Error: episode ... does not exist in the database>

The B<-episode=N> option has been selected but the script cannot find this
episode number in the database.

=item B<Error: Unable to find an episode for this month's notes>

The B<-episode=auto> option has been selected but the script cannot find the
episode for the month being processed.

Possible reasons for this are that the show has not been reserved in the
database or that the title is not as expected. Use B<reserve_cnews> to reserve
the slot. The title should be "HPR Community News for <monthname> <year>".

=back

=head1 CONFIGURATION AND ENVIRONMENT

The script obtains the credentials it requires to open the HPR database from
a configuration file. The name of the file it expects is B<.hpr_db.cfg> in the
directory holding the script. To change this will require changing the script.

The configuration file format is as follows:

 <database>
     host = 127.0.0.1
     port = PORT
     name = DATABASE
     user = USERNAME
     password = PASSWORD
 </database>

=head1 DEPENDENCIES

 Carp
 Config::General
 Date::Calc
 Date::Parse
 DateTime
 DateTime::Duration
 DBI
 Getopt::Long
 Pod::Usage
 Template
 Template::Filters

=head1 BUGS AND LIMITATIONS

There are no known bugs in this module.
Please report problems to Dave Morriss (Dave.Morriss@gmail.com)
Patches are welcome.

=head1 AUTHOR

Dave Morriss  (Dave.Morriss@gmail.com)


=head1 LICENCE AND COPYRIGHT

Copyright (c) 2014-2019 Dave Morriss (Dave.Morriss@gmail.com). All rights reserved.

This module is free software; you can redistribute it and/or
modify it under the same terms as Perl itself. See perldoc perlartistic.

This program is distributed in the hope that it will be useful
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

=cut

#}}}

# [zo to open fold, zc to close or za to toggle]

# vim: syntax=perl:ts=8:sw=4:et:ai:tw=78:fo=tcrqn21:fdm=marker