diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..2d584ed --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +# Ignore vim backup and swap files +*~ +*.swp diff --git a/Comment_system/.process_comments.cfg b/Comment_system/.process_comments.cfg new file mode 100644 index 0000000..b6612fd --- /dev/null +++ b/Comment_system/.process_comments.cfg @@ -0,0 +1,25 @@ +# +# Main configuration file for 'process_comments' using the local database +# +# /home/cendjm/HPR/Comment_system/.process_comments.cfg +# 2023-02-27 16:42:50 +# + +# +# Settings used in all configuration files +# +<> + +# +# Local database +# +<> + +# +# Fake CMS authentication +# + + user = "dummy:dummy" + + +# vim: syntax=cfg:ts=8:sw=4:tw=150:et:ai: diff --git a/Comment_system/.process_comments_live.cfg b/Comment_system/.process_comments_live.cfg new file mode 100644 index 0000000..cfb0aa8 --- /dev/null +++ b/Comment_system/.process_comments_live.cfg @@ -0,0 +1,25 @@ +# +# Main configuration file for 'process_comments' using the live database +# +# /home/cendjm/HPR/Comment_system/.process_comments_live.cfg +# 2023-02-27 16:48:08 +# + +# +# Settings used in all configuration files +# +<> + +# +# Local database +# +<> + +# +# CMS authentication +# + + <> + + +# vim: syntax=cfg:ts=8:sw=4:tw=150:et:ai: diff --git a/Comment_system/.process_comments_settings.cfg b/Comment_system/.process_comments_settings.cfg new file mode 100644 index 0000000..2a8466a --- /dev/null +++ b/Comment_system/.process_comments_settings.cfg @@ -0,0 +1,42 @@ +# +# Settings for 'process_comments' +# +# /home/cendjm/HPR/Comment_system/.process_comments_settings.cfg +# 2023-02-28 20:37:58 +# + + PROG = process_comments + + # + # Defaults + # + basedir = "$HOME/HPR/Comment_system" + configfile = "$basedir/.hpr_db.cfg" + logfile = "$basedir/logs/${PROG}.log" + template = "$basedir/${PROG}.tpl" + + # + # Mail message stash area + # + maildrop = "$HOME/HPR/CommentDrop" + processed = "$maildrop/processed" + rejected = "$maildrop/rejected" + banned = "$maildrop/banned" + + # + # JSON stash area + # + jsondir = "$basedir/json" + jprocessed = "$jsondir/processed" + jrejected = "$jsondir/rejected" + jbanned = "$jsondir/banned" + + # + # How to tell the server the comment's processed + # + callback_template = \ + "https://hub.hackerpublicradio.org/cms/comment_process.php?key=%s&action=%s" + + + +# vim: syntax=cfg:ts=8:sw=4:tw=150:et:ai: diff --git a/Comment_system/manage_comment_spool b/Comment_system/manage_comment_spool new file mode 100755 index 0000000..839f542 --- /dev/null +++ b/Comment_system/manage_comment_spool @@ -0,0 +1,166 @@ +#!/bin/bash - +#=============================================================================== +# +# FILE: manage_comment_spool +# +# USAGE: ./manage_comment_spool [subject] [message-id] +# +# DESCRIPTION: Deals with comments in the spool area where they are dropped +# by Thunderbird. This script is also designed to be run out of +# Thunderbird when it turns on or off the LED on the Blinkt! +# (using MQTT) and de-duplicates comments if necessary. +# +# OPTIONS: --- +# REQUIREMENTS: --- +# BUGS: --- +# NOTES: --- +# AUTHOR: Dave Morriss (djm), Dave.Morriss@gmail.com +# VERSION: 0.0.3 +# CREATED: 2023-07-14 15:38:33 +# REVISION: 2023-12-24 16:00:05 +# +#=============================================================================== + +set -o nounset # Treat unset variables as an error + +SCRIPT=${0##*/} + +VERSION="0.0.3" + +STDOUT="/dev/fd/2" + +#=== FUNCTION ================================================================ +# NAME: alert +# DESCRIPTION: Turn a LED on the Blinkt! host to an RGB colour +# PARAMETERS: 1 - LED number 0..7 +# 2 - RGB colour as 'R,G,B' values, default '0,0,0' +# RETURNS: 1 on error, otherwise 0 +#=============================================================================== +function alert () { + local LED="${1}" + local RGB="${2:-0,0,0}" + + local BHOST="192.168.0.63" + + mosquitto_pub -h $BHOST -t pimoroni/blinkt -m "rgb,$LED,$RGB" + +} + +#=== FUNCTION ================================================================ +# NAME: _usage +# DESCRIPTION: Report usage +# PARAMETERS: None +# RETURNS: Nothing +#=============================================================================== +_usage () { + cat >$STDOUT <<-endusage +Usage: ./${SCRIPT} [-h] [-s] [subject] [message-id] + +Version: $VERSION + +Script to be invoked via Thunderbird to manage and report on the comment spool +area + +Options: + -h Print this help + -s Silent mode, output less text about actions + +Arguments: + subject + message-id +These are optional and are only provided when called by Thunderbir + +Examples + ./${SCRIPT} -h + +endusage + exit +} + +#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +# +# Option defaults +# +SILENT=0 # not silent by default + +# +# Process options +# +while getopts :hs opt +do + case "${opt}" in + h) _usage;; + s) SILENT=1;; + ?) echo "$SCRIPT: Invalid option; aborting"; exit 1;; + esac +done +shift $((OPTIND - 1)) + +# +# Constants +# +BASENAME="$HOME/HPR/Comment_system" +LOGDIR="$BASENAME/logs" +LOG="$LOGDIR/$SCRIPT.log" +SPOOLDIR="$HOME/HPR/CommentDrop" + +# The LED to light +LED=1 + +# Whether we're doing alerts +ALERTING=1 + +# +# We expect to be called with two arguments if called from Thunderbird, +# otherwise we'll make empty defaults. +# +if [[ $# -eq 2 ]]; then + subject="$1" + message_id="$2" +else + subject= + message_id= +fi + +# +# Check the spool directory +# +declare -a EMAIL +mapfile -t EMAIL < <(find "$SPOOLDIR" -maxdepth 1 -name "*.eml" -printf '%p\n') + +# +# Clear out files which end in '-1.eml' (or any single digit number), and tidy +# the array as well. +# +i=0 +for m in "${EMAIL[@]}"; do + if [[ "$m" =~ -[1-9].eml$ ]]; then + unset "EMAIL[$i]" + rm -f "$m" + fi + ((i++)) +done + +# +# If we have comments left we turn on the LED, otherwise we turn it off +# +comments="${#EMAIL[@]}" +if [[ $comments -eq 0 ]]; then + [ "$SILENT" == 0 ] && echo "Nothing found" + [ "$ALERTING" == 1 ] && alert $LED + exit +else + [ "$SILENT" == 0 ] && echo "Found $comments $(ngettext comment comments "$comments")" + [ "$ALERTING" == 1 ] && alert $LED '0,255,128' +fi + +# +# Log the call, but only if there were comments. This includes the two +# arguments passed by the filter, the subject and message-id. +# +echo "$SCRIPT $(date +'%F %H:%M:%S') '$$' '$subject' '$message_id'" >> "$LOG" + +exit + +# vim: syntax=sh:ts=8:sw=4:ai:et:tw=78:fo=tcrqn21 diff --git a/Comment_system/process_comments b/Comment_system/process_comments new file mode 100755 index 0000000..5752303 --- /dev/null +++ b/Comment_system/process_comments @@ -0,0 +1,2343 @@ +#!/usr/bin/env perl +#=============================================================================== +# +# FILE: process_comments +# +# USAGE: ./process_comments [-help] [-docmentation|manpage] [-debug=N] +# [-[no]dry-run] [-verbose ...] [-[no]verify] [-[no]live] +# [-[no]json] [-config=FILE] +# +# DESCRIPTION: Read new comments received as mail messages (or as JSON files) +# and perform moderation actions on them before possibly adding +# them to the HPR database. +# +# OPTIONS: --- +# REQUIREMENTS: --- +# BUGS: --- +# NOTES: 2023-12-23 enabled the experimental::try feature and removed +# the use of 'Try::Tiny'. Revert by removing "use feature 'try'" +# and 'no warnings "experimental::try"'. Remove the '($e)' from +# each 'catch' and end the 'catch' braces with a semicolon. Add +# back 'use Try::Tiny'. +# +# AUTHOR: Dave Morriss (djm), Dave.Morriss@gmail.com +# VERSION: 0.3.3 +# CREATED: 2017-09-04 18:06:49 +# REVISION: 2024-01-10 13:02:51 +# +#=============================================================================== + +use v5.36; # say and try/catch +use feature 'try'; +use strict; +use warnings; +no warnings "experimental::try"; +use utf8; + +use Carp; +use Getopt::Long; +use Pod::Usage; + +use Config::General; +use List::Util qw{max}; +use HTML::Parser; +use HTML::Restrict; +use HTML::Entities; +use File::Find::Rule; +use File::Copy; +use File::Slurper 'read_text'; +#use IO::All; +use JSON; + +use Mail::Address; +use Mail::Field; +use Mail::Internet; +use MIME::Parser; +use Encode qw(decode encode); + +use DateTime::Format::ISO8601; +use IO::Prompter; +use Template; +use SQL::Abstract; +use LWP::UserAgent; + +#use TryCatch; # Failing 2020-05-31 due to a problem in Devel::Declare +#use Try::Tiny; # Using in-built experimental try/catch +use Log::Handler; + +use DBI; + +use Data::Dumper; + +# +# Version number (manually incremented) +# +our $VERSION = '0.3.3'; + +# +# Script and directory names +# +( my $PROG = $0 ) =~ s|.*/||mx; +( my $DIR = $0 ) =~ s|/?[^/]*$||mx; +$DIR = '.' unless $DIR; + +#------------------------------------------------------------------------------- +# Declarations +#------------------------------------------------------------------------------- +# +# Constants and other declarations +# +my $basedir = "$ENV{HOME}/HPR/Comment_system"; +my $configfile = "$basedir/.${PROG}.cfg"; + +my ( $dbh, $sth1, $h1 ); + +my ( @ignore, @files ); +my ($comment, %updates, $shortfile); + +# +# The MIME part we're looking for +# +my $target_mimetype = 'application/json'; + +# +# All of the keys in the JSON we're interested in (when checking and adding to +# the database) +# +my @json_keys = ( + "eps_id", "comment_timestamp", + "comment_author_name", "comment_title", + "comment_text", +); + +# +# Keys we need to clean +# +my @cleanable_keys = ( + "comment_author_name", "comment_title", + "comment_text", +); + +# +# Enable Unicode mode +# +binmode STDOUT, ":encoding(UTF-8)"; +binmode STDERR, ":encoding(UTF-8)"; + +#------------------------------------------------------------------------------- +# Options and arguments +#------------------------------------------------------------------------------- +my $DEF_DEBUG = 0; +my $DEF_LIVE = 0; # Gets edited on borg + +# +# Process options +# +my %options; +Options( \%options ); + +# +# Default help +# +pod2usage( -msg => "$PROG version $VERSION\n", -verbose => 0, -exitval => 1 ) + if ( $options{'help'} ); + +# +# Full documentation if requested with -doc +# +pod2usage( + -msg => "$PROG version $VERSION\n", + -verbose => 2, + -exitval => 1, + -noperldoc => 0, +) if ( $options{'documentation'} ); + +# +# Collect options +# +my $DEBUG = ( defined( $options{debug} ) ? $options{debug} : $DEF_DEBUG ); +my $cfgfile + = ( defined( $options{config} ) ? $options{config} : $configfile ); +my $live = ( defined( $options{live} ) ? $options{live} : $DEF_LIVE ); +my $verify = ( defined( $options{verify} ) ? $options{verify} : 0 ); +my $dry_run = ( defined( $options{'dry-run'} ) ? $options{'dry-run'} : 0 ); +my $verbose = ( defined( $options{verbose} ) ? $options{verbose} : 0 ); +my $jsonmode = ( defined( $options{json} ) ? $options{json} : 0 ); + +# +# Report on the options if requested +# +if ($DEBUG > 0) { + _debug(1, 'Options'); + _debug(1, + '-config=' . $cfgfile, + ($live eq 0 ? '-nolive ' : '-live'), + ($verify eq 0 ? '-noverify ' : '-verify'), + ($dry_run eq 0 ? '-nodry-run ' : '-dry-run'), + '-verbose=[' . $verbose . ']', + ($jsonmode eq 0 ? '-nojson ' : '-json'), + ); + _debug(1, ''); +} + +# +# Sanity checks +# +die "Unable to find configuration file: $cfgfile\n" unless ( -e $cfgfile ); + +#------------------------------------------------------------------------------- +# Configuration file - load data +#------------------------------------------------------------------------------- +my $conf = Config::General->new( + -ConfigFile => $cfgfile, + -InterPolateVars => 1, + -InterPolateEnv => 1, + -ExtendedAccess => 1, +); +my %config = $conf->getall(); + +_debug($DEBUG > 2, 'Configuration settings:', Dumper(\%config)); + +my $logfile = $config{settings}->{logfile}; +my $template = $config{settings}->{template}; + +my $maildrop = $config{settings}->{maildrop}; +my $processed = $config{settings}->{processed}; +my $rejected = $config{settings}->{rejected}; +my $banned = $config{settings}->{banned}; + +my $jsondir = $config{settings}->{jsondir}; +my $jprocessed = $config{settings}->{jprocessed}; +my $jrejected = $config{settings}->{jrejected}; +my $jbanned = $config{settings}->{jbanned}; + +my $callback_template = $config{settings}->{callback_template}; + +my ($cb_user, $cb_password) = split(":",$config{cms}->{user}); + +#------------------------------------------------------------------------------- +# Moderation responses and the arguments associated with them (from the +# configuration data). +#------------------------------------------------------------------------------- +# +# key => [ 3 element array ] +# +# - key is the menu element from do_moderation +# - the array contains: +# - text to be used in messages +# - anonymous subroutine which returns the directory into which to place +# the file; call it with the $jsonmode variable +# - action keyword to send back to the server +# +my %moderation_keys = ( + 'Approve' => [ + 'processed', + sub { ( $_[0] ? $jprocessed : $processed ) }, + 'approve' + ], + 'Ban' => [ + 'banned', + sub { ( $_[0] ? $jbanned : $banned ) }, + 'block' + ], + 'Reject' => [ + 'rejected', + sub { ( $_[0] ? $jrejected : $rejected ) }, + 'delete' + ], +); + +# +# Make a RE from the keys +# +my $mkeys = join('|',keys(%moderation_keys)); +my $mkeys_re = qr{^$mkeys$}; + +#------------------------------------------------------------------------------- +# Connect to the database +#------------------------------------------------------------------------------- +my $dbhost = $config{database}->{host} // '127.0.0.1'; +my $dbport = $config{database}->{port} // 3306; +my $dbname = $config{database}->{name}; +my $dbuser = $config{database}->{user}; +my $dbpwd = $config{database}->{password}; +$dbh = DBI->connect( "dbi:mysql:host=$dbhost;port=$dbport;database=$dbname", + $dbuser, $dbpwd, { AutoCommit => 1 } ) + or croak $DBI::errstr; + +#$dbh = DBI->connect( "DBI:MariaDB:database=$dbname;host=$dbhost;port=$dbport", +# $dbuser, $dbpwd, { AutoCommit => 1 } ) +# or croak $DBI::errstr; + +# +# Enable client-side UTF8 +# +$dbh->{mysqlenable_utf8} = 1; + +# +# Set the local timezone to UTC for this connection +# +$dbh->do("set time_zone = '+00:00'") or carp $dbh->errstr; + +#------------------------------------------------------------------------------- +# Set up logging keeping the default log layout except for the date +#------------------------------------------------------------------------------- +my $log = Log::Handler->new(); + +$log->add( + file => { + timeformat => "%F %H:%M:%S (%s)", + filename => $logfile, + maxlevel => 7, + minlevel => 0 + } +); + +#------------------------------------------------------------------------------- +# If mode JSON is selected with -json option we'll check the defined 'json' +# directory, otherwise we'll look for work in the maildrop. +#------------------------------------------------------------------------------- +if ($jsonmode) { + # + # Look for JSON files + # + @ignore = ( '*~', '.*.swp', 'banned', 'processed', 'rejected' ); + my $jsonfileRE = qr{(\.(?i:json)|[^.]+)$}; + #<<< + @files = File::Find::Rule + ->file() + ->name($jsonfileRE) + ->not( File::Find::Rule->new->name(@ignore) ) + ->maxdepth(1) + ->in($jsondir); + #>>> + + if ( $DEBUG > 1 ) { + _debug(1, + "Files found in $jsondir", + Dumper( \@files ), + ); + } + + # + # There may be nothing there + # + die "No JSON files found; nothing to do\n" unless (@files); + + # + # Convert the JSON files into the %updates hash + # + do_JSON_process( \@files, \%updates, $log, $dry_run, $verbose ); + +} +else { + # + # Look for mail files + # + @ignore = ( '*~', '.*.swp', 'banned', 'processed', 'rejected' ); + + my $mailfileRE = qr{(\.(?i:eml)|[^.]+)$}; + #<<< + @files = File::Find::Rule + ->file() + ->name($mailfileRE) + ->not( File::Find::Rule->new->name(@ignore) ) + ->maxdepth(1) + ->in($maildrop); + #>>> + + if ( $DEBUG > 1 ) { + _debug(1, + "Files found in $maildrop", + Dumper( \@files ), + ); + } + + # + # There may be nothing there + # + unless (@files) { + warn "No mail found; nothing to do\n"; + exit; + } + + # + # Convert the mail messages into the %updates hash + # + do_mail_process( \@files, \%updates, $log, $dry_run, $verbose ); + +} + +#------------------------------------------------------------------------------- +# Check that comments aren't already in the database +#------------------------------------------------------------------------------- +do_database_check( $dbh, \%updates, $log, $verify, $dry_run ); + +#------------------------------------------------------------------------------- +# Perform the moderation pass +#------------------------------------------------------------------------------- +do_moderation( $dbh, \%updates, $template, $log, $verify, $dry_run ); + +# +# Dump the data structure if requested +# +_debug($DEBUG > 1, '%updates: ' . Dumper(\%updates)); + +#------------------------------------------------------------------------------- +# For the comments which are valid and approved, add them to the database +#------------------------------------------------------------------------------- +print '-' x 80, "\n" if $verbose > 0; +do_update( $dbh, \%updates, $log, $dry_run, $verbose ); + +#------------------------------------------------------------------------------- +# Walk the captured data reporting what happened (would happen) moving the +# files to the appropriate places and telling the server to do things to the +# originals (if appropriate) +#------------------------------------------------------------------------------- +foreach my $file ( sort_by_timestamp( \%updates ) ) { + # + # Deal with each file depending on the selected action (from the set + # 'Approve', 'Ban' and 'Reject') + # + if ( $updates{$file}->{action} =~ $mkeys_re ) { + # + # Get the elements we need for the function arguments below. + # NOTE: Apologies! This is obscure. The action key will be 'Approve', + # 'Ban' or 'Reject', and the lookup will return the name of the + # destination (for messages), an anonymous subroutine reference which determines + # the directory, and a keyword to send to the web server to dispose of + # the comment. The subref must be called with $jsonmode as the + # argument to get the destination which is different depending on + # whether we're in mail or JSON mode. This really needs to be made + # simpler and easier to understand when this code is revisited! + # + my ( $name, $destsub, $serverac ) + = ( @{ $moderation_keys{ $updates{$file}->{action} } } ); + my $dest = &$destsub($jsonmode); + + # + # Print moderation settings + # + _debug($DEBUG > 1, + '$name: ' . $name, + '$dest: ' . $dest, + '$serverac: ' . $serverac, + ); + + # + # Report what we're doing with the file and do it if appropriate + # + report_action( $updates{$file}, $name, $dest, $log, + $dry_run, $verbose ); + + # + # Tell the server what to do, but only if this is the live instance + # and we're not in dry-run mode + # + if ($live) { + unless ( + call_back( + $callback_template, $updates{$file}->{JSON}->{key}, + $serverac, $cb_user, $cb_password, $log, $dry_run, $verbose + ) + ) + { + print "Server update failed\n" if $verbose > 0; + } + } + else { + unless ($dry_run) { + print "Not live; no server action taken\n" if $verbose > 0; + } + } + } + elsif ( $updates{$file}->{action} eq 'Ignore' ) { + # + # The action here is 'Ignore' which means just leave it alone. Since + # only the moderator can choose this, or the failure to find the + # episode in the database, we know it's not a broken mail message or + # bad JSON. + # + if ( $verbose > 0 ) { + print "File '$file' ignored\n"; + print_errors( $updates{$file}->{errors} ); + } + } + elsif ( $updates{$file}->{skip} > 0 ) { + # + # The comment was found to be already in the database so we are + # skipping it. We want to report this and move the file to the + # 'processed' directory. + # + $shortfile = $updates{$file}->{shortfile}; + + unless ($dry_run) { + if ( $verbose > 0 ) { + print "Moving '$shortfile' to 'processed'\n"; + print "Reason: comment is already in the database\n"; + print_errors( $updates{$file}->{errors} ); + } + $log->info("Moving $file to 'processed'"); + unless ( moveFile( $file, ($jsonmode ? $jprocessed : $processed) ) ) { + warn "Unable to move $file\n"; + $log->error("Failed to move $file"); + } + } + else { + if ( $verbose > 0 ) { + print "Would move '$shortfile' to 'processed' (dry run)\n"; + print "Reason: comment is already in the database\n"; + print_errors( $updates{$file}->{errors} ); + } + } + } + else { + # + # We aborted this one early in the processing and validation, so it's + # really messed up. We're going to reject it outright with error + # messages. We can't tell the server because we don't have the + # key to do it. + # + $shortfile = $updates{$file}->{shortfile}; + + unless ($dry_run) { + if ( $verbose > 0 ) { + print "Moving $shortfile to 'rejected'\n"; + print "Reason: failed validation\n"; + print_errors( $updates{$file}->{errors} ); + } + $log->info("Moving $file to 'rejected'"); + unless ( moveFile( $file, ($jsonmode ? $jrejected : $rejected) ) ) { + warn "Unable to move $file\n"; + $log->error("Failed to move $file"); + } + } + else { + if ( $verbose > 0 ) { + print "Would move $shortfile to 'rejected' (dry run)\n"; + print "Reason: failed validation\n"; + print_errors( $updates{$file}->{errors} ); + } + } + } + + print '.' x 80, "\n" if $verbose > 0; +} + +exit; + +#=== FUNCTION ================================================================ +# NAME: do_JSON_process +# PURPOSE: Process any pending JSON files found in a directory search +# PARAMETERS: $files arrayref containing the JSON files found +# $updates hashref to contain the details collected from +# the JSON files +# $log log handle +# $dry_run Boolean flagging dry-run mode +# $verbose Integer level of verbosity +# RETURNS: Nothing +# DESCRIPTION: The files containing JSON comments have been found earlier and +# passed to this function. They are processed in sorted order. +# The JSON is decoded into a hash and is validated, and the +# elements cleaned up. The JSON is stored for later in the +# $updates structure. If all tests and checks are passed the +# entry for this file in the $updates structure is marked as +# valid. If anything fails the entry is marked as invalid for +# the next stages of processing. +# THROWS: No exceptions +# COMMENTS: None +# SEE ALSO: N/A +#=============================================================================== +sub do_JSON_process { + my ( $files, $updates, $log, $dry_run, $verbose ) = @_; + + my ( $shortfile, $dt ); + my $json = JSON->new; + + # + # Examine the files we found + # + foreach my $file ( sort(@$files) ) { + print ">> $file\n" if $verbose > 0; + $log->notice("Processing $file") unless $dry_run; + + # + # Initialise for this file, marking it as invalid by default. + # + $updates->{$file} = {}; + $updates->{$file}->{valid} = 0; + $updates->{$file}->{moderated} = 0; + $updates->{$file}->{errors} = []; + $updates->{$file}->{action} = ''; + $updates->{$file}->{file} = $file; # For convenience + $updates->{$file}->{shortfile} = $shortfile = abbreviate_filename($file); + + # + # Open the nominated file and load its contents into a scalar. Because + # 'read_text' does its own internal error handling we need try/catch to + # intercept it and provide our own. + # + my $json_text; + try { + $json_text = read_text($file); + } + + catch ($e){ + warn "Failed to read JSON file $file\n"; + next; + } + + # + # Decode and process the JSON, returning FALSE if it fails so we get + # a log entry (unless it's dry run mode) + # + unless ( + convert_JSON( + $json, $json_text, $updates, $file, + $log, $dry_run, $verbose + ) + ) + { + $log->warning("JSON decode failure; aborting '$shortfile'") + unless $dry_run; + } + + } + +} + +#=== FUNCTION ================================================================ +# NAME: do_mail_process +# PURPOSE: Process any pending email found in a directory search +# PARAMETERS: $files arrayref containing the mail files found +# $updates hashref to contain the details collected from +# the mail files +# $log log handle +# $dry_run Boolean flagging dry-run mode +# $verbose Integer level of verbosity +# RETURNS: Nothing +# DESCRIPTION: The files containing email have been found earlier and passed +# to this function. They are processed in sorted order. Each +# file is parsed as a mail file and the various destination +# addresses collected. (TODO Check these against permitted +# addresses). Each message is expected to be in MIME format with +# an attachment, which is extracted. The target mimetype +# (application/json) is checked for (TODO This is a global and +# shouldn't be). The JSON it contains is decoded into a hash and +# is validated, and the elements cleaned up. The JSON is stored +# for later in the $updates structure. If all tests and checks +# are passed the entry for this file in the $updates structure +# is marked as valid. If anything fails the entry is marked as +# invalid for the next stages of processing. +# THROWS: No exceptions +# COMMENTS: None +# SEE ALSO: N/A +#=============================================================================== +sub do_mail_process { + my ( $files, $updates, $log, $dry_run, $verbose ) = @_; + + my ( $shortfile, $dt ); + + # + # Prepare for parsing mail and JSON + # + my $parser = MIME::Parser->new; + $parser->output_under("/tmp"); + + my $json = JSON->new; + + # + # Examine the files we found + # + foreach my $file ( sort(@$files) ) { + print ">> $file\n" if $verbose > 0; + $log->notice("Processing $file") unless $dry_run; + + # + # Initialise for this file, marking it as invalid by default. + # + $updates->{$file} = {}; + $updates->{$file}->{valid} = 0; + $updates->{$file}->{moderated} = 0; + $updates->{$file}->{errors} = []; + $updates->{$file}->{action} = ''; + $updates->{$file}->{file} = $file; # For convenience + $updates->{$file}->{shortfile} = $shortfile = abbreviate_filename($file); + + # + # Open the current file and load its contents into a Mail::Internet object + # TODO: Consider using IO::All + # + open( my $mfh, '<', $file ) + or die "Failed to open input file '$file' : $!\n"; + + my $mi_obj = Mail::Internet->new($mfh); + + close($mfh) + or warn "Failed to close input file '$file' : $!\n"; + + # + # This should be to comments@hackerpublicradio.org in one of these headers + # + my @addresses = ( + Mail::Address->parse( $mi_obj->head->get('Resent-to') ), + Mail::Address->parse( $mi_obj->head->get('Resent-cc') ) + ); + if ( !@addresses ) { + @addresses = ( + Mail::Address->parse( $mi_obj->head->get('To') ), + Mail::Address->parse( $mi_obj->head->get('Cc') ) + ); + } + + # + # Report the headers we're interested in if requested + # + if ($verbose > 1) { + foreach my $addr (@addresses) { + my $dest = lc( $addr->address ); + print "To: $dest\n"; + } + print '~ ' x 40,"\n"; + } + + # + # TODO Check the message is actually for us + # + + my ( $ct, $cte, @body, $body, $results ); + + # + # Parse the message in the assumption it's MIME. Report any errors and + # warnings. + # + my $entity = $parser->parse_data( $mi_obj->as_string() ); + unless ($entity) { + $results = $parser->results; + foreach my $pmsg ( $results->msgs ) { + print STDERR "$pmsg\n"; + } + } + + # + # If we got multiple MIME parts then look for the first 'application/json' + # element and save it as the body we'll work on. + # + if ( scalar( $entity->parts ) > 0 ) { + # + # MIME message + # + if ( $DEBUG > 1 ) { + _debug(1, 'Message is MIME with multiple parts'); + foreach my $ent ( $entity->parts ) { + _debug(1, "Type: " . $ent->mime_type); + } + } + + # + # Get the first MIME part of the type we want and turn it into an + # array. + # TODO: Might it be better to make a string here since later we simply + # convert the array to a string again. + # + foreach my $ent ( $entity->parts ) { + if ( $ent->mime_type eq $target_mimetype ) { + $ct = $ent->mime_type; + # + # FIXME: Using 'bodyhandle' is supposed to decode the entity + # body, but this is untested. Also, we have to use 'as_string' + # then split on "\n" and stick the newlines back on the array + # elements, which seems a bit weird. Have to monitor this to + # see how it behaves. + # + #@body = @{ $ent->body() }; + @body = map {"$_\n"} + split( "\n", $ent->bodyhandle->as_string ); + _debug($DEBUG > 1, + 'Length of body = ' . scalar(@body) . ' lines' + ); + + # + # If we got a non-empty body then exit the loop + # + last if @body; + } + } + + # + # We found no suitable part so there's nothing to process here. We'll + # let the later phases detect this though + # + unless (@body) { + saveError( "MIME message has no valid '$target_mimetype' element", + $updates->{$file}->{errors} , $log, $dry_run, $verbose ); + next; + } + } + else { + # + # The message has no MIME parts + # + saveError( "Message has no attachment", + $updates->{$file}->{errors}, $log, $dry_run, $verbose ); + next; + } + + # + # Display the body unless we're not very verbose + # + $body = join( "", @body ); + print "$body\n" if $verbose > 2; + + # + # Decode and process the JSON, returning FALSE if it fails so we get + # a log entry (unless it's dry run mode) + # + unless ( + convert_JSON( + $json, $body, $updates, $file, + $log, $dry_run, $verbose + ) + ) + { + $log->warning("JSON decode failure; aborting '$shortfile'") + unless $dry_run; + } + + } + continue { + print '-' x 80, "\n" if $verbose > 0; + } + + return; +} + +#=== FUNCTION ================================================================ +# NAME: do_database_check +# PURPOSE: Look to see if the collected comments are already in the +# database +# PARAMETERS: $dbh database handle +# $updates structure (hashref) holding collected +# information about the files and comments in +# them +# $log log handle +# $verify Boolean flagging the need to verify +# TODO: Is this right? +# $dry_run Boolean flagging dry-run mode +# RETURNS: Nothing +# DESCRIPTION: Looks for each comment in %$updates in the database. The +# search criteria are the comment timestamp (in database format) +# and the comment author name. The timestamp is expected to have +# been converted from full ISO format (2024-01-10T11:16:11Z) to +# the format MySQL likes (2024-01-10 11:16:11). the original +# date is in the element 'orig_timestamp'. If there is a match +# in the database then the comment is to be skipped, and we mark +# the element appropriately and display and log a message. If no +# match we can proceed without skipping; +# THROWS: No exceptions +# COMMENTS: None +# SEE ALSO: N/A +#=============================================================================== +sub do_database_check { + my ( $dbh, $updates, $log, $verify, $dry_run ) = @_; + + my ( $sth1, $h1, $shortfile, $message ); + + # + # Look for a comment with details matching an incoming one. If found + # someone else has approved it so we need to skip it. + # + $sth1 = $dbh->prepare( + q{SELECT id FROM comments + WHERE comment_timestamp = ? + AND comment_author_name = ?} + ) or die $DBI::errstr; + if ( $dbh->err ) { + warn $dbh->errstr; + } + + foreach my $file ( sort_by_timestamp($updates) ) { + # + # Make a shortcut to the JSON part, and collect the short form of the + # filename. + # + my $ptr = $updates->{$file}->{JSON}; + #my $errors = $updates->{$file}->{errors}; + $shortfile = $updates{$file}->{shortfile}; + + # + # Look for the comment in the database + # + $sth1->execute( $ptr->{comment_timestamp}, + $ptr->{comment_author_name} ); + if ( $dbh->err ) { + warn $dbh->errstr; + } + + # + # Did we find it? + # + if ( $h1 = $sth1->fetchrow_hashref() ) { + # + # Found in the database. Mark this comment to be skipped and save + # an error report + # + #$updates->{$file}->{valid} = 0; + $updates->{$file}->{skip} = 1; + + $message = "Comment in file '$shortfile' is already in the database"; + print "$message\n" if $verbose > 0; + $log->warn($message) unless $dry_run; + } + else { + # + # Not found + # + $updates->{$file}->{skip} = 0; + } + + } + +} + +#=== FUNCTION ================================================================ +# NAME: do_moderation +# PURPOSE: Moderate all the valid comments in the 'updates' structure +# PARAMETERS: $dbh database handle +# $updates structure (hashref) holding collected +# information about the files and comments in +# them +# $tt_template path to the template we're using +# $log log handle +# $verify Boolean flagging the need to verify +# $dry_run Boolean flagging dry-run mode +# RETURNS: Nothing +# DESCRIPTION: Performs the moderation pass on the accumulated comments in +# the $updates structure. This is a hash (referenced by +# $updates) containing the filename as the key. Each of these +# lead to a hashref containing the JSON data (in a hashref), an +# arrayref of error messages and various flags denoting whether +# the comment is valid, has been moderated and what action the +# moderator took. +# The flags and error messages are manipulated here depending on +# what choice is made during the moderation process. +# The contents of the JSON hash (or a subset thereof) and the +# data about the show returned from a database query are +# displayed for the moderator using a template, and a menu of +# 'Approve', 'Ban', 'Reject', Ignore' is presented. +# On exit the structure referenced by $updates has been modified +# to signal what should be done with each of the comments. +# THROWS: No exceptions +# COMMENTS: TODO: References globals and should not. +# SEE ALSO: N/A +#=============================================================================== +sub do_moderation { + my ( $dbh, $updates, $tt_template, $log, $verify, $dry_run ) = @_; + + my ( $sth1, $h1, $cdt, $age ); + my $agelimit = 60 * 60 * 24; + + # + # We'll get details of the show each comment is associated with using this + # query. It will be passed to the TT² template. + # + $sth1 = $dbh->prepare( + q{SELECT e.id, e.date, e.title, h.host FROM eps e + JOIN hosts h ON e.hostid = h.hostid + WHERE e.id = ?} + ) or die $DBI::errstr; + if ( $dbh->err ) { + warn $dbh->errstr; + } + + # + # We'll display each comment with a TT² template + # + my $tt = Template->new( + { ABSOLUTE => 1, + ENCODING => 'utf8', + INCLUDE_PATH => $basedir, + } + ); + my $vars = {}; + my $choice; + + # + # Process the valid comments, and not marked as 'skip' + # + foreach my $file ( sort_by_timestamp($updates) ) { + if ( $updates->{$file}->{valid} > 0 && $updates->{$file}->{skip} == 0 ) { + # + # Make shortcuts + # + my $ptr = $updates->{$file}->{JSON}; + my $errors = $updates->{$file}->{errors}; + + # + # Check the age of the comment if we're verifying, and ignore if + # less than the age limit. The original timestamp should have been + # reserved even though we had to reformat for MySQL/MariaDB. + # + if ($verify) { + $cdt = DateTime::Format::ISO8601->parse_datetime( + $ptr->{orig_timestamp} + ); + $age = DateTime->now->epoch - $cdt->epoch; + if ($age < $agelimit) { + print "Choice 'Ignore' auto-selected due to comment age\n"; + $updates->{$file}->{action} = 'Ignore'; + $log->info("Ignored comment in $file") + unless $dry_run; + $updates->{$file}->{moderated} = 0; + next; + } + } + + # + # Get the show details from the database + # + $sth1->execute( $ptr->{eps_id} ); + if ( $dbh->err ) { + warn $dbh->errstr; + } + + # + # Do we know this episode? + # NOTE: If the database we're using is the clone this might fail + # because it's out of step with the live one + # + if ( $h1 = $sth1->fetchrow_hashref() ) { + # + # Populate the variables for the template + # + $vars->{file} = $file; + $vars->{db} = $h1; + $vars->{comment} = $ptr; + + # + # Call the template and print the result + # + my $document; + $tt->process( $tt_template, $vars, \$document, + { binmode => ':utf8' } ) + || die $tt->error(), "\n"; + print $document; + + # + # Prompt for moderation. Note: these prompts are used to flag + # the choice in the %updates hash, so don't change them unless + # you also change them elsewhere. + # + $choice = prompt( + 'Choose...', + -menu => [ 'Approve', 'Ban', 'Reject', 'Ignore' ], + '>' + ); + if ($choice) { + print "Choice '$choice' selected\n"; + if ( $choice eq 'Approve' ) { + $updates->{$file}->{action} = $choice; + $log->info("Approved comment in $file") + unless $dry_run; + $updates->{$file}->{moderated} = 1; + } + elsif ( $choice eq 'Ban' ) { + $updates->{$file}->{action} = $choice; + saveError( + "Comment in '$file' banned by the moderator", + $errors, + $log, $dry_run, 0 + ); + $updates->{$file}->{valid} = 0; + $updates->{$file}->{moderated} = 1; + } + elsif ( $choice eq 'Reject' ) { + $updates->{$file}->{action} = $choice; + saveError( + "Comment in '$file' rejected by the moderator", + $errors, + $log, $dry_run, 0 + ); + $updates->{$file}->{valid} = 0; + $updates->{$file}->{moderated} = 1; + } + elsif ( $choice eq 'Ignore' ) { + $updates->{$file}->{action} = $choice; + $log->info("Ignored comment in $file") + unless $dry_run; + $updates->{$file}->{moderated} = 0; + } + } + else { + print "\n"; + undef $choice; # Seem to need this after aborting 'prompt' + print "Ignored this comment\n"; + $log->info("Ignored comment in $file (default action)") + unless $dry_run; + $updates->{$file}->{moderated} = 0; + } + #print "\n"; + } + else { + # + # We couldn't find the episode in the database + # + printf "Episode %s is not known\n", $ptr->{eps_id}; + $updates->{$file}->{action} = 'Ignore'; + saveError( + "Episode referenced in '$file' is not known", + $errors, + $log, $dry_run, 0 + ); + $updates->{$file}->{moderated} = 0; + } + } + } + +} + +#=== FUNCTION ================================================================ +# NAME: do_update +# PURPOSE: Add valid, approved comments to the database (or pretend to do +# so if in dry-run mode) +# PARAMETERS: $dbh database handle +# $updates structure (hashref) holding collected +# information about the files and comments in +# them +# $log log handle +# $dry_run Boolean flagging dry-run mode +# $verbose Integer level of verbosity +# RETURNS: Nothing +# DESCRIPTION: Performs a database update for all valid approved comments in +# the structure referenced by $updates unless in dry-run mode +# where what would be done will be reported. +# THROWS: No exceptions +# COMMENTS: TODO: References globals and should not. +# SEE ALSO: N/A +#=============================================================================== +sub do_update { + my ( $dbh, $updates, $log, $dry_run, $verbose ) = @_; + + my $sql = SQL::Abstract->new; + my ( $stmt, @bind, $show, $count ); + + $count = 0; + + foreach my $file ( sort_by_timestamp($updates) ) { + if ( $updates->{$file}->{valid} > 0 + && $updates->{$file}->{action} eq 'Approve' ) + { + $count++; + + $show = $updates->{$file}->{JSON}->{eps_id}; + + my $shortfile = $updates{$file}->{shortfile}; + + # + # Build the statement + # NOTE: The JSON keys we wish to use match the column names in the + # 'comments' table. The second argument to 'insert' needs to be + # a hashref, so we use @json_keys to list the keys we need, with + # the whole expression returning an anonymous hash (hashref) + # + ( $stmt, @bind ) + = $sql->insert( 'comments', + { %{ $updates->{$file}->{JSON} }{@json_keys} } ); + + # + # Display the statement we constructed if requested + # + _debug($DEBUG > 1, + "$stmt\n'", + join( "','", @bind ) + ); + + # + # Take action or report potential changes + # + unless ($dry_run) { + # + # Apply the SQL + # + $dbh->do( $stmt, undef, @bind ); + if ( $dbh->err ) { + # + # Something broke during the database update. Warn about + # it, save the error and make the action 'Ignore' so the + # file is kept for further investigation (or to be + # edited). Marking the file as invalid may be overkill. + # + warn $dbh->errstr; + saveError( + $dbh->errstr, + $updates{$file}->{errors}, + $log, $dry_run, $verbose + ); +# $updates{$file}->{valid} = 0; + $updates->{$file}->{action} = 'Ignore'; + } + else { + $log->info("$file: inserted comment for show $show"); + + if ( $verbose > 0 ) { + print "Updated show $show from file $shortfile\n"; + } + } + } + else { + print + "Would have added comment for show $show from file $shortfile\n" + if $verbose > 0; + } + + } + } + + if ($verbose > 0 && $count > 0) { + print "\n", '-' x 80, "\n"; + } +} + +#=== FUNCTION ================================================================ +# NAME: convert_JSON +# PURPOSE: Converts the JSON from a file or an email into an internal +# data structure +# PARAMETERS: $json JSON object +# $jtext JSON in text format +# $updates hashref to contain the details collected from +# the JSON and elsewhere +# $file the file we're processing (hash key) +# $log log handle +# $dry_run Boolean flagging dry-run mode +# $verbose Integer level of verbosity +# RETURNS: +# DESCRIPTION: +# THROWS: No exceptions +# COMMENTS: None +# SEE ALSO: N/A +#=============================================================================== +sub convert_JSON { + my ( $json, $jtext, $updates, $file, $log, $dry_run, $verbose ) = @_; + + my ($comment, $dt, $shortfile); + + $shortfile = $updates{$file}->{shortfile}; + + # + # Decode the JSON into a hashref + # + try { + $comment = $json->decode($jtext); + } + + # + # If the JSON decode failed we have a problem + # + catch ($e){ + saveError( + "JSON in file '$shortfile' failed to decode", + $updates->{$file}->{errors}, + $log, $dry_run, $verbose + ); + return 0; + } + + # + # Validate the comment fields: do we have all we expected? + # + unless (validate_comment($comment)) { + saveError( + "JSON in file '$shortfile' failed to validate", + $updates->{$file}->{errors}, + $log, $dry_run, $verbose + ); + return 0; + } + + # + # Perform some cleaning + # + foreach my $key (@cleanable_keys) { + $comment->{$key} = clean_comment_field( $comment->{$key} ); + } + + # + # ISO 8601 timestamps cause MySQL problems (or have started to) so we + # parse them and convert to an acceptable format. We keep the original + # format in $verify is set and we have to check its age. + # + try { + $dt = DateTime::Format::ISO8601->parse_datetime( + $comment->{comment_timestamp} ); + } + catch ($e){ + die "Failed to parse comment timestamp\n"; + } + $comment->{orig_timestamp} = $comment->{comment_timestamp}; + $comment->{comment_timestamp} = $dt->ymd . ' ' . $dt->hms; + + # + # Stash the JSON for later + # + $updates->{$file}->{JSON} = $comment; + + # + # This file is valid we think + # + $updates->{$file}->{valid} = 1; + $log->info("$file seems valid") unless $dry_run; + + return 1; + +} + +#=== FUNCTION ================================================================ +# NAME: report_action +# PURPOSE: Does all of the necessary reporting as the email or JSON files +# are being acted upon +# PARAMETERS: $struct hashref holding collected comment data for +# a given mail file +# $destname name of destination +# $dest destination directory +# $log log handle +# $dry_run Boolean flagging dry-run mode +# $verbose Integer level of verbosity +# RETURNS: Nothing +# DESCRIPTION: In dry run mode reports what would have been done along with +# any errors, but only in verbose mode. Otherwise reports the +# action (in verbose mode) and logs it too. It also moves the +# mail file to the requested directory. +# THROWS: No exceptions +# COMMENTS: None +# SEE ALSO: N/A +#=============================================================================== +sub report_action { + my ( $struct, $destname, $dest, $log, $dry_run, $verbose ) = @_; + + # + # Get values with a hashref slice + # + my ( $file, $shortfile ) = @$struct{ 'file', 'shortfile' }; + + unless ($dry_run) { + if ($verbose > 0) { + print "Moving '$shortfile' to '$destname'\n"; + print_errors($struct->{$file}->{errors}); + } + $log->info("Moving '$file' to '$destname'"); + unless ( moveFile( $file, $dest ) ) { + warn "Unable to move $shortfile\n"; + $log->error("Failed to move $shortfile"); + } + } + else { + if ($verbose > 0) { + print "Would move '$shortfile' to '$destname' (dry run)\n"; + print_errors($struct->{$file}->{errors}); + } + } + + return; +} + +#=== FUNCTION ================================================================ +# NAME: call_back +# PURPOSE: Invokes a PHP interface on the HPR server through which +# a comment can be marked as fully processed +# PARAMETERS: $format sprintf template for building the URL +# $key the key from the JSON +# $action the action to be taken +# $user the username from the configuration +# $pass the password from the configuration +# $log log handle +# $dry_run Boolean flagging dry-run mode +# $verbose Integer level of verbosity +# RETURNS: True (1) if the call performed as it should, otherwise false +# (0) +# DESCRIPTION: Checks that the correct parameters have been supplied, then +# builds the URL we're going to use from the format string, key +# and action. If in dry-run mode this URL is reported and +# nothing more. Otherwise the URL is queried with an HTTP 'GET'. +# If the response shows success then the action is considered to +# have been taken. The returned message is displayed (in verbose +# mode) and success is logged. If the 'GET' failed then +# a warning message is generated along with a warning log entry. +# THROWS: No exceptions +# COMMENTS: None +# SEE ALSO: N/A +#=============================================================================== +sub call_back { + my ( $format, $key, $action, $user, $pass, $log, $dry_run, $verbose ) = @_; + + my ( $ua, $url, $response, $code, $message, $content ); + + # + # Check calling parameters + # + unless ($key) { + warn "Invalid call to 'call_back' subroutine; missing key\n"; + return 0; + } + + unless ( $action =~ /^(approve|block|delete)$/ ) { + warn "Invalid call to 'call_back' subroutine; invalid action\n"; + return 0; + } + + # + # Build the URL + # + $url = sprintf( $format, $key, $action ); + + # + # Perform the call to clean up the JSON files, etc, or just say what would + # have been done + # + unless ($dry_run) { + print "Making the server call:\nGET $url\n" if $verbose > 0; + + $ua = LWP::UserAgent->new; + $ua->timeout(180); # Module default is 180 sec = 3 mins + $ua->credentials("hub.hackerpublicradio.org:443", "cms", $user, $pass); + + $response = $ua->get($url); + if ( $response->is_success ) { + # + # Server call succeeded + # + $code = $response->code; + $message = $response->message; + $content = $response->decoded_content; + if ($verbose > 0) { + print "Success: $code/$message\n"; + print "Content: $content\n" if $content; + } + $log->info("Successful server message $key/$action: " . + $response->status_line); + return 1; + } + else { + # + # Server call failed + # + warn $response->status_line . "\n"; + $log->error("Failed server message $key/$action: " . + $response->status_line); + # Try to log as much as possible + $log->error( $response->decoded_content ) + if $response->decoded_content; + return 0; + } + } + else { + # + # Always a success in dry-run mode + # + print "Would have made the server call:\nGET $url\n" if $verbose > 0; + return 1; + } + +} + +#=== FUNCTION ================================================================ +# NAME: print_errors +# PURPOSE: Prints the contents of the error array if there's anything in +# it (i.e. not empty) +# PARAMETERS: $errors arrayref possibly containing errors +# RETURNS: Nothing +# DESCRIPTION: Simplifies printing in the case when there's nothing to print +# THROWS: No exceptions +# COMMENTS: None +# SEE ALSO: N/A +#=============================================================================== +sub print_errors { + my ($errors) = @_; + + return unless defined($errors); + + print "Errors: ", join( "\n", @{$errors} ), "\n" + if @{$errors}; +} + +#=== FUNCTION ================================================================ +# NAME: saveError +# PURPOSE: Saves an error message in an error structure and logs it if in +# live mode +# PARAMETERS: $message error message text +# $struct structure (arrayref) to hold the text +# $log log handle +# $dry_run Boolean flagging dry-run mode +# $verbose Integer level of verbosity +# RETURNS: Nothing +# DESCRIPTION: Simplifies the saving of an error message and logging it +# THROWS: No exceptions +# COMMENTS: None +# SEE ALSO: N/A +#=============================================================================== +sub saveError { + my ( $message, $struct, $log, $dry_run, $verbose ) = @_; + + $log->warn($message) unless $dry_run; + push( @{$struct}, $message ); + print "$message\n" if $verbose > 0; +} + +#=== FUNCTION ================================================================ +# NAME: validate_comment +# PURPOSE: Checks that all of the required fields are present in the +# JSON comment +# PARAMETERS: $comment - a pointer to the JSON hash +# RETURNS: True (1) if valid, otherwise false (0) +# DESCRIPTION: +# THROWS: No exceptions +# COMMENTS: None +# SEE ALSO: N/A +#=============================================================================== +sub validate_comment { + my ($comment) = @_; + + foreach my $key (@json_keys) { + return 0 unless defined($comment->{$key}); + } + + return 1; +} + +#=== FUNCTION ================================================================ +# NAME: count_tags +# PURPOSE: Counts the number of start tags in what might be HTML +# PARAMETERS: $string String to examine +# RETURNS: An integer count of the number of start tags found +# DESCRIPTION: Uses HTML::Parser to parse the input string. A handler is +# defined which accumulates start and end tags in an array. The +# number of start tags found is returned to the caller. +# THROWS: No exceptions +# COMMENTS: None +# SEE ALSO: N/A +#=============================================================================== +sub count_tags { + my ($string) = @_; + + my @accum; + chomp($string); + return 0 unless $string; + + my $p = HTML::Parser->new( + api_version => 3, + handlers => { + start => [ \@accum, "event,text" ], + end => [ \@accum, "event,text" ] + } + ); + $p->parse($string); + $p->eof; + + return scalar(@accum); +} + +#=== FUNCTION ================================================================ +# NAME: clean_comment_field +# PURPOSE: Cleans a string field from the comment +# PARAMETERS: $field - the field to be cleaned +# RETURNS: The cleaned string +# DESCRIPTION: We want to remove all HTML. We also want to add HTML entities +# where these may be needed. If there are already entities in +# the data we will decode them first then re-encode so we don't +# get double encoding. +# THROWS: No exceptions +# COMMENTS: None +# SEE ALSO: N/A +#=============================================================================== +sub clean_comment_field { + my ($field) = @_; + + # + # Remove all HTML + # + my $hr = HTML::Restrict->new(); + $field = $hr->process($field); + + # + # Entities. Make sure all existing entities are decoded first, then encode + # the result. Note that the unsafe characters are the reverse of what's + # listed. The second argument to encode_entities uses regular expression + # character class syntax (as if in [] in a RE). The set is all non-plain + # ASCII exclusing newline, carriage return and '<>&'. + # + decode_entities($field); + $field + = encode_entities( $field, '^\n\r\x20-\x25\x27-\x3b\x3d\x3f-\x7e' ); + + return $field; +} + +#=== FUNCTION ================================================================ +# NAME: moveFile +# PURPOSE: Moves a file (renames it) taking account of whether the name +# exists at the destination +# PARAMETERS: $file file to move +# $dest destination of the file +# RETURNS: True (1) if the move succeeded, otherwise false (0) +# DESCRIPTION: A wrapper around the File::Copy 'move' function. If the target +# file exists then a version number is appended to the name so +# that the original is not clobbered. +# THROWS: No exceptions +# COMMENTS: None +# SEE ALSO: N/A +#=============================================================================== +sub moveFile { + my ( $file, $dest ) = @_; + + my ( $target, $basename, $prefix, $vsn ); + + return 0 unless -e $file; + + # + # Handle a destination directory + # + if ( -d $dest ) { + ( $basename = $file ) =~ s|.*/||mx; + $target = "$dest/$basename"; + } + else { + $target = $dest; + } + + # + # If the target exists we'll have to do some work, otherwise just do the + # move + # + if ( -e $target ) { + if ( ( $prefix, $vsn ) = ( $target =~ /(\.)(\d+)$/ ) ) { + while ( -e "$prefix$vsn" ) { + $vsn++; + $target = "$prefix$vsn"; + } + return move( $file, $target ); + } + else { + $vsn = 1; + $prefix = $target; + $target = "$prefix.$vsn"; + while ( -e "$prefix.$vsn" ) { + $vsn++; + $target = "$prefix.$vsn"; + } + return move( $file, $target ); + } + } + else { + return move( $file, $dest ); + } +} + +#=== FUNCTION ================================================================ +# NAME: abbreviate_filename +# PURPOSE: Abbreviates a long pathname by shrinking the directory names. +# PARAMETERS: $filename The name of the file +# RETURNS: A string containing the abbreviated name +# DESCRIPTION: Splits the filename, assembles the directories as just their +# first character, and then appends the filename. A bit +# primitive but it works. +# THROWS: No exceptions +# COMMENTS: None +# SEE ALSO: N/A +#=============================================================================== +sub abbreviate_filename { + my ($filename) = @_; + + my @elements = split('/',$filename); + my @shortened = map {substr($_,0,1)} @elements[0..$#elements-1]; + + return join('/',@shortened,$elements[-1]); +} + +#=== FUNCTION ================================================================ +# NAME: sort_by_timestamp +# PURPOSE: Sorts the main hash by timestamp rather than the normal sort +# by key +# PARAMETERS: $struct hashref pointing to the main structure +# RETURNS: A list of hash keys in order of timestamp +# DESCRIPTION: Generates a list of keys by sorting based on the comment +# timestamp in the JSON hash. Performs the sort by doing +# a string comparison since the dates are in ISO8601 +# 'YYYY-MM-DDTHH:MM:SSZ' format (e.g. 2017-10-06T06:22:55Z). +# This seems to be safe. +# THROWS: No exceptions +# COMMENTS: None +# SEE ALSO: N/A +#=============================================================================== +sub sort_by_timestamp { + my ($struct) = @_; + + my @result = sort { + $struct->{$a}->{JSON}->{comment_timestamp} + cmp $struct->{$b}->{JSON}->{comment_timestamp} + } keys(%$struct); + + return @result; +} + +#=== FUNCTION ================================================================ +# NAME: _debug +# PURPOSE: Prints debug reports +# PARAMETERS: $active Boolean: 1 for print, 0 for no print +# @messages List of messages to print +# RETURNS: Nothing +# DESCRIPTION: Outputs messages to STDERR if $active is true. For each +# message it removes any trailing newline and then adds one in +# the 'print' so the caller doesn't have to bother. Prepends +# each message with 'D> ' to show it's a debug message. +# THROWS: No exceptions +# COMMENTS: None +# SEE ALSO: N/A +#=============================================================================== +sub _debug { + my ( $active, @messages ) = @_; + + return unless $active; + foreach my $msg (@messages) { + chomp($msg); + print STDERR "D> $msg\n"; + } +} + +#=== FUNCTION ================================================================ +# NAME: Options +# PURPOSE: Processes command-line options +# PARAMETERS: $optref Hash reference to hold the options +# RETURNS: Undef +# DESCRIPTION: +# THROWS: no exceptions +# COMMENTS: none +# SEE ALSO: n/a +#=============================================================================== +sub Options { + my ($optref) = @_; + + my @options = ( + "help", "documentation|manpage", + "debug=i", "dry-run!", + "verbose+", "verify!", + "live!", "json!", + "config=s", + ); + + if ( !GetOptions( $optref, @options ) ) { + pod2usage( -msg => "$PROG version $VERSION\n", -exitval => 1 ); + } + + return; +} + +__END__ + + +#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +# Application Documentation +#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +#{{{ + +=head1 NAME + +process_comments + +=over 4 + +Process incoming comment files as email messages or JSON files + +=back + +=head1 VERSION + +This documentation refers to process_comments version 0.3.3 + + +=head1 USAGE + + ./process_comments [-help] [-doc] [-debug=N] [-[no]dry-run] + [-verbose ...] [-[no]live] [-[no]json] [-config=FILE] + + ./process_comments -dry-run + ./process_comments -debug=3 -dry-run + ./process_comments -verbose -verbose + ./process_comments -help + ./process_comments -documentation + ./process_comments -json + ./process_comments -verb -live -config=.process_comments_live.cfg + +=head1 OPTIONS + +=over 8 + +=item B<-help> + +Prints a brief help message describing the usage of the program, and then exits. + +=item B<-documentation> or B<-manpage> + +Prints the entire embedded documentation for the program, then exits. + +=item B<-debug=N> + +Enables debugging mode when N > 0 (zero is the default). The levels are: + +=over 4 + +=item B<1> + +Reports on the options chosen. Running with the B<-dry-run> option prevents +problems with comments if any options have been incorrectly used. + +=item B<2> + +Prints all of the information described at the previous level. + +Prints the names of files found in the mail or JSON spool areas. + +Prints the internal details of the email, listing the MIME parts (if there are any). + +Prints the length of the MIME part matching the desired type, in lines. + +Prints the entirety of the internal structure holding details of the mail file +and the comment it contains. This follows the moderation pass. + +Prints the SQL that has been constructed to update the database. + +=item B<3> + +Prints all of the information described at the previous levels. + +Prints the contents of the B<%config> hash built from the configuration file. + +=back + +=item B<-[no]dry-run> + +Controls the program's B mode. It is off by default. In dry-run mode +the program reports what it would do but makes no changes. When off the +program makes all the changes it is designed to perform. + +=item B<-verbose> + +This option may be repeated. For each repetition the level of verbosity is +increased. By default no verbosity is in effect and the program prints out the +minimal amount of information. + +Verbosity levels: + +=over 4 + +=item B<1> + +Prints the name of each mail (or JSON) file as it's processed. + +Prints any error messages during message validation, which are also being +logged (unless in dry-run mode) and saved for reporting later. + +Prints a notification if the comment is added to the database (or that this +would have happened in dry-run mode). + +Prints messages about the moving of each mail (or JSON) file from the +processing area, along with any errors accumulated for that file. In dry-run +mode simply indicates what would have happened. + +Prints the response code received from the server when invoking the interface +for updating comment files there. If in dry-run mode the message produced +merely indicates what would have happened. + +If validation failed earlier on then further information is produced about the +final actions taken on these files. + +=item B<2> + +Prints the addresses each mail message is being sent to (unless in JSON mode). + +=item B<3> + +Prints the JSON contents of each mail message (or of each JSON file). + +=back + +=item B<-[no]verify> + +This option controls whether the script imposes a delay on comments. The idea +is that if the comment feature is used to rant on a subject or to pass +misinformation, delaying them will help to defuse the situation. + +The default state is B<-noverify>; a delay is not imposed. Selecting +B<-verify> means that comments have to be at least 24 hours old before they +are processed. The length of the delay cannot currently be changed without +altering the script. + +This feature of the script has hardly been used and is perhaps against the +spirit of HPR, so it will probably be removed in a future version. + +=item B<-[no]live> + +This option determines whether the program runs in live mode or not. The +default varies depending on which system it is being run on. + +IT SHOULD NOT USUALLY BE NECESSARY TO USE THIS! + +In live mode the program makes changes to the live database and sends messages +to the live web interface when a comment has been processed. With live mode +off the program assumes it is writing to a clone of the database and it does +not inform the webserver that a comment has been processed. + +The default for the copy of the program on borg is that live mode is ON. +Otherwise the default is that live mode is OFF. The setting is determined by +the sed script called B on borg. This needs to be run whenever +a new version of the program is released. This is done as follows: + + sed -i -f fixup.sed process_comments + +=item B<-[no]json> + +This option selects JSON mode, which makes the script behave in a different +way from the default mode (B<-nojson> or MAIL mode) where it processes email +containing comments. + +In JSON mode the script looks in a sub-directory called B where it +expects to find JSON files. The normal way in which these files arrive in this +directory is by using B to copy them from the HPR server (the directory +is B). This is a provision in case the normal route of +sending out email messages has failed for some reason. It also saves the user +from setting up the mail handling infrastructure that would otherwise be +needed. + +In JSON mode the mail handling logic is not invoked, files are searched for in +the B directory and each file is processed, moderation is requested and +the comment is added to the database. In `B<-live>` mode the server is informed +that the comment has been processed. + +The B directory needs to have three sub-directories: B, +B and B. The script will place the processed files into +these sub-directories according to the moderation choice made. This makes it +easier to see what actions were taken and helps avoid repeated processing of +the same comment. + +=item B<-config=FILE> + +This option defines a configuration file other than the default +B<.hpr_db.cfg>. The file must be formatted as described below in the section +B. + +=back + +=head1 DESCRIPTION + +A script to process new comments, moderate them and add them to the HPR +database. + +In the new HPR comment system (released September 2017) a new web form is +presented in association with each show. The form can be used to submit +a comment on the show in question and takes some standard fields: the name of +the commenter, the title of the comment and the body of the comment itself. + +Once the comment has been submitted its contents are formatted as a JSON +object and are sent as a mail attachment to the address +B. + +Recipients of these mail messages can then perform actions on the comments +they contain to cause them to be added to the HPR database. These actions are: +approve the comment, block it (because it is inappropriate or some form of +Spam and we want to prevent any further messages from the associated IP +address), or reject it (delete it). There is also an ignore option which skips +the current comment in this run of the script so it can be revisited later. + +This script can process an entire email message which has been saved to a file +or a file containing the JSON object (as in the email attachment). When +processing email it is expected that it will be found in a maildrop directory, +and when finished the messages will be placed in sub-directories according to +what actions were carried out. A similar logic is used for JSON files; they +are expected to be in a drop area and are moved to sub-directories after +processing. + +The directories used as spool areas are defined in the configuration file(s) +described below. + +=head2 CHANGES + +=head3 2024 + +An alternative method of comment approval has been added using PHP on the +server B. This script was originally +designed around receiving email messages containing JSON or collecting JSON +files. The assumption was that it was the sole agent processing these messages +or files and would therefore be the only way that comments could enter the +database. + +With the new server route it became possible for an emailed comment to be +received when that comment was already in the database. + +As of version B<0.3.2> the script has been enhanced to check whether comments +received as mail messages (or JSON files) have already been processed. If this +is found to be the case the incoming data is skipped and stored in the +'B' directory (because the comment has been approved elsewhere). + +There is still scope for confusion - if for example, a comment has been +rejected on the server, the script will see it as a potential addition to the +database. The moderators need to coordinate their decisions! + +The ultimate solution will be to have just one way of moderating and +installing comments which is available to any of the "Janitors". + +=head2 MAIL HANDLING + +One way of handling incoming mail is to use a mail client which is capable of +saving messages sent to the above address in the spool area mentioned earlier. +For example, Thunderbird can do this by use of a filter and a plugin. Other +MUA's will have similar capabilities. + +When this script is run on the mail spool area it will process all of the +files it finds. For each file it will check its validity in various ways, +display the comment then offer a moderation menu. The moderation options are +described below. + +=head3 APPROVE + +If a comment is approved then it will be added to the database, the associated +mail file will be moved to a sub-directory (by default called 'B'), +and the HPR server will be notified of this action. + +=head3 BAN + +If a comment is banned then it will not be added to the database. The mail +file will be moved to the sub-directory 'B' and the HPR server will be +informed that the IP address associated with the comment should be placed on +a black list. + +=head3 REJECT + +If a comment is rejected it is not written to the database, the mail file is +moved to the sub-directory 'B' and the HPR server informed that the +comment can be deleted. + +=head3 IGNORE + +If a comment is ignored it is simply left in the mail spool and no further +processing done on it. It will be eligible for processing again when the +script is next run. + +=head2 JSON FILE HANDLING + +As described under the description of the B<-[no]json> option, the script +allows the processing of a multiple JSON files, each containing a single +comment. These files are expected to be in another spool area (defined int he +configuration file). The JSON is checked and all of the comment fields are +validated, then the moderation process is begun. + +Moderation in this case consists of the same steps as described above except +that no mail file actions are taken and the JSON file is moved to +a sub-directory after processing. + +=head1 DIAGNOSTICS + +=over 4 + +=item B + +Type: fatal + +The nominated configuration file referenced in B<-config=FILE> was not found. + +=item B + +Type: fatal + +No mail files were found in the mail spool area requiring processing. + +=item B + +Type: fatal + +No JSON files were found in the JSON spool area requiring processing. + +=item B + +Type: fatal + +A JSON file in the spool area could not be read with a JSON parser. + +=item B + +Type: fatal + +The timestamp must be converted to a format compatible with MySQL/MariaDB but +during this process the parse failed. + +=item B + +Type: fatal + +A mail file in the spool area could not be opened. + +=item B + +Type: warning + +A mail file could not be moved to the relevant sub-directory. + +=item B + +Type: warning + +A mail file in the spool area could not be closed. + +=item B + +Type: fatal, warning + +An action on the database has been flagged as an error. + +=item B + +Type: fatal + +An action relating to the template used for the display of the comment has +been flagged as an error. + +=item B + +Type: warning + +The routine 'call_back' was called incorrectly. The key was missing. + +=item B + +Type: warning + +The routine 'call_back' was called incorrectly. The action was invalid. + +=item B + +Type: warning + +While attempting to send an action to the remote server with the 'call_back' +subroutine an error message was received. + +=back + +=head1 CONFIGURATION AND ENVIRONMENT + +=head2 CONFIGURATION + +The configuration file design was changed for version B<0.2.7> and beyond. + +The script expects a configuration file to provide it with various program +settings, database credentials and HPR CMS credentials. + +The default configuration file name is B<.process_comments.cfg> and is for use +with the local clone of the hpr database. + +This file has the general structure: + + <> + + <> + + + user = "dummy:dummy" + + +It uses B 'B' directives to include the file +B<.process_comments_settings.cfg> which holds the program settings. It then +includes the file B<.hpr_db.cfg> which containss credentials for the cloned +database. Finally it defines dummy credentials for the HPR CMS since this +feature is not used by default. + +The more useful configuration is in B<.process_comments_live.cfg>: + + <> + + <> + + + <> + + +The basic format of the main configuration data after the inclusions have been +performed is then: + + + # Holds program settings + + + # Holds database settings + + + # Holds CMS credentials + + +=head3 CONTENTS OF B<.process_comments_settings.cfg>: + +This section is enclosed in '' tags: + + + PROG = process_comments + + # + # Defaults + # + basedir = "$HOME/HPR/Comment_system" + configfile = "$basedir/.hpr_db.cfg" + logfile = "$basedir/logs/${PROG}.log" + template = "$basedir/${PROG}.tpl" + + # + # Mail message stash area + # + maildrop = "$HOME/HPR/CommentDrop" + processed = "$maildrop/processed" + rejected = "$maildrop/rejected" + banned = "$maildrop/banned" + + # + # JSON stash area + # + jsondir = "$basedir/json" + jprocessed = "$jsondir/processed" + jrejected = "$jsondir/rejected" + jbanned = "$jsondir/banned" + + # + # How to tell the server the comment's processed + # + callback_template = \ + "https://hackerpublicradio.org/comment_process.php?key=%s&action=%s" + + + +=head3 CONTENTS OF B<.hpr_db.cfg> AND B<.hpr_livedb.cfg> + +The default file providing this part is just for use with the default +database. The section is enclosed in '' tags: + + + host = 127.0.0.1 + port = PORT + name = DATABASE + user = USERNAME + password = PASSWORD + + +These settings can alternatively be used to connect to an SSH tunnel which has +been connected from a remote system (like borg) to the live database. +Assuming the port chosen for this is 3307 something like the following could +be used: + + + host = 127.0.0.1 + port = 3307 + name = hpr_hpr + user = hpr_hpr + password = "**censored**" + + +A typical Bash script for opening a tunnel might be: + + #!/bin/bash + SSHPORT=22 + LOCALPORT=3307 + REMOTEPORT=3306 + ssh -p ${SSHPORT} -f -N -L localhost:${LOCALPORT}:localhost:${REMOTEPORT} hpr@hackerpublicradio.org + +=head3 CONTENTS OF B<.hpradmin_curlrc> + +This is a file used by the 'curl' command to contain credentials to access the +CMS on the HPR server. The format needs to be: + + user = hpradmin:**censored** + +This is one of the formats allowed by 'curl' and is also compatible with the +B module used by this script. + +=head2 TEMPLATE + +The program displays the comment that is currently being processed for +moderation. It uses a template along with the Perl B