1
0
forked from HPR/hpr-tools
hpr-tools/Comment_system/process_comments

2344 lines
77 KiB
Perl
Executable File

#!/usr/bin/env perl
#===============================================================================
#
# FILE: process_comments
#
# USAGE: ./process_comments [-help] [-docmentation|manpage] [-debug=N]
# [-[no]dry-run] [-verbose ...] [-[no]verify] [-[no]live]
# [-[no]json] [-config=FILE]
#
# DESCRIPTION: Read new comments received as mail messages (or as JSON files)
# and perform moderation actions on them before possibly adding
# them to the HPR database.
#
# OPTIONS: ---
# REQUIREMENTS: ---
# BUGS: ---
# NOTES: 2023-12-23 enabled the experimental::try feature and removed
# the use of 'Try::Tiny'. Revert by removing "use feature 'try'"
# and 'no warnings "experimental::try"'. Remove the '($e)' from
# each 'catch' and end the 'catch' braces with a semicolon. Add
# back 'use Try::Tiny'.
#
# AUTHOR: Dave Morriss (djm), Dave.Morriss@gmail.com
# VERSION: 0.3.3
# CREATED: 2017-09-04 18:06:49
# REVISION: 2024-01-10 13:02:51
#
#===============================================================================
use v5.36; # say and try/catch
use feature 'try';
use strict;
use warnings;
no warnings "experimental::try";
use utf8;
use Carp;
use Getopt::Long;
use Pod::Usage;
use Config::General;
use List::Util qw{max};
use HTML::Parser;
use HTML::Restrict;
use HTML::Entities;
use File::Find::Rule;
use File::Copy;
use File::Slurper 'read_text';
#use IO::All;
use JSON;
use Mail::Address;
use Mail::Field;
use Mail::Internet;
use MIME::Parser;
use Encode qw(decode encode);
use DateTime::Format::ISO8601;
use IO::Prompter;
use Template;
use SQL::Abstract;
use LWP::UserAgent;
#use TryCatch; # Failing 2020-05-31 due to a problem in Devel::Declare
#use Try::Tiny; # Using in-built experimental try/catch
use Log::Handler;
use DBI;
use Data::Dumper;
#
# Version number (manually incremented)
#
our $VERSION = '0.3.3';
#
# Script and directory names
#
( my $PROG = $0 ) =~ s|.*/||mx;
( my $DIR = $0 ) =~ s|/?[^/]*$||mx;
$DIR = '.' unless $DIR;
#-------------------------------------------------------------------------------
# Declarations
#-------------------------------------------------------------------------------
#
# Constants and other declarations
#
my $basedir = "$ENV{HOME}/HPR/Comment_system";
my $configfile = "$basedir/.${PROG}.cfg";
my ( $dbh, $sth1, $h1 );
my ( @ignore, @files );
my ($comment, %updates, $shortfile);
#
# The MIME part we're looking for
#
my $target_mimetype = 'application/json';
#
# All of the keys in the JSON we're interested in (when checking and adding to
# the database)
#
my @json_keys = (
"eps_id", "comment_timestamp",
"comment_author_name", "comment_title",
"comment_text",
);
#
# Keys we need to clean
#
my @cleanable_keys = (
"comment_author_name", "comment_title",
"comment_text",
);
#
# Enable Unicode mode
#
binmode STDOUT, ":encoding(UTF-8)";
binmode STDERR, ":encoding(UTF-8)";
#-------------------------------------------------------------------------------
# Options and arguments
#-------------------------------------------------------------------------------
my $DEF_DEBUG = 0;
my $DEF_LIVE = 0; # Gets edited on borg
#
# Process options
#
my %options;
Options( \%options );
#
# Default help
#
pod2usage( -msg => "$PROG version $VERSION\n", -verbose => 0, -exitval => 1 )
if ( $options{'help'} );
#
# Full documentation if requested with -doc
#
pod2usage(
-msg => "$PROG version $VERSION\n",
-verbose => 2,
-exitval => 1,
-noperldoc => 0,
) if ( $options{'documentation'} );
#
# Collect options
#
my $DEBUG = ( defined( $options{debug} ) ? $options{debug} : $DEF_DEBUG );
my $cfgfile
= ( defined( $options{config} ) ? $options{config} : $configfile );
my $live = ( defined( $options{live} ) ? $options{live} : $DEF_LIVE );
my $verify = ( defined( $options{verify} ) ? $options{verify} : 0 );
my $dry_run = ( defined( $options{'dry-run'} ) ? $options{'dry-run'} : 0 );
my $verbose = ( defined( $options{verbose} ) ? $options{verbose} : 0 );
my $jsonmode = ( defined( $options{json} ) ? $options{json} : 0 );
#
# Report on the options if requested
#
if ($DEBUG > 0) {
_debug(1, 'Options');
_debug(1,
'-config=' . $cfgfile,
($live eq 0 ? '-nolive ' : '-live'),
($verify eq 0 ? '-noverify ' : '-verify'),
($dry_run eq 0 ? '-nodry-run ' : '-dry-run'),
'-verbose=[' . $verbose . ']',
($jsonmode eq 0 ? '-nojson ' : '-json'),
);
_debug(1, '');
}
#
# Sanity checks
#
die "Unable to find configuration file: $cfgfile\n" unless ( -e $cfgfile );
#-------------------------------------------------------------------------------
# Configuration file - load data
#-------------------------------------------------------------------------------
my $conf = Config::General->new(
-ConfigFile => $cfgfile,
-InterPolateVars => 1,
-InterPolateEnv => 1,
-ExtendedAccess => 1,
);
my %config = $conf->getall();
_debug($DEBUG > 2, 'Configuration settings:', Dumper(\%config));
my $logfile = $config{settings}->{logfile};
my $template = $config{settings}->{template};
my $maildrop = $config{settings}->{maildrop};
my $processed = $config{settings}->{processed};
my $rejected = $config{settings}->{rejected};
my $banned = $config{settings}->{banned};
my $jsondir = $config{settings}->{jsondir};
my $jprocessed = $config{settings}->{jprocessed};
my $jrejected = $config{settings}->{jrejected};
my $jbanned = $config{settings}->{jbanned};
my $callback_template = $config{settings}->{callback_template};
my ($cb_user, $cb_password) = split(":",$config{cms}->{user});
#-------------------------------------------------------------------------------
# Moderation responses and the arguments associated with them (from the
# configuration data).
#-------------------------------------------------------------------------------
#
# key => [ 3 element array ]
#
# - key is the menu element from do_moderation
# - the array contains:
# - text to be used in messages
# - anonymous subroutine which returns the directory into which to place
# the file; call it with the $jsonmode variable
# - action keyword to send back to the server
#
my %moderation_keys = (
'Approve' => [
'processed',
sub { ( $_[0] ? $jprocessed : $processed ) },
'approve'
],
'Ban' => [
'banned',
sub { ( $_[0] ? $jbanned : $banned ) },
'block'
],
'Reject' => [
'rejected',
sub { ( $_[0] ? $jrejected : $rejected ) },
'delete'
],
);
#
# Make a RE from the keys
#
my $mkeys = join('|',keys(%moderation_keys));
my $mkeys_re = qr{^$mkeys$};
#-------------------------------------------------------------------------------
# Connect to the database
#-------------------------------------------------------------------------------
my $dbhost = $config{database}->{host} // '127.0.0.1';
my $dbport = $config{database}->{port} // 3306;
my $dbname = $config{database}->{name};
my $dbuser = $config{database}->{user};
my $dbpwd = $config{database}->{password};
$dbh = DBI->connect( "dbi:mysql:host=$dbhost;port=$dbport;database=$dbname",
$dbuser, $dbpwd, { AutoCommit => 1 } )
or croak $DBI::errstr;
#$dbh = DBI->connect( "DBI:MariaDB:database=$dbname;host=$dbhost;port=$dbport",
# $dbuser, $dbpwd, { AutoCommit => 1 } )
# or croak $DBI::errstr;
#
# Enable client-side UTF8
#
$dbh->{mysqlenable_utf8} = 1;
#
# Set the local timezone to UTC for this connection
#
$dbh->do("set time_zone = '+00:00'") or carp $dbh->errstr;
#-------------------------------------------------------------------------------
# Set up logging keeping the default log layout except for the date
#-------------------------------------------------------------------------------
my $log = Log::Handler->new();
$log->add(
file => {
timeformat => "%F %H:%M:%S (%s)",
filename => $logfile,
maxlevel => 7,
minlevel => 0
}
);
#-------------------------------------------------------------------------------
# If mode JSON is selected with -json option we'll check the defined 'json'
# directory, otherwise we'll look for work in the maildrop.
#-------------------------------------------------------------------------------
if ($jsonmode) {
#
# Look for JSON files
#
@ignore = ( '*~', '.*.swp', 'banned', 'processed', 'rejected' );
my $jsonfileRE = qr{(\.(?i:json)|[^.]+)$};
#<<<
@files = File::Find::Rule
->file()
->name($jsonfileRE)
->not( File::Find::Rule->new->name(@ignore) )
->maxdepth(1)
->in($jsondir);
#>>>
if ( $DEBUG > 1 ) {
_debug(1,
"Files found in $jsondir",
Dumper( \@files ),
);
}
#
# There may be nothing there
#
die "No JSON files found; nothing to do\n" unless (@files);
#
# Convert the JSON files into the %updates hash
#
do_JSON_process( \@files, \%updates, $log, $dry_run, $verbose );
}
else {
#
# Look for mail files
#
@ignore = ( '*~', '.*.swp', 'banned', 'processed', 'rejected' );
my $mailfileRE = qr{(\.(?i:eml)|[^.]+)$};
#<<<
@files = File::Find::Rule
->file()
->name($mailfileRE)
->not( File::Find::Rule->new->name(@ignore) )
->maxdepth(1)
->in($maildrop);
#>>>
if ( $DEBUG > 1 ) {
_debug(1,
"Files found in $maildrop",
Dumper( \@files ),
);
}
#
# There may be nothing there
#
unless (@files) {
warn "No mail found; nothing to do\n";
exit;
}
#
# Convert the mail messages into the %updates hash
#
do_mail_process( \@files, \%updates, $log, $dry_run, $verbose );
}
#-------------------------------------------------------------------------------
# Check that comments aren't already in the database
#-------------------------------------------------------------------------------
do_database_check( $dbh, \%updates, $log, $verify, $dry_run );
#-------------------------------------------------------------------------------
# Perform the moderation pass
#-------------------------------------------------------------------------------
do_moderation( $dbh, \%updates, $template, $log, $verify, $dry_run );
#
# Dump the data structure if requested
#
_debug($DEBUG > 1, '%updates: ' . Dumper(\%updates));
#-------------------------------------------------------------------------------
# For the comments which are valid and approved, add them to the database
#-------------------------------------------------------------------------------
print '-' x 80, "\n" if $verbose > 0;
do_update( $dbh, \%updates, $log, $dry_run, $verbose );
#-------------------------------------------------------------------------------
# Walk the captured data reporting what happened (would happen) moving the
# files to the appropriate places and telling the server to do things to the
# originals (if appropriate)
#-------------------------------------------------------------------------------
foreach my $file ( sort_by_timestamp( \%updates ) ) {
#
# Deal with each file depending on the selected action (from the set
# 'Approve', 'Ban' and 'Reject')
#
if ( $updates{$file}->{action} =~ $mkeys_re ) {
#
# Get the elements we need for the function arguments below.
# NOTE: Apologies! This is obscure. The action key will be 'Approve',
# 'Ban' or 'Reject', and the lookup will return the name of the
# destination (for messages), an anonymous subroutine reference which determines
# the directory, and a keyword to send to the web server to dispose of
# the comment. The subref must be called with $jsonmode as the
# argument to get the destination which is different depending on
# whether we're in mail or JSON mode. This really needs to be made
# simpler and easier to understand when this code is revisited!
#
my ( $name, $destsub, $serverac )
= ( @{ $moderation_keys{ $updates{$file}->{action} } } );
my $dest = &$destsub($jsonmode);
#
# Print moderation settings
#
_debug($DEBUG > 1,
'$name: ' . $name,
'$dest: ' . $dest,
'$serverac: ' . $serverac,
);
#
# Report what we're doing with the file and do it if appropriate
#
report_action( $updates{$file}, $name, $dest, $log,
$dry_run, $verbose );
#
# Tell the server what to do, but only if this is the live instance
# and we're not in dry-run mode
#
if ($live) {
unless (
call_back(
$callback_template, $updates{$file}->{JSON}->{key},
$serverac, $cb_user, $cb_password, $log, $dry_run, $verbose
)
)
{
print "Server update failed\n" if $verbose > 0;
}
}
else {
unless ($dry_run) {
print "Not live; no server action taken\n" if $verbose > 0;
}
}
}
elsif ( $updates{$file}->{action} eq 'Ignore' ) {
#
# The action here is 'Ignore' which means just leave it alone. Since
# only the moderator can choose this, or the failure to find the
# episode in the database, we know it's not a broken mail message or
# bad JSON.
#
if ( $verbose > 0 ) {
print "File '$file' ignored\n";
print_errors( $updates{$file}->{errors} );
}
}
elsif ( $updates{$file}->{skip} > 0 ) {
#
# The comment was found to be already in the database so we are
# skipping it. We want to report this and move the file to the
# 'processed' directory.
#
$shortfile = $updates{$file}->{shortfile};
unless ($dry_run) {
if ( $verbose > 0 ) {
print "Moving '$shortfile' to 'processed'\n";
print "Reason: comment is already in the database\n";
print_errors( $updates{$file}->{errors} );
}
$log->info("Moving $file to 'processed'");
unless ( moveFile( $file, ($jsonmode ? $jprocessed : $processed) ) ) {
warn "Unable to move $file\n";
$log->error("Failed to move $file");
}
}
else {
if ( $verbose > 0 ) {
print "Would move '$shortfile' to 'processed' (dry run)\n";
print "Reason: comment is already in the database\n";
print_errors( $updates{$file}->{errors} );
}
}
}
else {
#
# We aborted this one early in the processing and validation, so it's
# really messed up. We're going to reject it outright with error
# messages. We can't tell the server because we don't have the
# key to do it.
#
$shortfile = $updates{$file}->{shortfile};
unless ($dry_run) {
if ( $verbose > 0 ) {
print "Moving $shortfile to 'rejected'\n";
print "Reason: failed validation\n";
print_errors( $updates{$file}->{errors} );
}
$log->info("Moving $file to 'rejected'");
unless ( moveFile( $file, ($jsonmode ? $jrejected : $rejected) ) ) {
warn "Unable to move $file\n";
$log->error("Failed to move $file");
}
}
else {
if ( $verbose > 0 ) {
print "Would move $shortfile to 'rejected' (dry run)\n";
print "Reason: failed validation\n";
print_errors( $updates{$file}->{errors} );
}
}
}
print '.' x 80, "\n" if $verbose > 0;
}
exit;
#=== FUNCTION ================================================================
# NAME: do_JSON_process
# PURPOSE: Process any pending JSON files found in a directory search
# PARAMETERS: $files arrayref containing the JSON files found
# $updates hashref to contain the details collected from
# the JSON files
# $log log handle
# $dry_run Boolean flagging dry-run mode
# $verbose Integer level of verbosity
# RETURNS: Nothing
# DESCRIPTION: The files containing JSON comments have been found earlier and
# passed to this function. They are processed in sorted order.
# The JSON is decoded into a hash and is validated, and the
# elements cleaned up. The JSON is stored for later in the
# $updates structure. If all tests and checks are passed the
# entry for this file in the $updates structure is marked as
# valid. If anything fails the entry is marked as invalid for
# the next stages of processing.
# THROWS: No exceptions
# COMMENTS: None
# SEE ALSO: N/A
#===============================================================================
sub do_JSON_process {
my ( $files, $updates, $log, $dry_run, $verbose ) = @_;
my ( $shortfile, $dt );
my $json = JSON->new;
#
# Examine the files we found
#
foreach my $file ( sort(@$files) ) {
print ">> $file\n" if $verbose > 0;
$log->notice("Processing $file") unless $dry_run;
#
# Initialise for this file, marking it as invalid by default.
#
$updates->{$file} = {};
$updates->{$file}->{valid} = 0;
$updates->{$file}->{moderated} = 0;
$updates->{$file}->{errors} = [];
$updates->{$file}->{action} = '';
$updates->{$file}->{file} = $file; # For convenience
$updates->{$file}->{shortfile} = $shortfile = abbreviate_filename($file);
#
# Open the nominated file and load its contents into a scalar. Because
# 'read_text' does its own internal error handling we need try/catch to
# intercept it and provide our own.
#
my $json_text;
try {
$json_text = read_text($file);
}
catch ($e){
warn "Failed to read JSON file $file\n";
next;
}
#
# Decode and process the JSON, returning FALSE if it fails so we get
# a log entry (unless it's dry run mode)
#
unless (
convert_JSON(
$json, $json_text, $updates, $file,
$log, $dry_run, $verbose
)
)
{
$log->warning("JSON decode failure; aborting '$shortfile'")
unless $dry_run;
}
}
}
#=== FUNCTION ================================================================
# NAME: do_mail_process
# PURPOSE: Process any pending email found in a directory search
# PARAMETERS: $files arrayref containing the mail files found
# $updates hashref to contain the details collected from
# the mail files
# $log log handle
# $dry_run Boolean flagging dry-run mode
# $verbose Integer level of verbosity
# RETURNS: Nothing
# DESCRIPTION: The files containing email have been found earlier and passed
# to this function. They are processed in sorted order. Each
# file is parsed as a mail file and the various destination
# addresses collected. (TODO Check these against permitted
# addresses). Each message is expected to be in MIME format with
# an attachment, which is extracted. The target mimetype
# (application/json) is checked for (TODO This is a global and
# shouldn't be). The JSON it contains is decoded into a hash and
# is validated, and the elements cleaned up. The JSON is stored
# for later in the $updates structure. If all tests and checks
# are passed the entry for this file in the $updates structure
# is marked as valid. If anything fails the entry is marked as
# invalid for the next stages of processing.
# THROWS: No exceptions
# COMMENTS: None
# SEE ALSO: N/A
#===============================================================================
sub do_mail_process {
my ( $files, $updates, $log, $dry_run, $verbose ) = @_;
my ( $shortfile, $dt );
#
# Prepare for parsing mail and JSON
#
my $parser = MIME::Parser->new;
$parser->output_under("/tmp");
my $json = JSON->new;
#
# Examine the files we found
#
foreach my $file ( sort(@$files) ) {
print ">> $file\n" if $verbose > 0;
$log->notice("Processing $file") unless $dry_run;
#
# Initialise for this file, marking it as invalid by default.
#
$updates->{$file} = {};
$updates->{$file}->{valid} = 0;
$updates->{$file}->{moderated} = 0;
$updates->{$file}->{errors} = [];
$updates->{$file}->{action} = '';
$updates->{$file}->{file} = $file; # For convenience
$updates->{$file}->{shortfile} = $shortfile = abbreviate_filename($file);
#
# Open the current file and load its contents into a Mail::Internet object
# TODO: Consider using IO::All
#
open( my $mfh, '<', $file )
or die "Failed to open input file '$file' : $!\n";
my $mi_obj = Mail::Internet->new($mfh);
close($mfh)
or warn "Failed to close input file '$file' : $!\n";
#
# This should be to comments@hackerpublicradio.org in one of these headers
#
my @addresses = (
Mail::Address->parse( $mi_obj->head->get('Resent-to') ),
Mail::Address->parse( $mi_obj->head->get('Resent-cc') )
);
if ( !@addresses ) {
@addresses = (
Mail::Address->parse( $mi_obj->head->get('To') ),
Mail::Address->parse( $mi_obj->head->get('Cc') )
);
}
#
# Report the headers we're interested in if requested
#
if ($verbose > 1) {
foreach my $addr (@addresses) {
my $dest = lc( $addr->address );
print "To: $dest\n";
}
print '~ ' x 40,"\n";
}
#
# TODO Check the message is actually for us
#
my ( $ct, $cte, @body, $body, $results );
#
# Parse the message in the assumption it's MIME. Report any errors and
# warnings.
#
my $entity = $parser->parse_data( $mi_obj->as_string() );
unless ($entity) {
$results = $parser->results;
foreach my $pmsg ( $results->msgs ) {
print STDERR "$pmsg\n";
}
}
#
# If we got multiple MIME parts then look for the first 'application/json'
# element and save it as the body we'll work on.
#
if ( scalar( $entity->parts ) > 0 ) {
#
# MIME message
#
if ( $DEBUG > 1 ) {
_debug(1, 'Message is MIME with multiple parts');
foreach my $ent ( $entity->parts ) {
_debug(1, "Type: " . $ent->mime_type);
}
}
#
# Get the first MIME part of the type we want and turn it into an
# array.
# TODO: Might it be better to make a string here since later we simply
# convert the array to a string again.
#
foreach my $ent ( $entity->parts ) {
if ( $ent->mime_type eq $target_mimetype ) {
$ct = $ent->mime_type;
#
# FIXME: Using 'bodyhandle' is supposed to decode the entity
# body, but this is untested. Also, we have to use 'as_string'
# then split on "\n" and stick the newlines back on the array
# elements, which seems a bit weird. Have to monitor this to
# see how it behaves.
#
#@body = @{ $ent->body() };
@body = map {"$_\n"}
split( "\n", $ent->bodyhandle->as_string );
_debug($DEBUG > 1,
'Length of body = ' . scalar(@body) . ' lines'
);
#
# If we got a non-empty body then exit the loop
#
last if @body;
}
}
#
# We found no suitable part so there's nothing to process here. We'll
# let the later phases detect this though
#
unless (@body) {
saveError( "MIME message has no valid '$target_mimetype' element",
$updates->{$file}->{errors} , $log, $dry_run, $verbose );
next;
}
}
else {
#
# The message has no MIME parts
#
saveError( "Message has no attachment",
$updates->{$file}->{errors}, $log, $dry_run, $verbose );
next;
}
#
# Display the body unless we're not very verbose
#
$body = join( "", @body );
print "$body\n" if $verbose > 2;
#
# Decode and process the JSON, returning FALSE if it fails so we get
# a log entry (unless it's dry run mode)
#
unless (
convert_JSON(
$json, $body, $updates, $file,
$log, $dry_run, $verbose
)
)
{
$log->warning("JSON decode failure; aborting '$shortfile'")
unless $dry_run;
}
}
continue {
print '-' x 80, "\n" if $verbose > 0;
}
return;
}
#=== FUNCTION ================================================================
# NAME: do_database_check
# PURPOSE: Look to see if the collected comments are already in the
# database
# PARAMETERS: $dbh database handle
# $updates structure (hashref) holding collected
# information about the files and comments in
# them
# $log log handle
# $verify Boolean flagging the need to verify
# TODO: Is this right?
# $dry_run Boolean flagging dry-run mode
# RETURNS: Nothing
# DESCRIPTION: Looks for each comment in %$updates in the database. The
# search criteria are the comment timestamp (in database format)
# and the comment author name. The timestamp is expected to have
# been converted from full ISO format (2024-01-10T11:16:11Z) to
# the format MySQL likes (2024-01-10 11:16:11). the original
# date is in the element 'orig_timestamp'. If there is a match
# in the database then the comment is to be skipped, and we mark
# the element appropriately and display and log a message. If no
# match we can proceed without skipping;
# THROWS: No exceptions
# COMMENTS: None
# SEE ALSO: N/A
#===============================================================================
sub do_database_check {
my ( $dbh, $updates, $log, $verify, $dry_run ) = @_;
my ( $sth1, $h1, $shortfile, $message );
#
# Look for a comment with details matching an incoming one. If found
# someone else has approved it so we need to skip it.
#
$sth1 = $dbh->prepare(
q{SELECT id FROM comments
WHERE comment_timestamp = ?
AND comment_author_name = ?}
) or die $DBI::errstr;
if ( $dbh->err ) {
warn $dbh->errstr;
}
foreach my $file ( sort_by_timestamp($updates) ) {
#
# Make a shortcut to the JSON part, and collect the short form of the
# filename.
#
my $ptr = $updates->{$file}->{JSON};
#my $errors = $updates->{$file}->{errors};
$shortfile = $updates{$file}->{shortfile};
#
# Look for the comment in the database
#
$sth1->execute( $ptr->{comment_timestamp},
$ptr->{comment_author_name} );
if ( $dbh->err ) {
warn $dbh->errstr;
}
#
# Did we find it?
#
if ( $h1 = $sth1->fetchrow_hashref() ) {
#
# Found in the database. Mark this comment to be skipped and save
# an error report
#
#$updates->{$file}->{valid} = 0;
$updates->{$file}->{skip} = 1;
$message = "Comment in file '$shortfile' is already in the database";
print "$message\n" if $verbose > 0;
$log->warn($message) unless $dry_run;
}
else {
#
# Not found
#
$updates->{$file}->{skip} = 0;
}
}
}
#=== FUNCTION ================================================================
# NAME: do_moderation
# PURPOSE: Moderate all the valid comments in the 'updates' structure
# PARAMETERS: $dbh database handle
# $updates structure (hashref) holding collected
# information about the files and comments in
# them
# $tt_template path to the template we're using
# $log log handle
# $verify Boolean flagging the need to verify
# $dry_run Boolean flagging dry-run mode
# RETURNS: Nothing
# DESCRIPTION: Performs the moderation pass on the accumulated comments in
# the $updates structure. This is a hash (referenced by
# $updates) containing the filename as the key. Each of these
# lead to a hashref containing the JSON data (in a hashref), an
# arrayref of error messages and various flags denoting whether
# the comment is valid, has been moderated and what action the
# moderator took.
# The flags and error messages are manipulated here depending on
# what choice is made during the moderation process.
# The contents of the JSON hash (or a subset thereof) and the
# data about the show returned from a database query are
# displayed for the moderator using a template, and a menu of
# 'Approve', 'Ban', 'Reject', Ignore' is presented.
# On exit the structure referenced by $updates has been modified
# to signal what should be done with each of the comments.
# THROWS: No exceptions
# COMMENTS: TODO: References globals and should not.
# SEE ALSO: N/A
#===============================================================================
sub do_moderation {
my ( $dbh, $updates, $tt_template, $log, $verify, $dry_run ) = @_;
my ( $sth1, $h1, $cdt, $age );
my $agelimit = 60 * 60 * 24;
#
# We'll get details of the show each comment is associated with using this
# query. It will be passed to the TT² template.
#
$sth1 = $dbh->prepare(
q{SELECT e.id, e.date, e.title, h.host FROM eps e
JOIN hosts h ON e.hostid = h.hostid
WHERE e.id = ?}
) or die $DBI::errstr;
if ( $dbh->err ) {
warn $dbh->errstr;
}
#
# We'll display each comment with a TT² template
#
my $tt = Template->new(
{ ABSOLUTE => 1,
ENCODING => 'utf8',
INCLUDE_PATH => $basedir,
}
);
my $vars = {};
my $choice;
#
# Process the valid comments, and not marked as 'skip'
#
foreach my $file ( sort_by_timestamp($updates) ) {
if ( $updates->{$file}->{valid} > 0 && $updates->{$file}->{skip} == 0 ) {
#
# Make shortcuts
#
my $ptr = $updates->{$file}->{JSON};
my $errors = $updates->{$file}->{errors};
#
# Check the age of the comment if we're verifying, and ignore if
# less than the age limit. The original timestamp should have been
# reserved even though we had to reformat for MySQL/MariaDB.
#
if ($verify) {
$cdt = DateTime::Format::ISO8601->parse_datetime(
$ptr->{orig_timestamp}
);
$age = DateTime->now->epoch - $cdt->epoch;
if ($age < $agelimit) {
print "Choice 'Ignore' auto-selected due to comment age\n";
$updates->{$file}->{action} = 'Ignore';
$log->info("Ignored comment in $file")
unless $dry_run;
$updates->{$file}->{moderated} = 0;
next;
}
}
#
# Get the show details from the database
#
$sth1->execute( $ptr->{eps_id} );
if ( $dbh->err ) {
warn $dbh->errstr;
}
#
# Do we know this episode?
# NOTE: If the database we're using is the clone this might fail
# because it's out of step with the live one
#
if ( $h1 = $sth1->fetchrow_hashref() ) {
#
# Populate the variables for the template
#
$vars->{file} = $file;
$vars->{db} = $h1;
$vars->{comment} = $ptr;
#
# Call the template and print the result
#
my $document;
$tt->process( $tt_template, $vars, \$document,
{ binmode => ':utf8' } )
|| die $tt->error(), "\n";
print $document;
#
# Prompt for moderation. Note: these prompts are used to flag
# the choice in the %updates hash, so don't change them unless
# you also change them elsewhere.
#
$choice = prompt(
'Choose...',
-menu => [ 'Approve', 'Ban', 'Reject', 'Ignore' ],
'>'
);
if ($choice) {
print "Choice '$choice' selected\n";
if ( $choice eq 'Approve' ) {
$updates->{$file}->{action} = $choice;
$log->info("Approved comment in $file")
unless $dry_run;
$updates->{$file}->{moderated} = 1;
}
elsif ( $choice eq 'Ban' ) {
$updates->{$file}->{action} = $choice;
saveError(
"Comment in '$file' banned by the moderator",
$errors,
$log, $dry_run, 0
);
$updates->{$file}->{valid} = 0;
$updates->{$file}->{moderated} = 1;
}
elsif ( $choice eq 'Reject' ) {
$updates->{$file}->{action} = $choice;
saveError(
"Comment in '$file' rejected by the moderator",
$errors,
$log, $dry_run, 0
);
$updates->{$file}->{valid} = 0;
$updates->{$file}->{moderated} = 1;
}
elsif ( $choice eq 'Ignore' ) {
$updates->{$file}->{action} = $choice;
$log->info("Ignored comment in $file")
unless $dry_run;
$updates->{$file}->{moderated} = 0;
}
}
else {
print "\n";
undef $choice; # Seem to need this after aborting 'prompt'
print "Ignored this comment\n";
$log->info("Ignored comment in $file (default action)")
unless $dry_run;
$updates->{$file}->{moderated} = 0;
}
#print "\n";
}
else {
#
# We couldn't find the episode in the database
#
printf "Episode %s is not known\n", $ptr->{eps_id};
$updates->{$file}->{action} = 'Ignore';
saveError(
"Episode referenced in '$file' is not known",
$errors,
$log, $dry_run, 0
);
$updates->{$file}->{moderated} = 0;
}
}
}
}
#=== FUNCTION ================================================================
# NAME: do_update
# PURPOSE: Add valid, approved comments to the database (or pretend to do
# so if in dry-run mode)
# PARAMETERS: $dbh database handle
# $updates structure (hashref) holding collected
# information about the files and comments in
# them
# $log log handle
# $dry_run Boolean flagging dry-run mode
# $verbose Integer level of verbosity
# RETURNS: Nothing
# DESCRIPTION: Performs a database update for all valid approved comments in
# the structure referenced by $updates unless in dry-run mode
# where what would be done will be reported.
# THROWS: No exceptions
# COMMENTS: TODO: References globals and should not.
# SEE ALSO: N/A
#===============================================================================
sub do_update {
my ( $dbh, $updates, $log, $dry_run, $verbose ) = @_;
my $sql = SQL::Abstract->new;
my ( $stmt, @bind, $show, $count );
$count = 0;
foreach my $file ( sort_by_timestamp($updates) ) {
if ( $updates->{$file}->{valid} > 0
&& $updates->{$file}->{action} eq 'Approve' )
{
$count++;
$show = $updates->{$file}->{JSON}->{eps_id};
my $shortfile = $updates{$file}->{shortfile};
#
# Build the statement
# NOTE: The JSON keys we wish to use match the column names in the
# 'comments' table. The second argument to 'insert' needs to be
# a hashref, so we use @json_keys to list the keys we need, with
# the whole expression returning an anonymous hash (hashref)
#
( $stmt, @bind )
= $sql->insert( 'comments',
{ %{ $updates->{$file}->{JSON} }{@json_keys} } );
#
# Display the statement we constructed if requested
#
_debug($DEBUG > 1,
"$stmt\n'",
join( "','", @bind )
);
#
# Take action or report potential changes
#
unless ($dry_run) {
#
# Apply the SQL
#
$dbh->do( $stmt, undef, @bind );
if ( $dbh->err ) {
#
# Something broke during the database update. Warn about
# it, save the error and make the action 'Ignore' so the
# file is kept for further investigation (or to be
# edited). Marking the file as invalid may be overkill.
#
warn $dbh->errstr;
saveError(
$dbh->errstr,
$updates{$file}->{errors},
$log, $dry_run, $verbose
);
# $updates{$file}->{valid} = 0;
$updates->{$file}->{action} = 'Ignore';
}
else {
$log->info("$file: inserted comment for show $show");
if ( $verbose > 0 ) {
print "Updated show $show from file $shortfile\n";
}
}
}
else {
print
"Would have added comment for show $show from file $shortfile\n"
if $verbose > 0;
}
}
}
if ($verbose > 0 && $count > 0) {
print "\n", '-' x 80, "\n";
}
}
#=== FUNCTION ================================================================
# NAME: convert_JSON
# PURPOSE: Converts the JSON from a file or an email into an internal
# data structure
# PARAMETERS: $json JSON object
# $jtext JSON in text format
# $updates hashref to contain the details collected from
# the JSON and elsewhere
# $file the file we're processing (hash key)
# $log log handle
# $dry_run Boolean flagging dry-run mode
# $verbose Integer level of verbosity
# RETURNS:
# DESCRIPTION:
# THROWS: No exceptions
# COMMENTS: None
# SEE ALSO: N/A
#===============================================================================
sub convert_JSON {
my ( $json, $jtext, $updates, $file, $log, $dry_run, $verbose ) = @_;
my ($comment, $dt, $shortfile);
$shortfile = $updates{$file}->{shortfile};
#
# Decode the JSON into a hashref
#
try {
$comment = $json->decode($jtext);
}
#
# If the JSON decode failed we have a problem
#
catch ($e){
saveError(
"JSON in file '$shortfile' failed to decode",
$updates->{$file}->{errors},
$log, $dry_run, $verbose
);
return 0;
}
#
# Validate the comment fields: do we have all we expected?
#
unless (validate_comment($comment)) {
saveError(
"JSON in file '$shortfile' failed to validate",
$updates->{$file}->{errors},
$log, $dry_run, $verbose
);
return 0;
}
#
# Perform some cleaning
#
foreach my $key (@cleanable_keys) {
$comment->{$key} = clean_comment_field( $comment->{$key} );
}
#
# ISO 8601 timestamps cause MySQL problems (or have started to) so we
# parse them and convert to an acceptable format. We keep the original
# format in $verify is set and we have to check its age.
#
try {
$dt = DateTime::Format::ISO8601->parse_datetime(
$comment->{comment_timestamp} );
}
catch ($e){
die "Failed to parse comment timestamp\n";
}
$comment->{orig_timestamp} = $comment->{comment_timestamp};
$comment->{comment_timestamp} = $dt->ymd . ' ' . $dt->hms;
#
# Stash the JSON for later
#
$updates->{$file}->{JSON} = $comment;
#
# This file is valid we think
#
$updates->{$file}->{valid} = 1;
$log->info("$file seems valid") unless $dry_run;
return 1;
}
#=== FUNCTION ================================================================
# NAME: report_action
# PURPOSE: Does all of the necessary reporting as the email or JSON files
# are being acted upon
# PARAMETERS: $struct hashref holding collected comment data for
# a given mail file
# $destname name of destination
# $dest destination directory
# $log log handle
# $dry_run Boolean flagging dry-run mode
# $verbose Integer level of verbosity
# RETURNS: Nothing
# DESCRIPTION: In dry run mode reports what would have been done along with
# any errors, but only in verbose mode. Otherwise reports the
# action (in verbose mode) and logs it too. It also moves the
# mail file to the requested directory.
# THROWS: No exceptions
# COMMENTS: None
# SEE ALSO: N/A
#===============================================================================
sub report_action {
my ( $struct, $destname, $dest, $log, $dry_run, $verbose ) = @_;
#
# Get values with a hashref slice
#
my ( $file, $shortfile ) = @$struct{ 'file', 'shortfile' };
unless ($dry_run) {
if ($verbose > 0) {
print "Moving '$shortfile' to '$destname'\n";
print_errors($struct->{$file}->{errors});
}
$log->info("Moving '$file' to '$destname'");
unless ( moveFile( $file, $dest ) ) {
warn "Unable to move $shortfile\n";
$log->error("Failed to move $shortfile");
}
}
else {
if ($verbose > 0) {
print "Would move '$shortfile' to '$destname' (dry run)\n";
print_errors($struct->{$file}->{errors});
}
}
return;
}
#=== FUNCTION ================================================================
# NAME: call_back
# PURPOSE: Invokes a PHP interface on the HPR server through which
# a comment can be marked as fully processed
# PARAMETERS: $format sprintf template for building the URL
# $key the key from the JSON
# $action the action to be taken
# $user the username from the configuration
# $pass the password from the configuration
# $log log handle
# $dry_run Boolean flagging dry-run mode
# $verbose Integer level of verbosity
# RETURNS: True (1) if the call performed as it should, otherwise false
# (0)
# DESCRIPTION: Checks that the correct parameters have been supplied, then
# builds the URL we're going to use from the format string, key
# and action. If in dry-run mode this URL is reported and
# nothing more. Otherwise the URL is queried with an HTTP 'GET'.
# If the response shows success then the action is considered to
# have been taken. The returned message is displayed (in verbose
# mode) and success is logged. If the 'GET' failed then
# a warning message is generated along with a warning log entry.
# THROWS: No exceptions
# COMMENTS: None
# SEE ALSO: N/A
#===============================================================================
sub call_back {
my ( $format, $key, $action, $user, $pass, $log, $dry_run, $verbose ) = @_;
my ( $ua, $url, $response, $code, $message, $content );
#
# Check calling parameters
#
unless ($key) {
warn "Invalid call to 'call_back' subroutine; missing key\n";
return 0;
}
unless ( $action =~ /^(approve|block|delete)$/ ) {
warn "Invalid call to 'call_back' subroutine; invalid action\n";
return 0;
}
#
# Build the URL
#
$url = sprintf( $format, $key, $action );
#
# Perform the call to clean up the JSON files, etc, or just say what would
# have been done
#
unless ($dry_run) {
print "Making the server call:\nGET $url\n" if $verbose > 0;
$ua = LWP::UserAgent->new;
$ua->timeout(180); # Module default is 180 sec = 3 mins
$ua->credentials("hub.hackerpublicradio.org:443", "cms", $user, $pass);
$response = $ua->get($url);
if ( $response->is_success ) {
#
# Server call succeeded
#
$code = $response->code;
$message = $response->message;
$content = $response->decoded_content;
if ($verbose > 0) {
print "Success: $code/$message\n";
print "Content: $content\n" if $content;
}
$log->info("Successful server message $key/$action: " .
$response->status_line);
return 1;
}
else {
#
# Server call failed
#
warn $response->status_line . "\n";
$log->error("Failed server message $key/$action: " .
$response->status_line);
# Try to log as much as possible
$log->error( $response->decoded_content )
if $response->decoded_content;
return 0;
}
}
else {
#
# Always a success in dry-run mode
#
print "Would have made the server call:\nGET $url\n" if $verbose > 0;
return 1;
}
}
#=== FUNCTION ================================================================
# NAME: print_errors
# PURPOSE: Prints the contents of the error array if there's anything in
# it (i.e. not empty)
# PARAMETERS: $errors arrayref possibly containing errors
# RETURNS: Nothing
# DESCRIPTION: Simplifies printing in the case when there's nothing to print
# THROWS: No exceptions
# COMMENTS: None
# SEE ALSO: N/A
#===============================================================================
sub print_errors {
my ($errors) = @_;
return unless defined($errors);
print "Errors: ", join( "\n", @{$errors} ), "\n"
if @{$errors};
}
#=== FUNCTION ================================================================
# NAME: saveError
# PURPOSE: Saves an error message in an error structure and logs it if in
# live mode
# PARAMETERS: $message error message text
# $struct structure (arrayref) to hold the text
# $log log handle
# $dry_run Boolean flagging dry-run mode
# $verbose Integer level of verbosity
# RETURNS: Nothing
# DESCRIPTION: Simplifies the saving of an error message and logging it
# THROWS: No exceptions
# COMMENTS: None
# SEE ALSO: N/A
#===============================================================================
sub saveError {
my ( $message, $struct, $log, $dry_run, $verbose ) = @_;
$log->warn($message) unless $dry_run;
push( @{$struct}, $message );
print "$message\n" if $verbose > 0;
}
#=== FUNCTION ================================================================
# NAME: validate_comment
# PURPOSE: Checks that all of the required fields are present in the
# JSON comment
# PARAMETERS: $comment - a pointer to the JSON hash
# RETURNS: True (1) if valid, otherwise false (0)
# DESCRIPTION:
# THROWS: No exceptions
# COMMENTS: None
# SEE ALSO: N/A
#===============================================================================
sub validate_comment {
my ($comment) = @_;
foreach my $key (@json_keys) {
return 0 unless defined($comment->{$key});
}
return 1;
}
#=== FUNCTION ================================================================
# NAME: count_tags
# PURPOSE: Counts the number of start tags in what might be HTML
# PARAMETERS: $string String to examine
# RETURNS: An integer count of the number of start tags found
# DESCRIPTION: Uses HTML::Parser to parse the input string. A handler is
# defined which accumulates start and end tags in an array. The
# number of start tags found is returned to the caller.
# THROWS: No exceptions
# COMMENTS: None
# SEE ALSO: N/A
#===============================================================================
sub count_tags {
my ($string) = @_;
my @accum;
chomp($string);
return 0 unless $string;
my $p = HTML::Parser->new(
api_version => 3,
handlers => {
start => [ \@accum, "event,text" ],
end => [ \@accum, "event,text" ]
}
);
$p->parse($string);
$p->eof;
return scalar(@accum);
}
#=== FUNCTION ================================================================
# NAME: clean_comment_field
# PURPOSE: Cleans a string field from the comment
# PARAMETERS: $field - the field to be cleaned
# RETURNS: The cleaned string
# DESCRIPTION: We want to remove all HTML. We also want to add HTML entities
# where these may be needed. If there are already entities in
# the data we will decode them first then re-encode so we don't
# get double encoding.
# THROWS: No exceptions
# COMMENTS: None
# SEE ALSO: N/A
#===============================================================================
sub clean_comment_field {
my ($field) = @_;
#
# Remove all HTML
#
my $hr = HTML::Restrict->new();
$field = $hr->process($field);
#
# Entities. Make sure all existing entities are decoded first, then encode
# the result. Note that the unsafe characters are the reverse of what's
# listed. The second argument to encode_entities uses regular expression
# character class syntax (as if in [] in a RE). The set is all non-plain
# ASCII exclusing newline, carriage return and '<>&'.
#
decode_entities($field);
$field
= encode_entities( $field, '^\n\r\x20-\x25\x27-\x3b\x3d\x3f-\x7e' );
return $field;
}
#=== FUNCTION ================================================================
# NAME: moveFile
# PURPOSE: Moves a file (renames it) taking account of whether the name
# exists at the destination
# PARAMETERS: $file file to move
# $dest destination of the file
# RETURNS: True (1) if the move succeeded, otherwise false (0)
# DESCRIPTION: A wrapper around the File::Copy 'move' function. If the target
# file exists then a version number is appended to the name so
# that the original is not clobbered.
# THROWS: No exceptions
# COMMENTS: None
# SEE ALSO: N/A
#===============================================================================
sub moveFile {
my ( $file, $dest ) = @_;
my ( $target, $basename, $prefix, $vsn );
return 0 unless -e $file;
#
# Handle a destination directory
#
if ( -d $dest ) {
( $basename = $file ) =~ s|.*/||mx;
$target = "$dest/$basename";
}
else {
$target = $dest;
}
#
# If the target exists we'll have to do some work, otherwise just do the
# move
#
if ( -e $target ) {
if ( ( $prefix, $vsn ) = ( $target =~ /(\.)(\d+)$/ ) ) {
while ( -e "$prefix$vsn" ) {
$vsn++;
$target = "$prefix$vsn";
}
return move( $file, $target );
}
else {
$vsn = 1;
$prefix = $target;
$target = "$prefix.$vsn";
while ( -e "$prefix.$vsn" ) {
$vsn++;
$target = "$prefix.$vsn";
}
return move( $file, $target );
}
}
else {
return move( $file, $dest );
}
}
#=== FUNCTION ================================================================
# NAME: abbreviate_filename
# PURPOSE: Abbreviates a long pathname by shrinking the directory names.
# PARAMETERS: $filename The name of the file
# RETURNS: A string containing the abbreviated name
# DESCRIPTION: Splits the filename, assembles the directories as just their
# first character, and then appends the filename. A bit
# primitive but it works.
# THROWS: No exceptions
# COMMENTS: None
# SEE ALSO: N/A
#===============================================================================
sub abbreviate_filename {
my ($filename) = @_;
my @elements = split('/',$filename);
my @shortened = map {substr($_,0,1)} @elements[0..$#elements-1];
return join('/',@shortened,$elements[-1]);
}
#=== FUNCTION ================================================================
# NAME: sort_by_timestamp
# PURPOSE: Sorts the main hash by timestamp rather than the normal sort
# by key
# PARAMETERS: $struct hashref pointing to the main structure
# RETURNS: A list of hash keys in order of timestamp
# DESCRIPTION: Generates a list of keys by sorting based on the comment
# timestamp in the JSON hash. Performs the sort by doing
# a string comparison since the dates are in ISO8601
# 'YYYY-MM-DDTHH:MM:SSZ' format (e.g. 2017-10-06T06:22:55Z).
# This seems to be safe.
# THROWS: No exceptions
# COMMENTS: None
# SEE ALSO: N/A
#===============================================================================
sub sort_by_timestamp {
my ($struct) = @_;
my @result = sort {
$struct->{$a}->{JSON}->{comment_timestamp}
cmp $struct->{$b}->{JSON}->{comment_timestamp}
} keys(%$struct);
return @result;
}
#=== FUNCTION ================================================================
# NAME: _debug
# PURPOSE: Prints debug reports
# PARAMETERS: $active Boolean: 1 for print, 0 for no print
# @messages List of messages to print
# RETURNS: Nothing
# DESCRIPTION: Outputs messages to STDERR if $active is true. For each
# message it removes any trailing newline and then adds one in
# the 'print' so the caller doesn't have to bother. Prepends
# each message with 'D> ' to show it's a debug message.
# THROWS: No exceptions
# COMMENTS: None
# SEE ALSO: N/A
#===============================================================================
sub _debug {
my ( $active, @messages ) = @_;
return unless $active;
foreach my $msg (@messages) {
chomp($msg);
print STDERR "D> $msg\n";
}
}
#=== FUNCTION ================================================================
# NAME: Options
# PURPOSE: Processes command-line options
# PARAMETERS: $optref Hash reference to hold the options
# RETURNS: Undef
# DESCRIPTION:
# THROWS: no exceptions
# COMMENTS: none
# SEE ALSO: n/a
#===============================================================================
sub Options {
my ($optref) = @_;
my @options = (
"help", "documentation|manpage",
"debug=i", "dry-run!",
"verbose+", "verify!",
"live!", "json!",
"config=s",
);
if ( !GetOptions( $optref, @options ) ) {
pod2usage( -msg => "$PROG version $VERSION\n", -exitval => 1 );
}
return;
}
__END__
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Application Documentation
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
#{{{
=head1 NAME
process_comments
=over 4
Process incoming comment files as email messages or JSON files
=back
=head1 VERSION
This documentation refers to process_comments version 0.3.3
=head1 USAGE
./process_comments [-help] [-doc] [-debug=N] [-[no]dry-run]
[-verbose ...] [-[no]live] [-[no]json] [-config=FILE]
./process_comments -dry-run
./process_comments -debug=3 -dry-run
./process_comments -verbose -verbose
./process_comments -help
./process_comments -documentation
./process_comments -json
./process_comments -verb -live -config=.process_comments_live.cfg
=head1 OPTIONS
=over 8
=item B<-help>
Prints a brief help message describing the usage of the program, and then exits.
=item B<-documentation> or B<-manpage>
Prints the entire embedded documentation for the program, then exits.
=item B<-debug=N>
Enables debugging mode when N > 0 (zero is the default). The levels are:
=over 4
=item B<1>
Reports on the options chosen. Running with the B<-dry-run> option prevents
problems with comments if any options have been incorrectly used.
=item B<2>
Prints all of the information described at the previous level.
Prints the names of files found in the mail or JSON spool areas.
Prints the internal details of the email, listing the MIME parts (if there are any).
Prints the length of the MIME part matching the desired type, in lines.
Prints the entirety of the internal structure holding details of the mail file
and the comment it contains. This follows the moderation pass.
Prints the SQL that has been constructed to update the database.
=item B<3>
Prints all of the information described at the previous levels.
Prints the contents of the B<%config> hash built from the configuration file.
=back
=item B<-[no]dry-run>
Controls the program's B<dry-run> mode. It is off by default. In dry-run mode
the program reports what it would do but makes no changes. When off the
program makes all the changes it is designed to perform.
=item B<-verbose>
This option may be repeated. For each repetition the level of verbosity is
increased. By default no verbosity is in effect and the program prints out the
minimal amount of information.
Verbosity levels:
=over 4
=item B<1>
Prints the name of each mail (or JSON) file as it's processed.
Prints any error messages during message validation, which are also being
logged (unless in dry-run mode) and saved for reporting later.
Prints a notification if the comment is added to the database (or that this
would have happened in dry-run mode).
Prints messages about the moving of each mail (or JSON) file from the
processing area, along with any errors accumulated for that file. In dry-run
mode simply indicates what would have happened.
Prints the response code received from the server when invoking the interface
for updating comment files there. If in dry-run mode the message produced
merely indicates what would have happened.
If validation failed earlier on then further information is produced about the
final actions taken on these files.
=item B<2>
Prints the addresses each mail message is being sent to (unless in JSON mode).
=item B<3>
Prints the JSON contents of each mail message (or of each JSON file).
=back
=item B<-[no]verify>
This option controls whether the script imposes a delay on comments. The idea
is that if the comment feature is used to rant on a subject or to pass
misinformation, delaying them will help to defuse the situation.
The default state is B<-noverify>; a delay is not imposed. Selecting
B<-verify> means that comments have to be at least 24 hours old before they
are processed. The length of the delay cannot currently be changed without
altering the script.
This feature of the script has hardly been used and is perhaps against the
spirit of HPR, so it will probably be removed in a future version.
=item B<-[no]live>
This option determines whether the program runs in live mode or not. The
default varies depending on which system it is being run on.
IT SHOULD NOT USUALLY BE NECESSARY TO USE THIS!
In live mode the program makes changes to the live database and sends messages
to the live web interface when a comment has been processed. With live mode
off the program assumes it is writing to a clone of the database and it does
not inform the webserver that a comment has been processed.
The default for the copy of the program on borg is that live mode is ON.
Otherwise the default is that live mode is OFF. The setting is determined by
the sed script called B<fixup.sed> on borg. This needs to be run whenever
a new version of the program is released. This is done as follows:
sed -i -f fixup.sed process_comments
=item B<-[no]json>
This option selects JSON mode, which makes the script behave in a different
way from the default mode (B<-nojson> or MAIL mode) where it processes email
containing comments.
In JSON mode the script looks in a sub-directory called B<json/> where it
expects to find JSON files. The normal way in which these files arrive in this
directory is by using B<scp> to copy them from the HPR server (the directory
is B</home/hpr/comments>). This is a provision in case the normal route of
sending out email messages has failed for some reason. It also saves the user
from setting up the mail handling infrastructure that would otherwise be
needed.
In JSON mode the mail handling logic is not invoked, files are searched for in
the B<json/> directory and each file is processed, moderation is requested and
the comment is added to the database. In `B<-live>` mode the server is informed
that the comment has been processed.
The B<json/> directory needs to have three sub-directories: B<processed>,
B<banned> and B<rejected>. The script will place the processed files into
these sub-directories according to the moderation choice made. This makes it
easier to see what actions were taken and helps avoid repeated processing of
the same comment.
=item B<-config=FILE>
This option defines a configuration file other than the default
B<.hpr_db.cfg>. The file must be formatted as described below in the section
B<CONFIGURATION AND ENVIRONMENT>.
=back
=head1 DESCRIPTION
A script to process new comments, moderate them and add them to the HPR
database.
In the new HPR comment system (released September 2017) a new web form is
presented in association with each show. The form can be used to submit
a comment on the show in question and takes some standard fields: the name of
the commenter, the title of the comment and the body of the comment itself.
Once the comment has been submitted its contents are formatted as a JSON
object and are sent as a mail attachment to the address
B<comments@hackerpublicradio.org>.
Recipients of these mail messages can then perform actions on the comments
they contain to cause them to be added to the HPR database. These actions are:
approve the comment, block it (because it is inappropriate or some form of
Spam and we want to prevent any further messages from the associated IP
address), or reject it (delete it). There is also an ignore option which skips
the current comment in this run of the script so it can be revisited later.
This script can process an entire email message which has been saved to a file
or a file containing the JSON object (as in the email attachment). When
processing email it is expected that it will be found in a maildrop directory,
and when finished the messages will be placed in sub-directories according to
what actions were carried out. A similar logic is used for JSON files; they
are expected to be in a drop area and are moved to sub-directories after
processing.
The directories used as spool areas are defined in the configuration file(s)
described below.
=head2 CHANGES
=head3 2024
An alternative method of comment approval has been added using PHP on the
server B<https://hub.hackerpublicradio.org>. This script was originally
designed around receiving email messages containing JSON or collecting JSON
files. The assumption was that it was the sole agent processing these messages
or files and would therefore be the only way that comments could enter the
database.
With the new server route it became possible for an emailed comment to be
received when that comment was already in the database.
As of version B<0.3.2> the script has been enhanced to check whether comments
received as mail messages (or JSON files) have already been processed. If this
is found to be the case the incoming data is skipped and stored in the
'B<processed>' directory (because the comment has been approved elsewhere).
There is still scope for confusion - if for example, a comment has been
rejected on the server, the script will see it as a potential addition to the
database. The moderators need to coordinate their decisions!
The ultimate solution will be to have just one way of moderating and
installing comments which is available to any of the "Janitors".
=head2 MAIL HANDLING
One way of handling incoming mail is to use a mail client which is capable of
saving messages sent to the above address in the spool area mentioned earlier.
For example, Thunderbird can do this by use of a filter and a plugin. Other
MUA's will have similar capabilities.
When this script is run on the mail spool area it will process all of the
files it finds. For each file it will check its validity in various ways,
display the comment then offer a moderation menu. The moderation options are
described below.
=head3 APPROVE
If a comment is approved then it will be added to the database, the associated
mail file will be moved to a sub-directory (by default called 'B<processed>'),
and the HPR server will be notified of this action.
=head3 BAN
If a comment is banned then it will not be added to the database. The mail
file will be moved to the sub-directory 'B<banned>' and the HPR server will be
informed that the IP address associated with the comment should be placed on
a black list.
=head3 REJECT
If a comment is rejected it is not written to the database, the mail file is
moved to the sub-directory 'B<rejected>' and the HPR server informed that the
comment can be deleted.
=head3 IGNORE
If a comment is ignored it is simply left in the mail spool and no further
processing done on it. It will be eligible for processing again when the
script is next run.
=head2 JSON FILE HANDLING
As described under the description of the B<-[no]json> option, the script
allows the processing of a multiple JSON files, each containing a single
comment. These files are expected to be in another spool area (defined int he
configuration file). The JSON is checked and all of the comment fields are
validated, then the moderation process is begun.
Moderation in this case consists of the same steps as described above except
that no mail file actions are taken and the JSON file is moved to
a sub-directory after processing.
=head1 DIAGNOSTICS
=over 4
=item B<Unable to find configuration file ...>
Type: fatal
The nominated configuration file referenced in B<-config=FILE> was not found.
=item B<No mail found; nothing to do>
Type: fatal
No mail files were found in the mail spool area requiring processing.
=item B<No JSON files found; nothing to do>
Type: fatal
No JSON files were found in the JSON spool area requiring processing.
=item B<Failed to read JSON file '...' ...>
Type: fatal
A JSON file in the spool area could not be read with a JSON parser.
=item B<Failed to parse comment timestamp ...>
Type: fatal
The timestamp must be converted to a format compatible with MySQL/MariaDB but
during this process the parse failed.
=item B<Failed to open input file '...' ...>
Type: fatal
A mail file in the spool area could not be opened.
=item B<Failed to move ...>
Type: warning
A mail file could not be moved to the relevant sub-directory.
=item B<Failed to close input file '...' ...>
Type: warning
A mail file in the spool area could not be closed.
=item B<Various error messages from the database subsystem>
Type: fatal, warning
An action on the database has been flagged as an error.
=item B<Various error messages from the Template toolkit>
Type: fatal
An action relating to the template used for the display of the comment has
been flagged as an error.
=item B<Invalid call to 'call_back' subroutine; missing key>
Type: warning
The routine 'call_back' was called incorrectly. The key was missing.
=item B<Invalid call to 'call_back' subroutine; invalid action>
Type: warning
The routine 'call_back' was called incorrectly. The action was invalid.
=item B<Error from remote server indicating failure>
Type: warning
While attempting to send an action to the remote server with the 'call_back'
subroutine an error message was received.
=back
=head1 CONFIGURATION AND ENVIRONMENT
=head2 CONFIGURATION
The configuration file design was changed for version B<0.2.7> and beyond.
The script expects a configuration file to provide it with various program
settings, database credentials and HPR CMS credentials.
The default configuration file name is B<.process_comments.cfg> and is for use
with the local clone of the hpr database.
This file has the general structure:
<<include .process_comments_settings.cfg>>
<<include .hpr_db.cfg>>
<cms>
user = "dummy:dummy"
</cms>
It uses B<Config::General> 'B<include>' directives to include the file
B<.process_comments_settings.cfg> which holds the program settings. It then
includes the file B<.hpr_db.cfg> which containss credentials for the cloned
database. Finally it defines dummy credentials for the HPR CMS since this
feature is not used by default.
The more useful configuration is in B<.process_comments_live.cfg>:
<<include .process_comments_settings.cfg>>
<<include .hpr_livedb.cfg>>
<cms>
<<include .hpradmin_curlrc>>
</cms>
The basic format of the main configuration data after the inclusions have been
performed is then:
<settings>
# Holds program settings
</settings>
<database>
# Holds database settings
</database>
<cms>
# Holds CMS credentials
</cms>
=head3 CONTENTS OF B<.process_comments_settings.cfg>:
This section is enclosed in '<settings>' tags:
<settings>
PROG = process_comments
#
# Defaults
#
basedir = "$HOME/HPR/Comment_system"
configfile = "$basedir/.hpr_db.cfg"
logfile = "$basedir/logs/${PROG}.log"
template = "$basedir/${PROG}.tpl"
#
# Mail message stash area
#
maildrop = "$HOME/HPR/CommentDrop"
processed = "$maildrop/processed"
rejected = "$maildrop/rejected"
banned = "$maildrop/banned"
#
# JSON stash area
#
jsondir = "$basedir/json"
jprocessed = "$jsondir/processed"
jrejected = "$jsondir/rejected"
jbanned = "$jsondir/banned"
#
# How to tell the server the comment's processed
#
callback_template = \
"https://hackerpublicradio.org/comment_process.php?key=%s&action=%s"
</settings>
=head3 CONTENTS OF B<.hpr_db.cfg> AND B<.hpr_livedb.cfg>
The default file providing this part is just for use with the default
database. The section is enclosed in '<database>' tags:
<database>
host = 127.0.0.1
port = PORT
name = DATABASE
user = USERNAME
password = PASSWORD
</database>
These settings can alternatively be used to connect to an SSH tunnel which has
been connected from a remote system (like borg) to the live database.
Assuming the port chosen for this is 3307 something like the following could
be used:
<database>
host = 127.0.0.1
port = 3307
name = hpr_hpr
user = hpr_hpr
password = "**censored**"
</database>
A typical Bash script for opening a tunnel might be:
#!/bin/bash
SSHPORT=22
LOCALPORT=3307
REMOTEPORT=3306
ssh -p ${SSHPORT} -f -N -L localhost:${LOCALPORT}:localhost:${REMOTEPORT} hpr@hackerpublicradio.org
=head3 CONTENTS OF B<.hpradmin_curlrc>
This is a file used by the 'curl' command to contain credentials to access the
CMS on the HPR server. The format needs to be:
user = hpradmin:**censored**
This is one of the formats allowed by 'curl' and is also compatible with the
B<Config::General> module used by this script.
=head2 TEMPLATE
The program displays the comment that is currently being processed for
moderation. It uses a template along with the Perl B<Template> module to do
this. By default this template is called B<process_comments.tpl>. This can be
changed by changing the line in the "settings" configuration file which sets
the 'template' variable.
The template is provided with the following data:
file a scalar containing the name of the file being processed
db a hash containing the details of the show to which the
comment relates, returned from a database query:
id the episode number
date the date of the episode
title the episode title
host the host name
comment a hash containing the fields from the comment:
eps_id the episode number
comment_timestamp date and time of the comment
comment_author_name comment author
comment_title comment title
comment_text comment text
justification justification for posting (if
relevant)
key unique comment key
=head1 DEPENDENCIES
Carp
Config::General
DBI
Data::Dumper
DateTime::Format::ISO8601
Encode
File::Copy
File::Find::Rule
File::Slurper
Getopt::Long
HTML::Entities
HTML::Restrict
IO::Prompter
JSON
LWP::UserAgent
List::Util
Log::Handler
MIME::Parser
Mail::Address
Mail::Field
Mail::Internet
Pod::Usage
SQL::Abstract
Template
The Perl B<experimental:try> option is used instead of the original
B<Try::Tiny> package.
=head1 BUGS AND LIMITATIONS
There are no known bugs in this module.
Please report problems to Dave Morriss (Dave.Morriss@gmail.com)
Patches are welcome.
=head1 AUTHOR
Dave Morriss (Dave.Morriss@gmail.com)
=head1 LICENCE AND COPYRIGHT
Copyright (c) 2017 - 2024 Dave Morriss (Dave.Morriss@gmail.com). All rights
reserved.
This module is free software; you can redistribute it and/or
modify it under the same terms as Perl itself. See perldoc perlartistic.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
=cut
#}}}
# [zo to open fold, zc to close]
# vim: syntax=perl:ts=8:sw=4:et:ai:tw=78:fo=tcrqn21:fdm=marker