forked from HPR/hpr-tools
2344 lines
77 KiB
Perl
Executable File
2344 lines
77 KiB
Perl
Executable File
#!/usr/bin/env perl
|
|
#===============================================================================
|
|
#
|
|
# FILE: process_comments
|
|
#
|
|
# USAGE: ./process_comments [-help] [-docmentation|manpage] [-debug=N]
|
|
# [-[no]dry-run] [-verbose ...] [-[no]verify] [-[no]live]
|
|
# [-[no]json] [-config=FILE]
|
|
#
|
|
# DESCRIPTION: Read new comments received as mail messages (or as JSON files)
|
|
# and perform moderation actions on them before possibly adding
|
|
# them to the HPR database.
|
|
#
|
|
# OPTIONS: ---
|
|
# REQUIREMENTS: ---
|
|
# BUGS: ---
|
|
# NOTES: 2023-12-23 enabled the experimental::try feature and removed
|
|
# the use of 'Try::Tiny'. Revert by removing "use feature 'try'"
|
|
# and 'no warnings "experimental::try"'. Remove the '($e)' from
|
|
# each 'catch' and end the 'catch' braces with a semicolon. Add
|
|
# back 'use Try::Tiny'.
|
|
#
|
|
# AUTHOR: Dave Morriss (djm), Dave.Morriss@gmail.com
|
|
# VERSION: 0.3.3
|
|
# CREATED: 2017-09-04 18:06:49
|
|
# REVISION: 2024-01-10 13:02:51
|
|
#
|
|
#===============================================================================
|
|
|
|
use v5.36; # say and try/catch
|
|
use feature 'try';
|
|
use strict;
|
|
use warnings;
|
|
no warnings "experimental::try";
|
|
use utf8;
|
|
|
|
use Carp;
|
|
use Getopt::Long;
|
|
use Pod::Usage;
|
|
|
|
use Config::General;
|
|
use List::Util qw{max};
|
|
use HTML::Parser;
|
|
use HTML::Restrict;
|
|
use HTML::Entities;
|
|
use File::Find::Rule;
|
|
use File::Copy;
|
|
use File::Slurper 'read_text';
|
|
#use IO::All;
|
|
use JSON;
|
|
|
|
use Mail::Address;
|
|
use Mail::Field;
|
|
use Mail::Internet;
|
|
use MIME::Parser;
|
|
use Encode qw(decode encode);
|
|
|
|
use DateTime::Format::ISO8601;
|
|
use IO::Prompter;
|
|
use Template;
|
|
use SQL::Abstract;
|
|
use LWP::UserAgent;
|
|
|
|
#use TryCatch; # Failing 2020-05-31 due to a problem in Devel::Declare
|
|
#use Try::Tiny; # Using in-built experimental try/catch
|
|
use Log::Handler;
|
|
|
|
use DBI;
|
|
|
|
use Data::Dumper;
|
|
|
|
#
|
|
# Version number (manually incremented)
|
|
#
|
|
our $VERSION = '0.3.3';
|
|
|
|
#
|
|
# Script and directory names
|
|
#
|
|
( my $PROG = $0 ) =~ s|.*/||mx;
|
|
( my $DIR = $0 ) =~ s|/?[^/]*$||mx;
|
|
$DIR = '.' unless $DIR;
|
|
|
|
#-------------------------------------------------------------------------------
|
|
# Declarations
|
|
#-------------------------------------------------------------------------------
|
|
#
|
|
# Constants and other declarations
|
|
#
|
|
my $basedir = "$ENV{HOME}/HPR/Comment_system";
|
|
my $configfile = "$basedir/.${PROG}.cfg";
|
|
|
|
my ( $dbh, $sth1, $h1 );
|
|
|
|
my ( @ignore, @files );
|
|
my ($comment, %updates, $shortfile);
|
|
|
|
#
|
|
# The MIME part we're looking for
|
|
#
|
|
my $target_mimetype = 'application/json';
|
|
|
|
#
|
|
# All of the keys in the JSON we're interested in (when checking and adding to
|
|
# the database)
|
|
#
|
|
my @json_keys = (
|
|
"eps_id", "comment_timestamp",
|
|
"comment_author_name", "comment_title",
|
|
"comment_text",
|
|
);
|
|
|
|
#
|
|
# Keys we need to clean
|
|
#
|
|
my @cleanable_keys = (
|
|
"comment_author_name", "comment_title",
|
|
"comment_text",
|
|
);
|
|
|
|
#
|
|
# Enable Unicode mode
|
|
#
|
|
binmode STDOUT, ":encoding(UTF-8)";
|
|
binmode STDERR, ":encoding(UTF-8)";
|
|
|
|
#-------------------------------------------------------------------------------
|
|
# Options and arguments
|
|
#-------------------------------------------------------------------------------
|
|
my $DEF_DEBUG = 0;
|
|
my $DEF_LIVE = 0; # Gets edited on borg
|
|
|
|
#
|
|
# Process options
|
|
#
|
|
my %options;
|
|
Options( \%options );
|
|
|
|
#
|
|
# Default help
|
|
#
|
|
pod2usage( -msg => "$PROG version $VERSION\n", -verbose => 0, -exitval => 1 )
|
|
if ( $options{'help'} );
|
|
|
|
#
|
|
# Full documentation if requested with -doc
|
|
#
|
|
pod2usage(
|
|
-msg => "$PROG version $VERSION\n",
|
|
-verbose => 2,
|
|
-exitval => 1,
|
|
-noperldoc => 0,
|
|
) if ( $options{'documentation'} );
|
|
|
|
#
|
|
# Collect options
|
|
#
|
|
my $DEBUG = ( defined( $options{debug} ) ? $options{debug} : $DEF_DEBUG );
|
|
my $cfgfile
|
|
= ( defined( $options{config} ) ? $options{config} : $configfile );
|
|
my $live = ( defined( $options{live} ) ? $options{live} : $DEF_LIVE );
|
|
my $verify = ( defined( $options{verify} ) ? $options{verify} : 0 );
|
|
my $dry_run = ( defined( $options{'dry-run'} ) ? $options{'dry-run'} : 0 );
|
|
my $verbose = ( defined( $options{verbose} ) ? $options{verbose} : 0 );
|
|
my $jsonmode = ( defined( $options{json} ) ? $options{json} : 0 );
|
|
|
|
#
|
|
# Report on the options if requested
|
|
#
|
|
if ($DEBUG > 0) {
|
|
_debug(1, 'Options');
|
|
_debug(1,
|
|
'-config=' . $cfgfile,
|
|
($live eq 0 ? '-nolive ' : '-live'),
|
|
($verify eq 0 ? '-noverify ' : '-verify'),
|
|
($dry_run eq 0 ? '-nodry-run ' : '-dry-run'),
|
|
'-verbose=[' . $verbose . ']',
|
|
($jsonmode eq 0 ? '-nojson ' : '-json'),
|
|
);
|
|
_debug(1, '');
|
|
}
|
|
|
|
#
|
|
# Sanity checks
|
|
#
|
|
die "Unable to find configuration file: $cfgfile\n" unless ( -e $cfgfile );
|
|
|
|
#-------------------------------------------------------------------------------
|
|
# Configuration file - load data
|
|
#-------------------------------------------------------------------------------
|
|
my $conf = Config::General->new(
|
|
-ConfigFile => $cfgfile,
|
|
-InterPolateVars => 1,
|
|
-InterPolateEnv => 1,
|
|
-ExtendedAccess => 1,
|
|
);
|
|
my %config = $conf->getall();
|
|
|
|
_debug($DEBUG > 2, 'Configuration settings:', Dumper(\%config));
|
|
|
|
my $logfile = $config{settings}->{logfile};
|
|
my $template = $config{settings}->{template};
|
|
|
|
my $maildrop = $config{settings}->{maildrop};
|
|
my $processed = $config{settings}->{processed};
|
|
my $rejected = $config{settings}->{rejected};
|
|
my $banned = $config{settings}->{banned};
|
|
|
|
my $jsondir = $config{settings}->{jsondir};
|
|
my $jprocessed = $config{settings}->{jprocessed};
|
|
my $jrejected = $config{settings}->{jrejected};
|
|
my $jbanned = $config{settings}->{jbanned};
|
|
|
|
my $callback_template = $config{settings}->{callback_template};
|
|
|
|
my ($cb_user, $cb_password) = split(":",$config{cms}->{user});
|
|
|
|
#-------------------------------------------------------------------------------
|
|
# Moderation responses and the arguments associated with them (from the
|
|
# configuration data).
|
|
#-------------------------------------------------------------------------------
|
|
#
|
|
# key => [ 3 element array ]
|
|
#
|
|
# - key is the menu element from do_moderation
|
|
# - the array contains:
|
|
# - text to be used in messages
|
|
# - anonymous subroutine which returns the directory into which to place
|
|
# the file; call it with the $jsonmode variable
|
|
# - action keyword to send back to the server
|
|
#
|
|
my %moderation_keys = (
|
|
'Approve' => [
|
|
'processed',
|
|
sub { ( $_[0] ? $jprocessed : $processed ) },
|
|
'approve'
|
|
],
|
|
'Ban' => [
|
|
'banned',
|
|
sub { ( $_[0] ? $jbanned : $banned ) },
|
|
'block'
|
|
],
|
|
'Reject' => [
|
|
'rejected',
|
|
sub { ( $_[0] ? $jrejected : $rejected ) },
|
|
'delete'
|
|
],
|
|
);
|
|
|
|
#
|
|
# Make a RE from the keys
|
|
#
|
|
my $mkeys = join('|',keys(%moderation_keys));
|
|
my $mkeys_re = qr{^$mkeys$};
|
|
|
|
#-------------------------------------------------------------------------------
|
|
# Connect to the database
|
|
#-------------------------------------------------------------------------------
|
|
my $dbhost = $config{database}->{host} // '127.0.0.1';
|
|
my $dbport = $config{database}->{port} // 3306;
|
|
my $dbname = $config{database}->{name};
|
|
my $dbuser = $config{database}->{user};
|
|
my $dbpwd = $config{database}->{password};
|
|
$dbh = DBI->connect( "dbi:mysql:host=$dbhost;port=$dbport;database=$dbname",
|
|
$dbuser, $dbpwd, { AutoCommit => 1 } )
|
|
or croak $DBI::errstr;
|
|
|
|
#$dbh = DBI->connect( "DBI:MariaDB:database=$dbname;host=$dbhost;port=$dbport",
|
|
# $dbuser, $dbpwd, { AutoCommit => 1 } )
|
|
# or croak $DBI::errstr;
|
|
|
|
#
|
|
# Enable client-side UTF8
|
|
#
|
|
$dbh->{mysqlenable_utf8} = 1;
|
|
|
|
#
|
|
# Set the local timezone to UTC for this connection
|
|
#
|
|
$dbh->do("set time_zone = '+00:00'") or carp $dbh->errstr;
|
|
|
|
#-------------------------------------------------------------------------------
|
|
# Set up logging keeping the default log layout except for the date
|
|
#-------------------------------------------------------------------------------
|
|
my $log = Log::Handler->new();
|
|
|
|
$log->add(
|
|
file => {
|
|
timeformat => "%F %H:%M:%S (%s)",
|
|
filename => $logfile,
|
|
maxlevel => 7,
|
|
minlevel => 0
|
|
}
|
|
);
|
|
|
|
#-------------------------------------------------------------------------------
|
|
# If mode JSON is selected with -json option we'll check the defined 'json'
|
|
# directory, otherwise we'll look for work in the maildrop.
|
|
#-------------------------------------------------------------------------------
|
|
if ($jsonmode) {
|
|
#
|
|
# Look for JSON files
|
|
#
|
|
@ignore = ( '*~', '.*.swp', 'banned', 'processed', 'rejected' );
|
|
my $jsonfileRE = qr{(\.(?i:json)|[^.]+)$};
|
|
#<<<
|
|
@files = File::Find::Rule
|
|
->file()
|
|
->name($jsonfileRE)
|
|
->not( File::Find::Rule->new->name(@ignore) )
|
|
->maxdepth(1)
|
|
->in($jsondir);
|
|
#>>>
|
|
|
|
if ( $DEBUG > 1 ) {
|
|
_debug(1,
|
|
"Files found in $jsondir",
|
|
Dumper( \@files ),
|
|
);
|
|
}
|
|
|
|
#
|
|
# There may be nothing there
|
|
#
|
|
die "No JSON files found; nothing to do\n" unless (@files);
|
|
|
|
#
|
|
# Convert the JSON files into the %updates hash
|
|
#
|
|
do_JSON_process( \@files, \%updates, $log, $dry_run, $verbose );
|
|
|
|
}
|
|
else {
|
|
#
|
|
# Look for mail files
|
|
#
|
|
@ignore = ( '*~', '.*.swp', 'banned', 'processed', 'rejected' );
|
|
|
|
my $mailfileRE = qr{(\.(?i:eml)|[^.]+)$};
|
|
#<<<
|
|
@files = File::Find::Rule
|
|
->file()
|
|
->name($mailfileRE)
|
|
->not( File::Find::Rule->new->name(@ignore) )
|
|
->maxdepth(1)
|
|
->in($maildrop);
|
|
#>>>
|
|
|
|
if ( $DEBUG > 1 ) {
|
|
_debug(1,
|
|
"Files found in $maildrop",
|
|
Dumper( \@files ),
|
|
);
|
|
}
|
|
|
|
#
|
|
# There may be nothing there
|
|
#
|
|
unless (@files) {
|
|
warn "No mail found; nothing to do\n";
|
|
exit;
|
|
}
|
|
|
|
#
|
|
# Convert the mail messages into the %updates hash
|
|
#
|
|
do_mail_process( \@files, \%updates, $log, $dry_run, $verbose );
|
|
|
|
}
|
|
|
|
#-------------------------------------------------------------------------------
|
|
# Check that comments aren't already in the database
|
|
#-------------------------------------------------------------------------------
|
|
do_database_check( $dbh, \%updates, $log, $verify, $dry_run );
|
|
|
|
#-------------------------------------------------------------------------------
|
|
# Perform the moderation pass
|
|
#-------------------------------------------------------------------------------
|
|
do_moderation( $dbh, \%updates, $template, $log, $verify, $dry_run );
|
|
|
|
#
|
|
# Dump the data structure if requested
|
|
#
|
|
_debug($DEBUG > 1, '%updates: ' . Dumper(\%updates));
|
|
|
|
#-------------------------------------------------------------------------------
|
|
# For the comments which are valid and approved, add them to the database
|
|
#-------------------------------------------------------------------------------
|
|
print '-' x 80, "\n" if $verbose > 0;
|
|
do_update( $dbh, \%updates, $log, $dry_run, $verbose );
|
|
|
|
#-------------------------------------------------------------------------------
|
|
# Walk the captured data reporting what happened (would happen) moving the
|
|
# files to the appropriate places and telling the server to do things to the
|
|
# originals (if appropriate)
|
|
#-------------------------------------------------------------------------------
|
|
foreach my $file ( sort_by_timestamp( \%updates ) ) {
|
|
#
|
|
# Deal with each file depending on the selected action (from the set
|
|
# 'Approve', 'Ban' and 'Reject')
|
|
#
|
|
if ( $updates{$file}->{action} =~ $mkeys_re ) {
|
|
#
|
|
# Get the elements we need for the function arguments below.
|
|
# NOTE: Apologies! This is obscure. The action key will be 'Approve',
|
|
# 'Ban' or 'Reject', and the lookup will return the name of the
|
|
# destination (for messages), an anonymous subroutine reference which determines
|
|
# the directory, and a keyword to send to the web server to dispose of
|
|
# the comment. The subref must be called with $jsonmode as the
|
|
# argument to get the destination which is different depending on
|
|
# whether we're in mail or JSON mode. This really needs to be made
|
|
# simpler and easier to understand when this code is revisited!
|
|
#
|
|
my ( $name, $destsub, $serverac )
|
|
= ( @{ $moderation_keys{ $updates{$file}->{action} } } );
|
|
my $dest = &$destsub($jsonmode);
|
|
|
|
#
|
|
# Print moderation settings
|
|
#
|
|
_debug($DEBUG > 1,
|
|
'$name: ' . $name,
|
|
'$dest: ' . $dest,
|
|
'$serverac: ' . $serverac,
|
|
);
|
|
|
|
#
|
|
# Report what we're doing with the file and do it if appropriate
|
|
#
|
|
report_action( $updates{$file}, $name, $dest, $log,
|
|
$dry_run, $verbose );
|
|
|
|
#
|
|
# Tell the server what to do, but only if this is the live instance
|
|
# and we're not in dry-run mode
|
|
#
|
|
if ($live) {
|
|
unless (
|
|
call_back(
|
|
$callback_template, $updates{$file}->{JSON}->{key},
|
|
$serverac, $cb_user, $cb_password, $log, $dry_run, $verbose
|
|
)
|
|
)
|
|
{
|
|
print "Server update failed\n" if $verbose > 0;
|
|
}
|
|
}
|
|
else {
|
|
unless ($dry_run) {
|
|
print "Not live; no server action taken\n" if $verbose > 0;
|
|
}
|
|
}
|
|
}
|
|
elsif ( $updates{$file}->{action} eq 'Ignore' ) {
|
|
#
|
|
# The action here is 'Ignore' which means just leave it alone. Since
|
|
# only the moderator can choose this, or the failure to find the
|
|
# episode in the database, we know it's not a broken mail message or
|
|
# bad JSON.
|
|
#
|
|
if ( $verbose > 0 ) {
|
|
print "File '$file' ignored\n";
|
|
print_errors( $updates{$file}->{errors} );
|
|
}
|
|
}
|
|
elsif ( $updates{$file}->{skip} > 0 ) {
|
|
#
|
|
# The comment was found to be already in the database so we are
|
|
# skipping it. We want to report this and move the file to the
|
|
# 'processed' directory.
|
|
#
|
|
$shortfile = $updates{$file}->{shortfile};
|
|
|
|
unless ($dry_run) {
|
|
if ( $verbose > 0 ) {
|
|
print "Moving '$shortfile' to 'processed'\n";
|
|
print "Reason: comment is already in the database\n";
|
|
print_errors( $updates{$file}->{errors} );
|
|
}
|
|
$log->info("Moving $file to 'processed'");
|
|
unless ( moveFile( $file, ($jsonmode ? $jprocessed : $processed) ) ) {
|
|
warn "Unable to move $file\n";
|
|
$log->error("Failed to move $file");
|
|
}
|
|
}
|
|
else {
|
|
if ( $verbose > 0 ) {
|
|
print "Would move '$shortfile' to 'processed' (dry run)\n";
|
|
print "Reason: comment is already in the database\n";
|
|
print_errors( $updates{$file}->{errors} );
|
|
}
|
|
}
|
|
}
|
|
else {
|
|
#
|
|
# We aborted this one early in the processing and validation, so it's
|
|
# really messed up. We're going to reject it outright with error
|
|
# messages. We can't tell the server because we don't have the
|
|
# key to do it.
|
|
#
|
|
$shortfile = $updates{$file}->{shortfile};
|
|
|
|
unless ($dry_run) {
|
|
if ( $verbose > 0 ) {
|
|
print "Moving $shortfile to 'rejected'\n";
|
|
print "Reason: failed validation\n";
|
|
print_errors( $updates{$file}->{errors} );
|
|
}
|
|
$log->info("Moving $file to 'rejected'");
|
|
unless ( moveFile( $file, ($jsonmode ? $jrejected : $rejected) ) ) {
|
|
warn "Unable to move $file\n";
|
|
$log->error("Failed to move $file");
|
|
}
|
|
}
|
|
else {
|
|
if ( $verbose > 0 ) {
|
|
print "Would move $shortfile to 'rejected' (dry run)\n";
|
|
print "Reason: failed validation\n";
|
|
print_errors( $updates{$file}->{errors} );
|
|
}
|
|
}
|
|
}
|
|
|
|
print '.' x 80, "\n" if $verbose > 0;
|
|
}
|
|
|
|
exit;
|
|
|
|
#=== FUNCTION ================================================================
|
|
# NAME: do_JSON_process
|
|
# PURPOSE: Process any pending JSON files found in a directory search
|
|
# PARAMETERS: $files arrayref containing the JSON files found
|
|
# $updates hashref to contain the details collected from
|
|
# the JSON files
|
|
# $log log handle
|
|
# $dry_run Boolean flagging dry-run mode
|
|
# $verbose Integer level of verbosity
|
|
# RETURNS: Nothing
|
|
# DESCRIPTION: The files containing JSON comments have been found earlier and
|
|
# passed to this function. They are processed in sorted order.
|
|
# The JSON is decoded into a hash and is validated, and the
|
|
# elements cleaned up. The JSON is stored for later in the
|
|
# $updates structure. If all tests and checks are passed the
|
|
# entry for this file in the $updates structure is marked as
|
|
# valid. If anything fails the entry is marked as invalid for
|
|
# the next stages of processing.
|
|
# THROWS: No exceptions
|
|
# COMMENTS: None
|
|
# SEE ALSO: N/A
|
|
#===============================================================================
|
|
sub do_JSON_process {
|
|
my ( $files, $updates, $log, $dry_run, $verbose ) = @_;
|
|
|
|
my ( $shortfile, $dt );
|
|
my $json = JSON->new;
|
|
|
|
#
|
|
# Examine the files we found
|
|
#
|
|
foreach my $file ( sort(@$files) ) {
|
|
print ">> $file\n" if $verbose > 0;
|
|
$log->notice("Processing $file") unless $dry_run;
|
|
|
|
#
|
|
# Initialise for this file, marking it as invalid by default.
|
|
#
|
|
$updates->{$file} = {};
|
|
$updates->{$file}->{valid} = 0;
|
|
$updates->{$file}->{moderated} = 0;
|
|
$updates->{$file}->{errors} = [];
|
|
$updates->{$file}->{action} = '';
|
|
$updates->{$file}->{file} = $file; # For convenience
|
|
$updates->{$file}->{shortfile} = $shortfile = abbreviate_filename($file);
|
|
|
|
#
|
|
# Open the nominated file and load its contents into a scalar. Because
|
|
# 'read_text' does its own internal error handling we need try/catch to
|
|
# intercept it and provide our own.
|
|
#
|
|
my $json_text;
|
|
try {
|
|
$json_text = read_text($file);
|
|
}
|
|
|
|
catch ($e){
|
|
warn "Failed to read JSON file $file\n";
|
|
next;
|
|
}
|
|
|
|
#
|
|
# Decode and process the JSON, returning FALSE if it fails so we get
|
|
# a log entry (unless it's dry run mode)
|
|
#
|
|
unless (
|
|
convert_JSON(
|
|
$json, $json_text, $updates, $file,
|
|
$log, $dry_run, $verbose
|
|
)
|
|
)
|
|
{
|
|
$log->warning("JSON decode failure; aborting '$shortfile'")
|
|
unless $dry_run;
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
#=== FUNCTION ================================================================
|
|
# NAME: do_mail_process
|
|
# PURPOSE: Process any pending email found in a directory search
|
|
# PARAMETERS: $files arrayref containing the mail files found
|
|
# $updates hashref to contain the details collected from
|
|
# the mail files
|
|
# $log log handle
|
|
# $dry_run Boolean flagging dry-run mode
|
|
# $verbose Integer level of verbosity
|
|
# RETURNS: Nothing
|
|
# DESCRIPTION: The files containing email have been found earlier and passed
|
|
# to this function. They are processed in sorted order. Each
|
|
# file is parsed as a mail file and the various destination
|
|
# addresses collected. (TODO Check these against permitted
|
|
# addresses). Each message is expected to be in MIME format with
|
|
# an attachment, which is extracted. The target mimetype
|
|
# (application/json) is checked for (TODO This is a global and
|
|
# shouldn't be). The JSON it contains is decoded into a hash and
|
|
# is validated, and the elements cleaned up. The JSON is stored
|
|
# for later in the $updates structure. If all tests and checks
|
|
# are passed the entry for this file in the $updates structure
|
|
# is marked as valid. If anything fails the entry is marked as
|
|
# invalid for the next stages of processing.
|
|
# THROWS: No exceptions
|
|
# COMMENTS: None
|
|
# SEE ALSO: N/A
|
|
#===============================================================================
|
|
sub do_mail_process {
|
|
my ( $files, $updates, $log, $dry_run, $verbose ) = @_;
|
|
|
|
my ( $shortfile, $dt );
|
|
|
|
#
|
|
# Prepare for parsing mail and JSON
|
|
#
|
|
my $parser = MIME::Parser->new;
|
|
$parser->output_under("/tmp");
|
|
|
|
my $json = JSON->new;
|
|
|
|
#
|
|
# Examine the files we found
|
|
#
|
|
foreach my $file ( sort(@$files) ) {
|
|
print ">> $file\n" if $verbose > 0;
|
|
$log->notice("Processing $file") unless $dry_run;
|
|
|
|
#
|
|
# Initialise for this file, marking it as invalid by default.
|
|
#
|
|
$updates->{$file} = {};
|
|
$updates->{$file}->{valid} = 0;
|
|
$updates->{$file}->{moderated} = 0;
|
|
$updates->{$file}->{errors} = [];
|
|
$updates->{$file}->{action} = '';
|
|
$updates->{$file}->{file} = $file; # For convenience
|
|
$updates->{$file}->{shortfile} = $shortfile = abbreviate_filename($file);
|
|
|
|
#
|
|
# Open the current file and load its contents into a Mail::Internet object
|
|
# TODO: Consider using IO::All
|
|
#
|
|
open( my $mfh, '<', $file )
|
|
or die "Failed to open input file '$file' : $!\n";
|
|
|
|
my $mi_obj = Mail::Internet->new($mfh);
|
|
|
|
close($mfh)
|
|
or warn "Failed to close input file '$file' : $!\n";
|
|
|
|
#
|
|
# This should be to comments@hackerpublicradio.org in one of these headers
|
|
#
|
|
my @addresses = (
|
|
Mail::Address->parse( $mi_obj->head->get('Resent-to') ),
|
|
Mail::Address->parse( $mi_obj->head->get('Resent-cc') )
|
|
);
|
|
if ( !@addresses ) {
|
|
@addresses = (
|
|
Mail::Address->parse( $mi_obj->head->get('To') ),
|
|
Mail::Address->parse( $mi_obj->head->get('Cc') )
|
|
);
|
|
}
|
|
|
|
#
|
|
# Report the headers we're interested in if requested
|
|
#
|
|
if ($verbose > 1) {
|
|
foreach my $addr (@addresses) {
|
|
my $dest = lc( $addr->address );
|
|
print "To: $dest\n";
|
|
}
|
|
print '~ ' x 40,"\n";
|
|
}
|
|
|
|
#
|
|
# TODO Check the message is actually for us
|
|
#
|
|
|
|
my ( $ct, $cte, @body, $body, $results );
|
|
|
|
#
|
|
# Parse the message in the assumption it's MIME. Report any errors and
|
|
# warnings.
|
|
#
|
|
my $entity = $parser->parse_data( $mi_obj->as_string() );
|
|
unless ($entity) {
|
|
$results = $parser->results;
|
|
foreach my $pmsg ( $results->msgs ) {
|
|
print STDERR "$pmsg\n";
|
|
}
|
|
}
|
|
|
|
#
|
|
# If we got multiple MIME parts then look for the first 'application/json'
|
|
# element and save it as the body we'll work on.
|
|
#
|
|
if ( scalar( $entity->parts ) > 0 ) {
|
|
#
|
|
# MIME message
|
|
#
|
|
if ( $DEBUG > 1 ) {
|
|
_debug(1, 'Message is MIME with multiple parts');
|
|
foreach my $ent ( $entity->parts ) {
|
|
_debug(1, "Type: " . $ent->mime_type);
|
|
}
|
|
}
|
|
|
|
#
|
|
# Get the first MIME part of the type we want and turn it into an
|
|
# array.
|
|
# TODO: Might it be better to make a string here since later we simply
|
|
# convert the array to a string again.
|
|
#
|
|
foreach my $ent ( $entity->parts ) {
|
|
if ( $ent->mime_type eq $target_mimetype ) {
|
|
$ct = $ent->mime_type;
|
|
#
|
|
# FIXME: Using 'bodyhandle' is supposed to decode the entity
|
|
# body, but this is untested. Also, we have to use 'as_string'
|
|
# then split on "\n" and stick the newlines back on the array
|
|
# elements, which seems a bit weird. Have to monitor this to
|
|
# see how it behaves.
|
|
#
|
|
#@body = @{ $ent->body() };
|
|
@body = map {"$_\n"}
|
|
split( "\n", $ent->bodyhandle->as_string );
|
|
_debug($DEBUG > 1,
|
|
'Length of body = ' . scalar(@body) . ' lines'
|
|
);
|
|
|
|
#
|
|
# If we got a non-empty body then exit the loop
|
|
#
|
|
last if @body;
|
|
}
|
|
}
|
|
|
|
#
|
|
# We found no suitable part so there's nothing to process here. We'll
|
|
# let the later phases detect this though
|
|
#
|
|
unless (@body) {
|
|
saveError( "MIME message has no valid '$target_mimetype' element",
|
|
$updates->{$file}->{errors} , $log, $dry_run, $verbose );
|
|
next;
|
|
}
|
|
}
|
|
else {
|
|
#
|
|
# The message has no MIME parts
|
|
#
|
|
saveError( "Message has no attachment",
|
|
$updates->{$file}->{errors}, $log, $dry_run, $verbose );
|
|
next;
|
|
}
|
|
|
|
#
|
|
# Display the body unless we're not very verbose
|
|
#
|
|
$body = join( "", @body );
|
|
print "$body\n" if $verbose > 2;
|
|
|
|
#
|
|
# Decode and process the JSON, returning FALSE if it fails so we get
|
|
# a log entry (unless it's dry run mode)
|
|
#
|
|
unless (
|
|
convert_JSON(
|
|
$json, $body, $updates, $file,
|
|
$log, $dry_run, $verbose
|
|
)
|
|
)
|
|
{
|
|
$log->warning("JSON decode failure; aborting '$shortfile'")
|
|
unless $dry_run;
|
|
}
|
|
|
|
}
|
|
continue {
|
|
print '-' x 80, "\n" if $verbose > 0;
|
|
}
|
|
|
|
return;
|
|
}
|
|
|
|
#=== FUNCTION ================================================================
|
|
# NAME: do_database_check
|
|
# PURPOSE: Look to see if the collected comments are already in the
|
|
# database
|
|
# PARAMETERS: $dbh database handle
|
|
# $updates structure (hashref) holding collected
|
|
# information about the files and comments in
|
|
# them
|
|
# $log log handle
|
|
# $verify Boolean flagging the need to verify
|
|
# TODO: Is this right?
|
|
# $dry_run Boolean flagging dry-run mode
|
|
# RETURNS: Nothing
|
|
# DESCRIPTION: Looks for each comment in %$updates in the database. The
|
|
# search criteria are the comment timestamp (in database format)
|
|
# and the comment author name. The timestamp is expected to have
|
|
# been converted from full ISO format (2024-01-10T11:16:11Z) to
|
|
# the format MySQL likes (2024-01-10 11:16:11). the original
|
|
# date is in the element 'orig_timestamp'. If there is a match
|
|
# in the database then the comment is to be skipped, and we mark
|
|
# the element appropriately and display and log a message. If no
|
|
# match we can proceed without skipping;
|
|
# THROWS: No exceptions
|
|
# COMMENTS: None
|
|
# SEE ALSO: N/A
|
|
#===============================================================================
|
|
sub do_database_check {
|
|
my ( $dbh, $updates, $log, $verify, $dry_run ) = @_;
|
|
|
|
my ( $sth1, $h1, $shortfile, $message );
|
|
|
|
#
|
|
# Look for a comment with details matching an incoming one. If found
|
|
# someone else has approved it so we need to skip it.
|
|
#
|
|
$sth1 = $dbh->prepare(
|
|
q{SELECT id FROM comments
|
|
WHERE comment_timestamp = ?
|
|
AND comment_author_name = ?}
|
|
) or die $DBI::errstr;
|
|
if ( $dbh->err ) {
|
|
warn $dbh->errstr;
|
|
}
|
|
|
|
foreach my $file ( sort_by_timestamp($updates) ) {
|
|
#
|
|
# Make a shortcut to the JSON part, and collect the short form of the
|
|
# filename.
|
|
#
|
|
my $ptr = $updates->{$file}->{JSON};
|
|
#my $errors = $updates->{$file}->{errors};
|
|
$shortfile = $updates{$file}->{shortfile};
|
|
|
|
#
|
|
# Look for the comment in the database
|
|
#
|
|
$sth1->execute( $ptr->{comment_timestamp},
|
|
$ptr->{comment_author_name} );
|
|
if ( $dbh->err ) {
|
|
warn $dbh->errstr;
|
|
}
|
|
|
|
#
|
|
# Did we find it?
|
|
#
|
|
if ( $h1 = $sth1->fetchrow_hashref() ) {
|
|
#
|
|
# Found in the database. Mark this comment to be skipped and save
|
|
# an error report
|
|
#
|
|
#$updates->{$file}->{valid} = 0;
|
|
$updates->{$file}->{skip} = 1;
|
|
|
|
$message = "Comment in file '$shortfile' is already in the database";
|
|
print "$message\n" if $verbose > 0;
|
|
$log->warn($message) unless $dry_run;
|
|
}
|
|
else {
|
|
#
|
|
# Not found
|
|
#
|
|
$updates->{$file}->{skip} = 0;
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
#=== FUNCTION ================================================================
|
|
# NAME: do_moderation
|
|
# PURPOSE: Moderate all the valid comments in the 'updates' structure
|
|
# PARAMETERS: $dbh database handle
|
|
# $updates structure (hashref) holding collected
|
|
# information about the files and comments in
|
|
# them
|
|
# $tt_template path to the template we're using
|
|
# $log log handle
|
|
# $verify Boolean flagging the need to verify
|
|
# $dry_run Boolean flagging dry-run mode
|
|
# RETURNS: Nothing
|
|
# DESCRIPTION: Performs the moderation pass on the accumulated comments in
|
|
# the $updates structure. This is a hash (referenced by
|
|
# $updates) containing the filename as the key. Each of these
|
|
# lead to a hashref containing the JSON data (in a hashref), an
|
|
# arrayref of error messages and various flags denoting whether
|
|
# the comment is valid, has been moderated and what action the
|
|
# moderator took.
|
|
# The flags and error messages are manipulated here depending on
|
|
# what choice is made during the moderation process.
|
|
# The contents of the JSON hash (or a subset thereof) and the
|
|
# data about the show returned from a database query are
|
|
# displayed for the moderator using a template, and a menu of
|
|
# 'Approve', 'Ban', 'Reject', Ignore' is presented.
|
|
# On exit the structure referenced by $updates has been modified
|
|
# to signal what should be done with each of the comments.
|
|
# THROWS: No exceptions
|
|
# COMMENTS: TODO: References globals and should not.
|
|
# SEE ALSO: N/A
|
|
#===============================================================================
|
|
sub do_moderation {
|
|
my ( $dbh, $updates, $tt_template, $log, $verify, $dry_run ) = @_;
|
|
|
|
my ( $sth1, $h1, $cdt, $age );
|
|
my $agelimit = 60 * 60 * 24;
|
|
|
|
#
|
|
# We'll get details of the show each comment is associated with using this
|
|
# query. It will be passed to the TT² template.
|
|
#
|
|
$sth1 = $dbh->prepare(
|
|
q{SELECT e.id, e.date, e.title, h.host FROM eps e
|
|
JOIN hosts h ON e.hostid = h.hostid
|
|
WHERE e.id = ?}
|
|
) or die $DBI::errstr;
|
|
if ( $dbh->err ) {
|
|
warn $dbh->errstr;
|
|
}
|
|
|
|
#
|
|
# We'll display each comment with a TT² template
|
|
#
|
|
my $tt = Template->new(
|
|
{ ABSOLUTE => 1,
|
|
ENCODING => 'utf8',
|
|
INCLUDE_PATH => $basedir,
|
|
}
|
|
);
|
|
my $vars = {};
|
|
my $choice;
|
|
|
|
#
|
|
# Process the valid comments, and not marked as 'skip'
|
|
#
|
|
foreach my $file ( sort_by_timestamp($updates) ) {
|
|
if ( $updates->{$file}->{valid} > 0 && $updates->{$file}->{skip} == 0 ) {
|
|
#
|
|
# Make shortcuts
|
|
#
|
|
my $ptr = $updates->{$file}->{JSON};
|
|
my $errors = $updates->{$file}->{errors};
|
|
|
|
#
|
|
# Check the age of the comment if we're verifying, and ignore if
|
|
# less than the age limit. The original timestamp should have been
|
|
# reserved even though we had to reformat for MySQL/MariaDB.
|
|
#
|
|
if ($verify) {
|
|
$cdt = DateTime::Format::ISO8601->parse_datetime(
|
|
$ptr->{orig_timestamp}
|
|
);
|
|
$age = DateTime->now->epoch - $cdt->epoch;
|
|
if ($age < $agelimit) {
|
|
print "Choice 'Ignore' auto-selected due to comment age\n";
|
|
$updates->{$file}->{action} = 'Ignore';
|
|
$log->info("Ignored comment in $file")
|
|
unless $dry_run;
|
|
$updates->{$file}->{moderated} = 0;
|
|
next;
|
|
}
|
|
}
|
|
|
|
#
|
|
# Get the show details from the database
|
|
#
|
|
$sth1->execute( $ptr->{eps_id} );
|
|
if ( $dbh->err ) {
|
|
warn $dbh->errstr;
|
|
}
|
|
|
|
#
|
|
# Do we know this episode?
|
|
# NOTE: If the database we're using is the clone this might fail
|
|
# because it's out of step with the live one
|
|
#
|
|
if ( $h1 = $sth1->fetchrow_hashref() ) {
|
|
#
|
|
# Populate the variables for the template
|
|
#
|
|
$vars->{file} = $file;
|
|
$vars->{db} = $h1;
|
|
$vars->{comment} = $ptr;
|
|
|
|
#
|
|
# Call the template and print the result
|
|
#
|
|
my $document;
|
|
$tt->process( $tt_template, $vars, \$document,
|
|
{ binmode => ':utf8' } )
|
|
|| die $tt->error(), "\n";
|
|
print $document;
|
|
|
|
#
|
|
# Prompt for moderation. Note: these prompts are used to flag
|
|
# the choice in the %updates hash, so don't change them unless
|
|
# you also change them elsewhere.
|
|
#
|
|
$choice = prompt(
|
|
'Choose...',
|
|
-menu => [ 'Approve', 'Ban', 'Reject', 'Ignore' ],
|
|
'>'
|
|
);
|
|
if ($choice) {
|
|
print "Choice '$choice' selected\n";
|
|
if ( $choice eq 'Approve' ) {
|
|
$updates->{$file}->{action} = $choice;
|
|
$log->info("Approved comment in $file")
|
|
unless $dry_run;
|
|
$updates->{$file}->{moderated} = 1;
|
|
}
|
|
elsif ( $choice eq 'Ban' ) {
|
|
$updates->{$file}->{action} = $choice;
|
|
saveError(
|
|
"Comment in '$file' banned by the moderator",
|
|
$errors,
|
|
$log, $dry_run, 0
|
|
);
|
|
$updates->{$file}->{valid} = 0;
|
|
$updates->{$file}->{moderated} = 1;
|
|
}
|
|
elsif ( $choice eq 'Reject' ) {
|
|
$updates->{$file}->{action} = $choice;
|
|
saveError(
|
|
"Comment in '$file' rejected by the moderator",
|
|
$errors,
|
|
$log, $dry_run, 0
|
|
);
|
|
$updates->{$file}->{valid} = 0;
|
|
$updates->{$file}->{moderated} = 1;
|
|
}
|
|
elsif ( $choice eq 'Ignore' ) {
|
|
$updates->{$file}->{action} = $choice;
|
|
$log->info("Ignored comment in $file")
|
|
unless $dry_run;
|
|
$updates->{$file}->{moderated} = 0;
|
|
}
|
|
}
|
|
else {
|
|
print "\n";
|
|
undef $choice; # Seem to need this after aborting 'prompt'
|
|
print "Ignored this comment\n";
|
|
$log->info("Ignored comment in $file (default action)")
|
|
unless $dry_run;
|
|
$updates->{$file}->{moderated} = 0;
|
|
}
|
|
#print "\n";
|
|
}
|
|
else {
|
|
#
|
|
# We couldn't find the episode in the database
|
|
#
|
|
printf "Episode %s is not known\n", $ptr->{eps_id};
|
|
$updates->{$file}->{action} = 'Ignore';
|
|
saveError(
|
|
"Episode referenced in '$file' is not known",
|
|
$errors,
|
|
$log, $dry_run, 0
|
|
);
|
|
$updates->{$file}->{moderated} = 0;
|
|
}
|
|
}
|
|
}
|
|
|
|
}
|
|
|
|
#=== FUNCTION ================================================================
|
|
# NAME: do_update
|
|
# PURPOSE: Add valid, approved comments to the database (or pretend to do
|
|
# so if in dry-run mode)
|
|
# PARAMETERS: $dbh database handle
|
|
# $updates structure (hashref) holding collected
|
|
# information about the files and comments in
|
|
# them
|
|
# $log log handle
|
|
# $dry_run Boolean flagging dry-run mode
|
|
# $verbose Integer level of verbosity
|
|
# RETURNS: Nothing
|
|
# DESCRIPTION: Performs a database update for all valid approved comments in
|
|
# the structure referenced by $updates unless in dry-run mode
|
|
# where what would be done will be reported.
|
|
# THROWS: No exceptions
|
|
# COMMENTS: TODO: References globals and should not.
|
|
# SEE ALSO: N/A
|
|
#===============================================================================
|
|
sub do_update {
|
|
my ( $dbh, $updates, $log, $dry_run, $verbose ) = @_;
|
|
|
|
my $sql = SQL::Abstract->new;
|
|
my ( $stmt, @bind, $show, $count );
|
|
|
|
$count = 0;
|
|
|
|
foreach my $file ( sort_by_timestamp($updates) ) {
|
|
if ( $updates->{$file}->{valid} > 0
|
|
&& $updates->{$file}->{action} eq 'Approve' )
|
|
{
|
|
$count++;
|
|
|
|
$show = $updates->{$file}->{JSON}->{eps_id};
|
|
|
|
my $shortfile = $updates{$file}->{shortfile};
|
|
|
|
#
|
|
# Build the statement
|
|
# NOTE: The JSON keys we wish to use match the column names in the
|
|
# 'comments' table. The second argument to 'insert' needs to be
|
|
# a hashref, so we use @json_keys to list the keys we need, with
|
|
# the whole expression returning an anonymous hash (hashref)
|
|
#
|
|
( $stmt, @bind )
|
|
= $sql->insert( 'comments',
|
|
{ %{ $updates->{$file}->{JSON} }{@json_keys} } );
|
|
|
|
#
|
|
# Display the statement we constructed if requested
|
|
#
|
|
_debug($DEBUG > 1,
|
|
"$stmt\n'",
|
|
join( "','", @bind )
|
|
);
|
|
|
|
#
|
|
# Take action or report potential changes
|
|
#
|
|
unless ($dry_run) {
|
|
#
|
|
# Apply the SQL
|
|
#
|
|
$dbh->do( $stmt, undef, @bind );
|
|
if ( $dbh->err ) {
|
|
#
|
|
# Something broke during the database update. Warn about
|
|
# it, save the error and make the action 'Ignore' so the
|
|
# file is kept for further investigation (or to be
|
|
# edited). Marking the file as invalid may be overkill.
|
|
#
|
|
warn $dbh->errstr;
|
|
saveError(
|
|
$dbh->errstr,
|
|
$updates{$file}->{errors},
|
|
$log, $dry_run, $verbose
|
|
);
|
|
# $updates{$file}->{valid} = 0;
|
|
$updates->{$file}->{action} = 'Ignore';
|
|
}
|
|
else {
|
|
$log->info("$file: inserted comment for show $show");
|
|
|
|
if ( $verbose > 0 ) {
|
|
print "Updated show $show from file $shortfile\n";
|
|
}
|
|
}
|
|
}
|
|
else {
|
|
print
|
|
"Would have added comment for show $show from file $shortfile\n"
|
|
if $verbose > 0;
|
|
}
|
|
|
|
}
|
|
}
|
|
|
|
if ($verbose > 0 && $count > 0) {
|
|
print "\n", '-' x 80, "\n";
|
|
}
|
|
}
|
|
|
|
#=== FUNCTION ================================================================
|
|
# NAME: convert_JSON
|
|
# PURPOSE: Converts the JSON from a file or an email into an internal
|
|
# data structure
|
|
# PARAMETERS: $json JSON object
|
|
# $jtext JSON in text format
|
|
# $updates hashref to contain the details collected from
|
|
# the JSON and elsewhere
|
|
# $file the file we're processing (hash key)
|
|
# $log log handle
|
|
# $dry_run Boolean flagging dry-run mode
|
|
# $verbose Integer level of verbosity
|
|
# RETURNS:
|
|
# DESCRIPTION:
|
|
# THROWS: No exceptions
|
|
# COMMENTS: None
|
|
# SEE ALSO: N/A
|
|
#===============================================================================
|
|
sub convert_JSON {
|
|
my ( $json, $jtext, $updates, $file, $log, $dry_run, $verbose ) = @_;
|
|
|
|
my ($comment, $dt, $shortfile);
|
|
|
|
$shortfile = $updates{$file}->{shortfile};
|
|
|
|
#
|
|
# Decode the JSON into a hashref
|
|
#
|
|
try {
|
|
$comment = $json->decode($jtext);
|
|
}
|
|
|
|
#
|
|
# If the JSON decode failed we have a problem
|
|
#
|
|
catch ($e){
|
|
saveError(
|
|
"JSON in file '$shortfile' failed to decode",
|
|
$updates->{$file}->{errors},
|
|
$log, $dry_run, $verbose
|
|
);
|
|
return 0;
|
|
}
|
|
|
|
#
|
|
# Validate the comment fields: do we have all we expected?
|
|
#
|
|
unless (validate_comment($comment)) {
|
|
saveError(
|
|
"JSON in file '$shortfile' failed to validate",
|
|
$updates->{$file}->{errors},
|
|
$log, $dry_run, $verbose
|
|
);
|
|
return 0;
|
|
}
|
|
|
|
#
|
|
# Perform some cleaning
|
|
#
|
|
foreach my $key (@cleanable_keys) {
|
|
$comment->{$key} = clean_comment_field( $comment->{$key} );
|
|
}
|
|
|
|
#
|
|
# ISO 8601 timestamps cause MySQL problems (or have started to) so we
|
|
# parse them and convert to an acceptable format. We keep the original
|
|
# format in $verify is set and we have to check its age.
|
|
#
|
|
try {
|
|
$dt = DateTime::Format::ISO8601->parse_datetime(
|
|
$comment->{comment_timestamp} );
|
|
}
|
|
catch ($e){
|
|
die "Failed to parse comment timestamp\n";
|
|
}
|
|
$comment->{orig_timestamp} = $comment->{comment_timestamp};
|
|
$comment->{comment_timestamp} = $dt->ymd . ' ' . $dt->hms;
|
|
|
|
#
|
|
# Stash the JSON for later
|
|
#
|
|
$updates->{$file}->{JSON} = $comment;
|
|
|
|
#
|
|
# This file is valid we think
|
|
#
|
|
$updates->{$file}->{valid} = 1;
|
|
$log->info("$file seems valid") unless $dry_run;
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
#=== FUNCTION ================================================================
|
|
# NAME: report_action
|
|
# PURPOSE: Does all of the necessary reporting as the email or JSON files
|
|
# are being acted upon
|
|
# PARAMETERS: $struct hashref holding collected comment data for
|
|
# a given mail file
|
|
# $destname name of destination
|
|
# $dest destination directory
|
|
# $log log handle
|
|
# $dry_run Boolean flagging dry-run mode
|
|
# $verbose Integer level of verbosity
|
|
# RETURNS: Nothing
|
|
# DESCRIPTION: In dry run mode reports what would have been done along with
|
|
# any errors, but only in verbose mode. Otherwise reports the
|
|
# action (in verbose mode) and logs it too. It also moves the
|
|
# mail file to the requested directory.
|
|
# THROWS: No exceptions
|
|
# COMMENTS: None
|
|
# SEE ALSO: N/A
|
|
#===============================================================================
|
|
sub report_action {
|
|
my ( $struct, $destname, $dest, $log, $dry_run, $verbose ) = @_;
|
|
|
|
#
|
|
# Get values with a hashref slice
|
|
#
|
|
my ( $file, $shortfile ) = @$struct{ 'file', 'shortfile' };
|
|
|
|
unless ($dry_run) {
|
|
if ($verbose > 0) {
|
|
print "Moving '$shortfile' to '$destname'\n";
|
|
print_errors($struct->{$file}->{errors});
|
|
}
|
|
$log->info("Moving '$file' to '$destname'");
|
|
unless ( moveFile( $file, $dest ) ) {
|
|
warn "Unable to move $shortfile\n";
|
|
$log->error("Failed to move $shortfile");
|
|
}
|
|
}
|
|
else {
|
|
if ($verbose > 0) {
|
|
print "Would move '$shortfile' to '$destname' (dry run)\n";
|
|
print_errors($struct->{$file}->{errors});
|
|
}
|
|
}
|
|
|
|
return;
|
|
}
|
|
|
|
#=== FUNCTION ================================================================
|
|
# NAME: call_back
|
|
# PURPOSE: Invokes a PHP interface on the HPR server through which
|
|
# a comment can be marked as fully processed
|
|
# PARAMETERS: $format sprintf template for building the URL
|
|
# $key the key from the JSON
|
|
# $action the action to be taken
|
|
# $user the username from the configuration
|
|
# $pass the password from the configuration
|
|
# $log log handle
|
|
# $dry_run Boolean flagging dry-run mode
|
|
# $verbose Integer level of verbosity
|
|
# RETURNS: True (1) if the call performed as it should, otherwise false
|
|
# (0)
|
|
# DESCRIPTION: Checks that the correct parameters have been supplied, then
|
|
# builds the URL we're going to use from the format string, key
|
|
# and action. If in dry-run mode this URL is reported and
|
|
# nothing more. Otherwise the URL is queried with an HTTP 'GET'.
|
|
# If the response shows success then the action is considered to
|
|
# have been taken. The returned message is displayed (in verbose
|
|
# mode) and success is logged. If the 'GET' failed then
|
|
# a warning message is generated along with a warning log entry.
|
|
# THROWS: No exceptions
|
|
# COMMENTS: None
|
|
# SEE ALSO: N/A
|
|
#===============================================================================
|
|
sub call_back {
|
|
my ( $format, $key, $action, $user, $pass, $log, $dry_run, $verbose ) = @_;
|
|
|
|
my ( $ua, $url, $response, $code, $message, $content );
|
|
|
|
#
|
|
# Check calling parameters
|
|
#
|
|
unless ($key) {
|
|
warn "Invalid call to 'call_back' subroutine; missing key\n";
|
|
return 0;
|
|
}
|
|
|
|
unless ( $action =~ /^(approve|block|delete)$/ ) {
|
|
warn "Invalid call to 'call_back' subroutine; invalid action\n";
|
|
return 0;
|
|
}
|
|
|
|
#
|
|
# Build the URL
|
|
#
|
|
$url = sprintf( $format, $key, $action );
|
|
|
|
#
|
|
# Perform the call to clean up the JSON files, etc, or just say what would
|
|
# have been done
|
|
#
|
|
unless ($dry_run) {
|
|
print "Making the server call:\nGET $url\n" if $verbose > 0;
|
|
|
|
$ua = LWP::UserAgent->new;
|
|
$ua->timeout(180); # Module default is 180 sec = 3 mins
|
|
$ua->credentials("hub.hackerpublicradio.org:443", "cms", $user, $pass);
|
|
|
|
$response = $ua->get($url);
|
|
if ( $response->is_success ) {
|
|
#
|
|
# Server call succeeded
|
|
#
|
|
$code = $response->code;
|
|
$message = $response->message;
|
|
$content = $response->decoded_content;
|
|
if ($verbose > 0) {
|
|
print "Success: $code/$message\n";
|
|
print "Content: $content\n" if $content;
|
|
}
|
|
$log->info("Successful server message $key/$action: " .
|
|
$response->status_line);
|
|
return 1;
|
|
}
|
|
else {
|
|
#
|
|
# Server call failed
|
|
#
|
|
warn $response->status_line . "\n";
|
|
$log->error("Failed server message $key/$action: " .
|
|
$response->status_line);
|
|
# Try to log as much as possible
|
|
$log->error( $response->decoded_content )
|
|
if $response->decoded_content;
|
|
return 0;
|
|
}
|
|
}
|
|
else {
|
|
#
|
|
# Always a success in dry-run mode
|
|
#
|
|
print "Would have made the server call:\nGET $url\n" if $verbose > 0;
|
|
return 1;
|
|
}
|
|
|
|
}
|
|
|
|
#=== FUNCTION ================================================================
|
|
# NAME: print_errors
|
|
# PURPOSE: Prints the contents of the error array if there's anything in
|
|
# it (i.e. not empty)
|
|
# PARAMETERS: $errors arrayref possibly containing errors
|
|
# RETURNS: Nothing
|
|
# DESCRIPTION: Simplifies printing in the case when there's nothing to print
|
|
# THROWS: No exceptions
|
|
# COMMENTS: None
|
|
# SEE ALSO: N/A
|
|
#===============================================================================
|
|
sub print_errors {
|
|
my ($errors) = @_;
|
|
|
|
return unless defined($errors);
|
|
|
|
print "Errors: ", join( "\n", @{$errors} ), "\n"
|
|
if @{$errors};
|
|
}
|
|
|
|
#=== FUNCTION ================================================================
|
|
# NAME: saveError
|
|
# PURPOSE: Saves an error message in an error structure and logs it if in
|
|
# live mode
|
|
# PARAMETERS: $message error message text
|
|
# $struct structure (arrayref) to hold the text
|
|
# $log log handle
|
|
# $dry_run Boolean flagging dry-run mode
|
|
# $verbose Integer level of verbosity
|
|
# RETURNS: Nothing
|
|
# DESCRIPTION: Simplifies the saving of an error message and logging it
|
|
# THROWS: No exceptions
|
|
# COMMENTS: None
|
|
# SEE ALSO: N/A
|
|
#===============================================================================
|
|
sub saveError {
|
|
my ( $message, $struct, $log, $dry_run, $verbose ) = @_;
|
|
|
|
$log->warn($message) unless $dry_run;
|
|
push( @{$struct}, $message );
|
|
print "$message\n" if $verbose > 0;
|
|
}
|
|
|
|
#=== FUNCTION ================================================================
|
|
# NAME: validate_comment
|
|
# PURPOSE: Checks that all of the required fields are present in the
|
|
# JSON comment
|
|
# PARAMETERS: $comment - a pointer to the JSON hash
|
|
# RETURNS: True (1) if valid, otherwise false (0)
|
|
# DESCRIPTION:
|
|
# THROWS: No exceptions
|
|
# COMMENTS: None
|
|
# SEE ALSO: N/A
|
|
#===============================================================================
|
|
sub validate_comment {
|
|
my ($comment) = @_;
|
|
|
|
foreach my $key (@json_keys) {
|
|
return 0 unless defined($comment->{$key});
|
|
}
|
|
|
|
return 1;
|
|
}
|
|
|
|
#=== FUNCTION ================================================================
|
|
# NAME: count_tags
|
|
# PURPOSE: Counts the number of start tags in what might be HTML
|
|
# PARAMETERS: $string String to examine
|
|
# RETURNS: An integer count of the number of start tags found
|
|
# DESCRIPTION: Uses HTML::Parser to parse the input string. A handler is
|
|
# defined which accumulates start and end tags in an array. The
|
|
# number of start tags found is returned to the caller.
|
|
# THROWS: No exceptions
|
|
# COMMENTS: None
|
|
# SEE ALSO: N/A
|
|
#===============================================================================
|
|
sub count_tags {
|
|
my ($string) = @_;
|
|
|
|
my @accum;
|
|
chomp($string);
|
|
return 0 unless $string;
|
|
|
|
my $p = HTML::Parser->new(
|
|
api_version => 3,
|
|
handlers => {
|
|
start => [ \@accum, "event,text" ],
|
|
end => [ \@accum, "event,text" ]
|
|
}
|
|
);
|
|
$p->parse($string);
|
|
$p->eof;
|
|
|
|
return scalar(@accum);
|
|
}
|
|
|
|
#=== FUNCTION ================================================================
|
|
# NAME: clean_comment_field
|
|
# PURPOSE: Cleans a string field from the comment
|
|
# PARAMETERS: $field - the field to be cleaned
|
|
# RETURNS: The cleaned string
|
|
# DESCRIPTION: We want to remove all HTML. We also want to add HTML entities
|
|
# where these may be needed. If there are already entities in
|
|
# the data we will decode them first then re-encode so we don't
|
|
# get double encoding.
|
|
# THROWS: No exceptions
|
|
# COMMENTS: None
|
|
# SEE ALSO: N/A
|
|
#===============================================================================
|
|
sub clean_comment_field {
|
|
my ($field) = @_;
|
|
|
|
#
|
|
# Remove all HTML
|
|
#
|
|
my $hr = HTML::Restrict->new();
|
|
$field = $hr->process($field);
|
|
|
|
#
|
|
# Entities. Make sure all existing entities are decoded first, then encode
|
|
# the result. Note that the unsafe characters are the reverse of what's
|
|
# listed. The second argument to encode_entities uses regular expression
|
|
# character class syntax (as if in [] in a RE). The set is all non-plain
|
|
# ASCII exclusing newline, carriage return and '<>&'.
|
|
#
|
|
decode_entities($field);
|
|
$field
|
|
= encode_entities( $field, '^\n\r\x20-\x25\x27-\x3b\x3d\x3f-\x7e' );
|
|
|
|
return $field;
|
|
}
|
|
|
|
#=== FUNCTION ================================================================
|
|
# NAME: moveFile
|
|
# PURPOSE: Moves a file (renames it) taking account of whether the name
|
|
# exists at the destination
|
|
# PARAMETERS: $file file to move
|
|
# $dest destination of the file
|
|
# RETURNS: True (1) if the move succeeded, otherwise false (0)
|
|
# DESCRIPTION: A wrapper around the File::Copy 'move' function. If the target
|
|
# file exists then a version number is appended to the name so
|
|
# that the original is not clobbered.
|
|
# THROWS: No exceptions
|
|
# COMMENTS: None
|
|
# SEE ALSO: N/A
|
|
#===============================================================================
|
|
sub moveFile {
|
|
my ( $file, $dest ) = @_;
|
|
|
|
my ( $target, $basename, $prefix, $vsn );
|
|
|
|
return 0 unless -e $file;
|
|
|
|
#
|
|
# Handle a destination directory
|
|
#
|
|
if ( -d $dest ) {
|
|
( $basename = $file ) =~ s|.*/||mx;
|
|
$target = "$dest/$basename";
|
|
}
|
|
else {
|
|
$target = $dest;
|
|
}
|
|
|
|
#
|
|
# If the target exists we'll have to do some work, otherwise just do the
|
|
# move
|
|
#
|
|
if ( -e $target ) {
|
|
if ( ( $prefix, $vsn ) = ( $target =~ /(\.)(\d+)$/ ) ) {
|
|
while ( -e "$prefix$vsn" ) {
|
|
$vsn++;
|
|
$target = "$prefix$vsn";
|
|
}
|
|
return move( $file, $target );
|
|
}
|
|
else {
|
|
$vsn = 1;
|
|
$prefix = $target;
|
|
$target = "$prefix.$vsn";
|
|
while ( -e "$prefix.$vsn" ) {
|
|
$vsn++;
|
|
$target = "$prefix.$vsn";
|
|
}
|
|
return move( $file, $target );
|
|
}
|
|
}
|
|
else {
|
|
return move( $file, $dest );
|
|
}
|
|
}
|
|
|
|
#=== FUNCTION ================================================================
|
|
# NAME: abbreviate_filename
|
|
# PURPOSE: Abbreviates a long pathname by shrinking the directory names.
|
|
# PARAMETERS: $filename The name of the file
|
|
# RETURNS: A string containing the abbreviated name
|
|
# DESCRIPTION: Splits the filename, assembles the directories as just their
|
|
# first character, and then appends the filename. A bit
|
|
# primitive but it works.
|
|
# THROWS: No exceptions
|
|
# COMMENTS: None
|
|
# SEE ALSO: N/A
|
|
#===============================================================================
|
|
sub abbreviate_filename {
|
|
my ($filename) = @_;
|
|
|
|
my @elements = split('/',$filename);
|
|
my @shortened = map {substr($_,0,1)} @elements[0..$#elements-1];
|
|
|
|
return join('/',@shortened,$elements[-1]);
|
|
}
|
|
|
|
#=== FUNCTION ================================================================
|
|
# NAME: sort_by_timestamp
|
|
# PURPOSE: Sorts the main hash by timestamp rather than the normal sort
|
|
# by key
|
|
# PARAMETERS: $struct hashref pointing to the main structure
|
|
# RETURNS: A list of hash keys in order of timestamp
|
|
# DESCRIPTION: Generates a list of keys by sorting based on the comment
|
|
# timestamp in the JSON hash. Performs the sort by doing
|
|
# a string comparison since the dates are in ISO8601
|
|
# 'YYYY-MM-DDTHH:MM:SSZ' format (e.g. 2017-10-06T06:22:55Z).
|
|
# This seems to be safe.
|
|
# THROWS: No exceptions
|
|
# COMMENTS: None
|
|
# SEE ALSO: N/A
|
|
#===============================================================================
|
|
sub sort_by_timestamp {
|
|
my ($struct) = @_;
|
|
|
|
my @result = sort {
|
|
$struct->{$a}->{JSON}->{comment_timestamp}
|
|
cmp $struct->{$b}->{JSON}->{comment_timestamp}
|
|
} keys(%$struct);
|
|
|
|
return @result;
|
|
}
|
|
|
|
#=== FUNCTION ================================================================
|
|
# NAME: _debug
|
|
# PURPOSE: Prints debug reports
|
|
# PARAMETERS: $active Boolean: 1 for print, 0 for no print
|
|
# @messages List of messages to print
|
|
# RETURNS: Nothing
|
|
# DESCRIPTION: Outputs messages to STDERR if $active is true. For each
|
|
# message it removes any trailing newline and then adds one in
|
|
# the 'print' so the caller doesn't have to bother. Prepends
|
|
# each message with 'D> ' to show it's a debug message.
|
|
# THROWS: No exceptions
|
|
# COMMENTS: None
|
|
# SEE ALSO: N/A
|
|
#===============================================================================
|
|
sub _debug {
|
|
my ( $active, @messages ) = @_;
|
|
|
|
return unless $active;
|
|
foreach my $msg (@messages) {
|
|
chomp($msg);
|
|
print STDERR "D> $msg\n";
|
|
}
|
|
}
|
|
|
|
#=== FUNCTION ================================================================
|
|
# NAME: Options
|
|
# PURPOSE: Processes command-line options
|
|
# PARAMETERS: $optref Hash reference to hold the options
|
|
# RETURNS: Undef
|
|
# DESCRIPTION:
|
|
# THROWS: no exceptions
|
|
# COMMENTS: none
|
|
# SEE ALSO: n/a
|
|
#===============================================================================
|
|
sub Options {
|
|
my ($optref) = @_;
|
|
|
|
my @options = (
|
|
"help", "documentation|manpage",
|
|
"debug=i", "dry-run!",
|
|
"verbose+", "verify!",
|
|
"live!", "json!",
|
|
"config=s",
|
|
);
|
|
|
|
if ( !GetOptions( $optref, @options ) ) {
|
|
pod2usage( -msg => "$PROG version $VERSION\n", -exitval => 1 );
|
|
}
|
|
|
|
return;
|
|
}
|
|
|
|
__END__
|
|
|
|
|
|
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
# Application Documentation
|
|
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
#{{{
|
|
|
|
=head1 NAME
|
|
|
|
process_comments
|
|
|
|
=over 4
|
|
|
|
Process incoming comment files as email messages or JSON files
|
|
|
|
=back
|
|
|
|
=head1 VERSION
|
|
|
|
This documentation refers to process_comments version 0.3.3
|
|
|
|
|
|
=head1 USAGE
|
|
|
|
./process_comments [-help] [-doc] [-debug=N] [-[no]dry-run]
|
|
[-verbose ...] [-[no]live] [-[no]json] [-config=FILE]
|
|
|
|
./process_comments -dry-run
|
|
./process_comments -debug=3 -dry-run
|
|
./process_comments -verbose -verbose
|
|
./process_comments -help
|
|
./process_comments -documentation
|
|
./process_comments -json
|
|
./process_comments -verb -live -config=.process_comments_live.cfg
|
|
|
|
=head1 OPTIONS
|
|
|
|
=over 8
|
|
|
|
=item B<-help>
|
|
|
|
Prints a brief help message describing the usage of the program, and then exits.
|
|
|
|
=item B<-documentation> or B<-manpage>
|
|
|
|
Prints the entire embedded documentation for the program, then exits.
|
|
|
|
=item B<-debug=N>
|
|
|
|
Enables debugging mode when N > 0 (zero is the default). The levels are:
|
|
|
|
=over 4
|
|
|
|
=item B<1>
|
|
|
|
Reports on the options chosen. Running with the B<-dry-run> option prevents
|
|
problems with comments if any options have been incorrectly used.
|
|
|
|
=item B<2>
|
|
|
|
Prints all of the information described at the previous level.
|
|
|
|
Prints the names of files found in the mail or JSON spool areas.
|
|
|
|
Prints the internal details of the email, listing the MIME parts (if there are any).
|
|
|
|
Prints the length of the MIME part matching the desired type, in lines.
|
|
|
|
Prints the entirety of the internal structure holding details of the mail file
|
|
and the comment it contains. This follows the moderation pass.
|
|
|
|
Prints the SQL that has been constructed to update the database.
|
|
|
|
=item B<3>
|
|
|
|
Prints all of the information described at the previous levels.
|
|
|
|
Prints the contents of the B<%config> hash built from the configuration file.
|
|
|
|
=back
|
|
|
|
=item B<-[no]dry-run>
|
|
|
|
Controls the program's B<dry-run> mode. It is off by default. In dry-run mode
|
|
the program reports what it would do but makes no changes. When off the
|
|
program makes all the changes it is designed to perform.
|
|
|
|
=item B<-verbose>
|
|
|
|
This option may be repeated. For each repetition the level of verbosity is
|
|
increased. By default no verbosity is in effect and the program prints out the
|
|
minimal amount of information.
|
|
|
|
Verbosity levels:
|
|
|
|
=over 4
|
|
|
|
=item B<1>
|
|
|
|
Prints the name of each mail (or JSON) file as it's processed.
|
|
|
|
Prints any error messages during message validation, which are also being
|
|
logged (unless in dry-run mode) and saved for reporting later.
|
|
|
|
Prints a notification if the comment is added to the database (or that this
|
|
would have happened in dry-run mode).
|
|
|
|
Prints messages about the moving of each mail (or JSON) file from the
|
|
processing area, along with any errors accumulated for that file. In dry-run
|
|
mode simply indicates what would have happened.
|
|
|
|
Prints the response code received from the server when invoking the interface
|
|
for updating comment files there. If in dry-run mode the message produced
|
|
merely indicates what would have happened.
|
|
|
|
If validation failed earlier on then further information is produced about the
|
|
final actions taken on these files.
|
|
|
|
=item B<2>
|
|
|
|
Prints the addresses each mail message is being sent to (unless in JSON mode).
|
|
|
|
=item B<3>
|
|
|
|
Prints the JSON contents of each mail message (or of each JSON file).
|
|
|
|
=back
|
|
|
|
=item B<-[no]verify>
|
|
|
|
This option controls whether the script imposes a delay on comments. The idea
|
|
is that if the comment feature is used to rant on a subject or to pass
|
|
misinformation, delaying them will help to defuse the situation.
|
|
|
|
The default state is B<-noverify>; a delay is not imposed. Selecting
|
|
B<-verify> means that comments have to be at least 24 hours old before they
|
|
are processed. The length of the delay cannot currently be changed without
|
|
altering the script.
|
|
|
|
This feature of the script has hardly been used and is perhaps against the
|
|
spirit of HPR, so it will probably be removed in a future version.
|
|
|
|
=item B<-[no]live>
|
|
|
|
This option determines whether the program runs in live mode or not. The
|
|
default varies depending on which system it is being run on.
|
|
|
|
IT SHOULD NOT USUALLY BE NECESSARY TO USE THIS!
|
|
|
|
In live mode the program makes changes to the live database and sends messages
|
|
to the live web interface when a comment has been processed. With live mode
|
|
off the program assumes it is writing to a clone of the database and it does
|
|
not inform the webserver that a comment has been processed.
|
|
|
|
The default for the copy of the program on borg is that live mode is ON.
|
|
Otherwise the default is that live mode is OFF. The setting is determined by
|
|
the sed script called B<fixup.sed> on borg. This needs to be run whenever
|
|
a new version of the program is released. This is done as follows:
|
|
|
|
sed -i -f fixup.sed process_comments
|
|
|
|
=item B<-[no]json>
|
|
|
|
This option selects JSON mode, which makes the script behave in a different
|
|
way from the default mode (B<-nojson> or MAIL mode) where it processes email
|
|
containing comments.
|
|
|
|
In JSON mode the script looks in a sub-directory called B<json/> where it
|
|
expects to find JSON files. The normal way in which these files arrive in this
|
|
directory is by using B<scp> to copy them from the HPR server (the directory
|
|
is B</home/hpr/comments>). This is a provision in case the normal route of
|
|
sending out email messages has failed for some reason. It also saves the user
|
|
from setting up the mail handling infrastructure that would otherwise be
|
|
needed.
|
|
|
|
In JSON mode the mail handling logic is not invoked, files are searched for in
|
|
the B<json/> directory and each file is processed, moderation is requested and
|
|
the comment is added to the database. In `B<-live>` mode the server is informed
|
|
that the comment has been processed.
|
|
|
|
The B<json/> directory needs to have three sub-directories: B<processed>,
|
|
B<banned> and B<rejected>. The script will place the processed files into
|
|
these sub-directories according to the moderation choice made. This makes it
|
|
easier to see what actions were taken and helps avoid repeated processing of
|
|
the same comment.
|
|
|
|
=item B<-config=FILE>
|
|
|
|
This option defines a configuration file other than the default
|
|
B<.hpr_db.cfg>. The file must be formatted as described below in the section
|
|
B<CONFIGURATION AND ENVIRONMENT>.
|
|
|
|
=back
|
|
|
|
=head1 DESCRIPTION
|
|
|
|
A script to process new comments, moderate them and add them to the HPR
|
|
database.
|
|
|
|
In the new HPR comment system (released September 2017) a new web form is
|
|
presented in association with each show. The form can be used to submit
|
|
a comment on the show in question and takes some standard fields: the name of
|
|
the commenter, the title of the comment and the body of the comment itself.
|
|
|
|
Once the comment has been submitted its contents are formatted as a JSON
|
|
object and are sent as a mail attachment to the address
|
|
B<comments@hackerpublicradio.org>.
|
|
|
|
Recipients of these mail messages can then perform actions on the comments
|
|
they contain to cause them to be added to the HPR database. These actions are:
|
|
approve the comment, block it (because it is inappropriate or some form of
|
|
Spam and we want to prevent any further messages from the associated IP
|
|
address), or reject it (delete it). There is also an ignore option which skips
|
|
the current comment in this run of the script so it can be revisited later.
|
|
|
|
This script can process an entire email message which has been saved to a file
|
|
or a file containing the JSON object (as in the email attachment). When
|
|
processing email it is expected that it will be found in a maildrop directory,
|
|
and when finished the messages will be placed in sub-directories according to
|
|
what actions were carried out. A similar logic is used for JSON files; they
|
|
are expected to be in a drop area and are moved to sub-directories after
|
|
processing.
|
|
|
|
The directories used as spool areas are defined in the configuration file(s)
|
|
described below.
|
|
|
|
=head2 CHANGES
|
|
|
|
=head3 2024
|
|
|
|
An alternative method of comment approval has been added using PHP on the
|
|
server B<https://hub.hackerpublicradio.org>. This script was originally
|
|
designed around receiving email messages containing JSON or collecting JSON
|
|
files. The assumption was that it was the sole agent processing these messages
|
|
or files and would therefore be the only way that comments could enter the
|
|
database.
|
|
|
|
With the new server route it became possible for an emailed comment to be
|
|
received when that comment was already in the database.
|
|
|
|
As of version B<0.3.2> the script has been enhanced to check whether comments
|
|
received as mail messages (or JSON files) have already been processed. If this
|
|
is found to be the case the incoming data is skipped and stored in the
|
|
'B<processed>' directory (because the comment has been approved elsewhere).
|
|
|
|
There is still scope for confusion - if for example, a comment has been
|
|
rejected on the server, the script will see it as a potential addition to the
|
|
database. The moderators need to coordinate their decisions!
|
|
|
|
The ultimate solution will be to have just one way of moderating and
|
|
installing comments which is available to any of the "Janitors".
|
|
|
|
=head2 MAIL HANDLING
|
|
|
|
One way of handling incoming mail is to use a mail client which is capable of
|
|
saving messages sent to the above address in the spool area mentioned earlier.
|
|
For example, Thunderbird can do this by use of a filter and a plugin. Other
|
|
MUA's will have similar capabilities.
|
|
|
|
When this script is run on the mail spool area it will process all of the
|
|
files it finds. For each file it will check its validity in various ways,
|
|
display the comment then offer a moderation menu. The moderation options are
|
|
described below.
|
|
|
|
=head3 APPROVE
|
|
|
|
If a comment is approved then it will be added to the database, the associated
|
|
mail file will be moved to a sub-directory (by default called 'B<processed>'),
|
|
and the HPR server will be notified of this action.
|
|
|
|
=head3 BAN
|
|
|
|
If a comment is banned then it will not be added to the database. The mail
|
|
file will be moved to the sub-directory 'B<banned>' and the HPR server will be
|
|
informed that the IP address associated with the comment should be placed on
|
|
a black list.
|
|
|
|
=head3 REJECT
|
|
|
|
If a comment is rejected it is not written to the database, the mail file is
|
|
moved to the sub-directory 'B<rejected>' and the HPR server informed that the
|
|
comment can be deleted.
|
|
|
|
=head3 IGNORE
|
|
|
|
If a comment is ignored it is simply left in the mail spool and no further
|
|
processing done on it. It will be eligible for processing again when the
|
|
script is next run.
|
|
|
|
=head2 JSON FILE HANDLING
|
|
|
|
As described under the description of the B<-[no]json> option, the script
|
|
allows the processing of a multiple JSON files, each containing a single
|
|
comment. These files are expected to be in another spool area (defined int he
|
|
configuration file). The JSON is checked and all of the comment fields are
|
|
validated, then the moderation process is begun.
|
|
|
|
Moderation in this case consists of the same steps as described above except
|
|
that no mail file actions are taken and the JSON file is moved to
|
|
a sub-directory after processing.
|
|
|
|
=head1 DIAGNOSTICS
|
|
|
|
=over 4
|
|
|
|
=item B<Unable to find configuration file ...>
|
|
|
|
Type: fatal
|
|
|
|
The nominated configuration file referenced in B<-config=FILE> was not found.
|
|
|
|
=item B<No mail found; nothing to do>
|
|
|
|
Type: fatal
|
|
|
|
No mail files were found in the mail spool area requiring processing.
|
|
|
|
=item B<No JSON files found; nothing to do>
|
|
|
|
Type: fatal
|
|
|
|
No JSON files were found in the JSON spool area requiring processing.
|
|
|
|
=item B<Failed to read JSON file '...' ...>
|
|
|
|
Type: fatal
|
|
|
|
A JSON file in the spool area could not be read with a JSON parser.
|
|
|
|
=item B<Failed to parse comment timestamp ...>
|
|
|
|
Type: fatal
|
|
|
|
The timestamp must be converted to a format compatible with MySQL/MariaDB but
|
|
during this process the parse failed.
|
|
|
|
=item B<Failed to open input file '...' ...>
|
|
|
|
Type: fatal
|
|
|
|
A mail file in the spool area could not be opened.
|
|
|
|
=item B<Failed to move ...>
|
|
|
|
Type: warning
|
|
|
|
A mail file could not be moved to the relevant sub-directory.
|
|
|
|
=item B<Failed to close input file '...' ...>
|
|
|
|
Type: warning
|
|
|
|
A mail file in the spool area could not be closed.
|
|
|
|
=item B<Various error messages from the database subsystem>
|
|
|
|
Type: fatal, warning
|
|
|
|
An action on the database has been flagged as an error.
|
|
|
|
=item B<Various error messages from the Template toolkit>
|
|
|
|
Type: fatal
|
|
|
|
An action relating to the template used for the display of the comment has
|
|
been flagged as an error.
|
|
|
|
=item B<Invalid call to 'call_back' subroutine; missing key>
|
|
|
|
Type: warning
|
|
|
|
The routine 'call_back' was called incorrectly. The key was missing.
|
|
|
|
=item B<Invalid call to 'call_back' subroutine; invalid action>
|
|
|
|
Type: warning
|
|
|
|
The routine 'call_back' was called incorrectly. The action was invalid.
|
|
|
|
=item B<Error from remote server indicating failure>
|
|
|
|
Type: warning
|
|
|
|
While attempting to send an action to the remote server with the 'call_back'
|
|
subroutine an error message was received.
|
|
|
|
=back
|
|
|
|
=head1 CONFIGURATION AND ENVIRONMENT
|
|
|
|
=head2 CONFIGURATION
|
|
|
|
The configuration file design was changed for version B<0.2.7> and beyond.
|
|
|
|
The script expects a configuration file to provide it with various program
|
|
settings, database credentials and HPR CMS credentials.
|
|
|
|
The default configuration file name is B<.process_comments.cfg> and is for use
|
|
with the local clone of the hpr database.
|
|
|
|
This file has the general structure:
|
|
|
|
<<include .process_comments_settings.cfg>>
|
|
|
|
<<include .hpr_db.cfg>>
|
|
|
|
<cms>
|
|
user = "dummy:dummy"
|
|
</cms>
|
|
|
|
It uses B<Config::General> 'B<include>' directives to include the file
|
|
B<.process_comments_settings.cfg> which holds the program settings. It then
|
|
includes the file B<.hpr_db.cfg> which containss credentials for the cloned
|
|
database. Finally it defines dummy credentials for the HPR CMS since this
|
|
feature is not used by default.
|
|
|
|
The more useful configuration is in B<.process_comments_live.cfg>:
|
|
|
|
<<include .process_comments_settings.cfg>>
|
|
|
|
<<include .hpr_livedb.cfg>>
|
|
|
|
<cms>
|
|
<<include .hpradmin_curlrc>>
|
|
</cms>
|
|
|
|
The basic format of the main configuration data after the inclusions have been
|
|
performed is then:
|
|
|
|
<settings>
|
|
# Holds program settings
|
|
</settings>
|
|
<database>
|
|
# Holds database settings
|
|
</database>
|
|
<cms>
|
|
# Holds CMS credentials
|
|
</cms>
|
|
|
|
=head3 CONTENTS OF B<.process_comments_settings.cfg>:
|
|
|
|
This section is enclosed in '<settings>' tags:
|
|
|
|
<settings>
|
|
PROG = process_comments
|
|
|
|
#
|
|
# Defaults
|
|
#
|
|
basedir = "$HOME/HPR/Comment_system"
|
|
configfile = "$basedir/.hpr_db.cfg"
|
|
logfile = "$basedir/logs/${PROG}.log"
|
|
template = "$basedir/${PROG}.tpl"
|
|
|
|
#
|
|
# Mail message stash area
|
|
#
|
|
maildrop = "$HOME/HPR/CommentDrop"
|
|
processed = "$maildrop/processed"
|
|
rejected = "$maildrop/rejected"
|
|
banned = "$maildrop/banned"
|
|
|
|
#
|
|
# JSON stash area
|
|
#
|
|
jsondir = "$basedir/json"
|
|
jprocessed = "$jsondir/processed"
|
|
jrejected = "$jsondir/rejected"
|
|
jbanned = "$jsondir/banned"
|
|
|
|
#
|
|
# How to tell the server the comment's processed
|
|
#
|
|
callback_template = \
|
|
"https://hackerpublicradio.org/comment_process.php?key=%s&action=%s"
|
|
|
|
</settings>
|
|
|
|
=head3 CONTENTS OF B<.hpr_db.cfg> AND B<.hpr_livedb.cfg>
|
|
|
|
The default file providing this part is just for use with the default
|
|
database. The section is enclosed in '<database>' tags:
|
|
|
|
<database>
|
|
host = 127.0.0.1
|
|
port = PORT
|
|
name = DATABASE
|
|
user = USERNAME
|
|
password = PASSWORD
|
|
</database>
|
|
|
|
These settings can alternatively be used to connect to an SSH tunnel which has
|
|
been connected from a remote system (like borg) to the live database.
|
|
Assuming the port chosen for this is 3307 something like the following could
|
|
be used:
|
|
|
|
<database>
|
|
host = 127.0.0.1
|
|
port = 3307
|
|
name = hpr_hpr
|
|
user = hpr_hpr
|
|
password = "**censored**"
|
|
</database>
|
|
|
|
A typical Bash script for opening a tunnel might be:
|
|
|
|
#!/bin/bash
|
|
SSHPORT=22
|
|
LOCALPORT=3307
|
|
REMOTEPORT=3306
|
|
ssh -p ${SSHPORT} -f -N -L localhost:${LOCALPORT}:localhost:${REMOTEPORT} hpr@hackerpublicradio.org
|
|
|
|
=head3 CONTENTS OF B<.hpradmin_curlrc>
|
|
|
|
This is a file used by the 'curl' command to contain credentials to access the
|
|
CMS on the HPR server. The format needs to be:
|
|
|
|
user = hpradmin:**censored**
|
|
|
|
This is one of the formats allowed by 'curl' and is also compatible with the
|
|
B<Config::General> module used by this script.
|
|
|
|
=head2 TEMPLATE
|
|
|
|
The program displays the comment that is currently being processed for
|
|
moderation. It uses a template along with the Perl B<Template> module to do
|
|
this. By default this template is called B<process_comments.tpl>. This can be
|
|
changed by changing the line in the "settings" configuration file which sets
|
|
the 'template' variable.
|
|
|
|
The template is provided with the following data:
|
|
|
|
file a scalar containing the name of the file being processed
|
|
|
|
db a hash containing the details of the show to which the
|
|
comment relates, returned from a database query:
|
|
id the episode number
|
|
date the date of the episode
|
|
title the episode title
|
|
host the host name
|
|
|
|
comment a hash containing the fields from the comment:
|
|
eps_id the episode number
|
|
comment_timestamp date and time of the comment
|
|
comment_author_name comment author
|
|
comment_title comment title
|
|
comment_text comment text
|
|
justification justification for posting (if
|
|
relevant)
|
|
key unique comment key
|
|
|
|
=head1 DEPENDENCIES
|
|
|
|
Carp
|
|
Config::General
|
|
DBI
|
|
Data::Dumper
|
|
DateTime::Format::ISO8601
|
|
Encode
|
|
File::Copy
|
|
File::Find::Rule
|
|
File::Slurper
|
|
Getopt::Long
|
|
HTML::Entities
|
|
HTML::Restrict
|
|
IO::Prompter
|
|
JSON
|
|
LWP::UserAgent
|
|
List::Util
|
|
Log::Handler
|
|
MIME::Parser
|
|
Mail::Address
|
|
Mail::Field
|
|
Mail::Internet
|
|
Pod::Usage
|
|
SQL::Abstract
|
|
Template
|
|
|
|
The Perl B<experimental:try> option is used instead of the original
|
|
B<Try::Tiny> package.
|
|
|
|
=head1 BUGS AND LIMITATIONS
|
|
|
|
There are no known bugs in this module.
|
|
Please report problems to Dave Morriss (Dave.Morriss@gmail.com)
|
|
Patches are welcome.
|
|
|
|
=head1 AUTHOR
|
|
|
|
Dave Morriss (Dave.Morriss@gmail.com)
|
|
|
|
=head1 LICENCE AND COPYRIGHT
|
|
|
|
Copyright (c) 2017 - 2024 Dave Morriss (Dave.Morriss@gmail.com). All rights
|
|
reserved.
|
|
|
|
This module is free software; you can redistribute it and/or
|
|
modify it under the same terms as Perl itself. See perldoc perlartistic.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
|
|
|
=cut
|
|
|
|
#}}}
|
|
|
|
# [zo to open fold, zc to close]
|
|
|
|
# vim: syntax=perl:ts=8:sw=4:et:ai:tw=78:fo=tcrqn21:fdm=marker
|