forked from HPR/hpr-tools
		
	
		
			
				
	
	
		
			2344 lines
		
	
	
		
			77 KiB
		
	
	
	
		
			Perl
		
	
	
		
			Executable File
		
	
	
	
	
			
		
		
	
	
			2344 lines
		
	
	
		
			77 KiB
		
	
	
	
		
			Perl
		
	
	
		
			Executable File
		
	
	
	
	
| #!/usr/bin/env perl
 | |
| #===============================================================================
 | |
| #
 | |
| #         FILE: process_comments
 | |
| #
 | |
| #        USAGE: ./process_comments [-help] [-docmentation|manpage] [-debug=N]
 | |
| #               [-[no]dry-run] [-verbose ...] [-[no]verify] [-[no]live]
 | |
| #               [-[no]json] [-config=FILE]
 | |
| #
 | |
| #  DESCRIPTION: Read new comments received as mail messages (or as JSON files)
 | |
| #               and perform moderation actions on them before possibly adding
 | |
| #               them to the HPR database.
 | |
| #
 | |
| #      OPTIONS: ---
 | |
| # REQUIREMENTS: ---
 | |
| #         BUGS: ---
 | |
| #        NOTES: 2023-12-23 enabled the experimental::try feature and removed
 | |
| #               the use of 'Try::Tiny'. Revert by removing "use feature 'try'"
 | |
| #               and 'no warnings "experimental::try"'. Remove the '($e)' from
 | |
| #               each 'catch' and end the 'catch' braces with a semicolon. Add
 | |
| #               back 'use Try::Tiny'.
 | |
| #
 | |
| #       AUTHOR: Dave Morriss (djm), Dave.Morriss@gmail.com
 | |
| #      VERSION: 0.3.3
 | |
| #      CREATED: 2017-09-04 18:06:49
 | |
| #     REVISION: 2024-01-10 13:02:51
 | |
| #
 | |
| #===============================================================================
 | |
| 
 | |
| use v5.36; # say and try/catch
 | |
| use feature 'try';
 | |
| use strict;
 | |
| use warnings;
 | |
| no warnings "experimental::try";
 | |
| use utf8;
 | |
| 
 | |
| use Carp;
 | |
| use Getopt::Long;
 | |
| use Pod::Usage;
 | |
| 
 | |
| use Config::General;
 | |
| use List::Util qw{max};
 | |
| use HTML::Parser;
 | |
| use HTML::Restrict;
 | |
| use HTML::Entities;
 | |
| use File::Find::Rule;
 | |
| use File::Copy;
 | |
| use File::Slurper 'read_text';
 | |
| #use IO::All;
 | |
| use JSON;
 | |
| 
 | |
| use Mail::Address;
 | |
| use Mail::Field;
 | |
| use Mail::Internet;
 | |
| use MIME::Parser;
 | |
| use Encode qw(decode encode);
 | |
| 
 | |
| use DateTime::Format::ISO8601;
 | |
| use IO::Prompter;
 | |
| use Template;
 | |
| use SQL::Abstract;
 | |
| use LWP::UserAgent;
 | |
| 
 | |
| #use TryCatch; # Failing 2020-05-31 due to a problem in Devel::Declare
 | |
| #use Try::Tiny; # Using in-built experimental try/catch
 | |
| use Log::Handler;
 | |
| 
 | |
| use DBI;
 | |
| 
 | |
| use Data::Dumper;
 | |
| 
 | |
| #
 | |
| # Version number (manually incremented)
 | |
| #
 | |
| our $VERSION = '0.3.3';
 | |
| 
 | |
| #
 | |
| # Script and directory names
 | |
| #
 | |
| ( my $PROG = $0 ) =~ s|.*/||mx;
 | |
| ( my $DIR  = $0 ) =~ s|/?[^/]*$||mx;
 | |
| $DIR = '.' unless $DIR;
 | |
| 
 | |
| #-------------------------------------------------------------------------------
 | |
| # Declarations
 | |
| #-------------------------------------------------------------------------------
 | |
| #
 | |
| # Constants and other declarations
 | |
| #
 | |
| my $basedir    = "$ENV{HOME}/HPR/Comment_system";
 | |
| my $configfile = "$basedir/.${PROG}.cfg";
 | |
| 
 | |
| my ( $dbh, $sth1, $h1 );
 | |
| 
 | |
| my ( @ignore, @files );
 | |
| my ($comment, %updates, $shortfile);
 | |
| 
 | |
| #
 | |
| # The MIME part we're looking for
 | |
| #
 | |
| my $target_mimetype = 'application/json';
 | |
| 
 | |
| #
 | |
| # All of the keys in the JSON we're interested in (when checking and adding to
 | |
| # the database)
 | |
| #
 | |
| my @json_keys = (
 | |
|     "eps_id",              "comment_timestamp",
 | |
|     "comment_author_name", "comment_title",
 | |
|     "comment_text",
 | |
| );
 | |
| 
 | |
| #
 | |
| # Keys we need to clean
 | |
| #
 | |
| my @cleanable_keys = (
 | |
|     "comment_author_name", "comment_title",
 | |
|     "comment_text",
 | |
| );
 | |
| 
 | |
| #
 | |
| # Enable Unicode mode
 | |
| #
 | |
| binmode STDOUT, ":encoding(UTF-8)";
 | |
| binmode STDERR, ":encoding(UTF-8)";
 | |
| 
 | |
| #-------------------------------------------------------------------------------
 | |
| # Options and arguments
 | |
| #-------------------------------------------------------------------------------
 | |
| my $DEF_DEBUG = 0;
 | |
| my $DEF_LIVE  = 0;                              # Gets edited on borg
 | |
| 
 | |
| #
 | |
| # Process options
 | |
| #
 | |
| my %options;
 | |
| Options( \%options );
 | |
| 
 | |
| #
 | |
| # Default help
 | |
| #
 | |
| pod2usage( -msg => "$PROG version $VERSION\n", -verbose => 0, -exitval => 1 )
 | |
|     if ( $options{'help'} );
 | |
| 
 | |
| #
 | |
| # Full documentation if requested with -doc
 | |
| #
 | |
| pod2usage(
 | |
|     -msg       => "$PROG version $VERSION\n",
 | |
|     -verbose   => 2,
 | |
|     -exitval   => 1,
 | |
|     -noperldoc => 0,
 | |
| ) if ( $options{'documentation'} );
 | |
| 
 | |
| #
 | |
| # Collect options
 | |
| #
 | |
| my $DEBUG = ( defined( $options{debug} ) ? $options{debug} : $DEF_DEBUG );
 | |
| my $cfgfile
 | |
|     = ( defined( $options{config} ) ? $options{config} : $configfile );
 | |
| my $live     = ( defined( $options{live} )   ? $options{live}   : $DEF_LIVE );
 | |
| my $verify   = ( defined( $options{verify} ) ? $options{verify} : 0 );
 | |
| my $dry_run  = ( defined( $options{'dry-run'} ) ? $options{'dry-run'} : 0 );
 | |
| my $verbose  = ( defined( $options{verbose} )   ? $options{verbose}   : 0 );
 | |
| my $jsonmode = ( defined( $options{json} )      ? $options{json}      : 0 );
 | |
| 
 | |
| #
 | |
| # Report on the options if requested
 | |
| #
 | |
| if ($DEBUG > 0) {
 | |
|     _debug(1, 'Options');
 | |
|     _debug(1,
 | |
|         '-config=' . $cfgfile,
 | |
|         ($live eq 0 ? '-nolive ' : '-live'),
 | |
|         ($verify eq 0 ? '-noverify ' : '-verify'),
 | |
|         ($dry_run eq 0 ? '-nodry-run ' : '-dry-run'),
 | |
|         '-verbose=[' . $verbose . ']',
 | |
|         ($jsonmode eq 0 ? '-nojson ' : '-json'),
 | |
|     );
 | |
|     _debug(1, '');
 | |
| }
 | |
| 
 | |
| #
 | |
| # Sanity checks
 | |
| #
 | |
| die "Unable to find configuration file: $cfgfile\n" unless ( -e $cfgfile );
 | |
| 
 | |
| #-------------------------------------------------------------------------------
 | |
| # Configuration file - load data
 | |
| #-------------------------------------------------------------------------------
 | |
| my $conf = Config::General->new(
 | |
|     -ConfigFile      => $cfgfile,
 | |
|     -InterPolateVars => 1,
 | |
|     -InterPolateEnv  => 1,
 | |
|     -ExtendedAccess  => 1,
 | |
| );
 | |
| my %config = $conf->getall();
 | |
| 
 | |
| _debug($DEBUG > 2, 'Configuration settings:', Dumper(\%config));
 | |
| 
 | |
| my $logfile    = $config{settings}->{logfile};
 | |
| my $template   = $config{settings}->{template};
 | |
| 
 | |
| my $maildrop   = $config{settings}->{maildrop};
 | |
| my $processed  = $config{settings}->{processed};
 | |
| my $rejected   = $config{settings}->{rejected};
 | |
| my $banned     = $config{settings}->{banned};
 | |
| 
 | |
| my $jsondir    = $config{settings}->{jsondir};
 | |
| my $jprocessed = $config{settings}->{jprocessed};
 | |
| my $jrejected  = $config{settings}->{jrejected};
 | |
| my $jbanned    = $config{settings}->{jbanned};
 | |
| 
 | |
| my $callback_template = $config{settings}->{callback_template};
 | |
| 
 | |
| my ($cb_user, $cb_password) = split(":",$config{cms}->{user});
 | |
| 
 | |
| #-------------------------------------------------------------------------------
 | |
| # Moderation responses and the arguments associated with them (from the
 | |
| # configuration data).
 | |
| #-------------------------------------------------------------------------------
 | |
| #
 | |
| # key => [ 3 element array ]
 | |
| #
 | |
| # - key is the menu element from do_moderation
 | |
| # - the array contains:
 | |
| #       - text to be used in messages
 | |
| #       - anonymous subroutine which returns the directory into which to place
 | |
| #         the file; call it with the $jsonmode variable
 | |
| #       - action keyword to send back to the server
 | |
| #
 | |
| my %moderation_keys = (
 | |
|     'Approve' => [
 | |
|         'processed',
 | |
|         sub { ( $_[0] ? $jprocessed : $processed ) },
 | |
|         'approve'
 | |
|     ],
 | |
|     'Ban' => [
 | |
|         'banned',
 | |
|         sub { ( $_[0] ? $jbanned : $banned ) },
 | |
|         'block'
 | |
|     ],
 | |
|     'Reject' => [
 | |
|         'rejected',
 | |
|         sub { ( $_[0] ? $jrejected : $rejected ) },
 | |
|         'delete'
 | |
|         ],
 | |
| );
 | |
| 
 | |
| #
 | |
| # Make a RE from the keys
 | |
| #
 | |
| my $mkeys = join('|',keys(%moderation_keys));
 | |
| my $mkeys_re = qr{^$mkeys$};
 | |
| 
 | |
| #-------------------------------------------------------------------------------
 | |
| # Connect to the database
 | |
| #-------------------------------------------------------------------------------
 | |
| my $dbhost = $config{database}->{host} // '127.0.0.1';
 | |
| my $dbport = $config{database}->{port} // 3306;
 | |
| my $dbname = $config{database}->{name};
 | |
| my $dbuser = $config{database}->{user};
 | |
| my $dbpwd  = $config{database}->{password};
 | |
| $dbh = DBI->connect( "dbi:mysql:host=$dbhost;port=$dbport;database=$dbname",
 | |
|     $dbuser, $dbpwd, { AutoCommit => 1 } )
 | |
|     or croak $DBI::errstr;
 | |
| 
 | |
| #$dbh = DBI->connect( "DBI:MariaDB:database=$dbname;host=$dbhost;port=$dbport",
 | |
| #    $dbuser, $dbpwd, { AutoCommit => 1 } )
 | |
| #    or croak $DBI::errstr;
 | |
| 
 | |
| #
 | |
| # Enable client-side UTF8
 | |
| #
 | |
| $dbh->{mysqlenable_utf8} = 1;
 | |
| 
 | |
| #
 | |
| # Set the local timezone to UTC for this connection
 | |
| #
 | |
| $dbh->do("set time_zone = '+00:00'") or carp $dbh->errstr;
 | |
| 
 | |
| #-------------------------------------------------------------------------------
 | |
| # Set up logging keeping the default log layout except for the date
 | |
| #-------------------------------------------------------------------------------
 | |
| my $log = Log::Handler->new();
 | |
| 
 | |
| $log->add(
 | |
|     file => {
 | |
|         timeformat => "%F %H:%M:%S (%s)",
 | |
|         filename   => $logfile,
 | |
|         maxlevel   => 7,
 | |
|         minlevel   => 0
 | |
|     }
 | |
| );
 | |
| 
 | |
| #-------------------------------------------------------------------------------
 | |
| # If mode JSON is selected with -json option we'll check the defined 'json'
 | |
| # directory, otherwise we'll look for work in the maildrop.
 | |
| #-------------------------------------------------------------------------------
 | |
| if ($jsonmode) {
 | |
|     #
 | |
|     # Look for JSON files
 | |
|     #
 | |
|     @ignore = ( '*~', '.*.swp', 'banned', 'processed', 'rejected' );
 | |
|     my $jsonfileRE = qr{(\.(?i:json)|[^.]+)$};
 | |
|     #<<<
 | |
|     @files = File::Find::Rule
 | |
|         ->file()
 | |
|         ->name($jsonfileRE)
 | |
|         ->not( File::Find::Rule->new->name(@ignore) )
 | |
|         ->maxdepth(1)
 | |
|         ->in($jsondir);
 | |
|     #>>>
 | |
| 
 | |
|     if ( $DEBUG > 1 ) {
 | |
|         _debug(1,
 | |
|             "Files found in $jsondir",
 | |
|             Dumper( \@files ),
 | |
|         );
 | |
|     }
 | |
| 
 | |
|     #
 | |
|     # There may be nothing there
 | |
|     #
 | |
|     die "No JSON files found; nothing to do\n" unless (@files);
 | |
| 
 | |
|     #
 | |
|     # Convert the JSON files into the %updates hash
 | |
|     #
 | |
|     do_JSON_process( \@files, \%updates, $log, $dry_run, $verbose );
 | |
| 
 | |
| }
 | |
| else {
 | |
|     #
 | |
|     # Look for mail files
 | |
|     #
 | |
|     @ignore = ( '*~', '.*.swp', 'banned', 'processed', 'rejected' );
 | |
| 
 | |
|     my $mailfileRE = qr{(\.(?i:eml)|[^.]+)$};
 | |
|     #<<<
 | |
|     @files = File::Find::Rule
 | |
|         ->file()
 | |
|         ->name($mailfileRE)
 | |
|         ->not( File::Find::Rule->new->name(@ignore) )
 | |
|         ->maxdepth(1)
 | |
|         ->in($maildrop);
 | |
|     #>>>
 | |
| 
 | |
|     if ( $DEBUG > 1 ) {
 | |
|         _debug(1,
 | |
|             "Files found in $maildrop",
 | |
|             Dumper( \@files ),
 | |
|         );
 | |
|     }
 | |
| 
 | |
|     #
 | |
|     # There may be nothing there
 | |
|     #
 | |
|     unless (@files) {
 | |
|         warn "No mail found; nothing to do\n";
 | |
|         exit;
 | |
|     }
 | |
| 
 | |
|     #
 | |
|     # Convert the mail messages into the %updates hash
 | |
|     #
 | |
|     do_mail_process( \@files, \%updates, $log, $dry_run, $verbose );
 | |
| 
 | |
| }
 | |
| 
 | |
| #-------------------------------------------------------------------------------
 | |
| # Check that comments aren't already in the database
 | |
| #-------------------------------------------------------------------------------
 | |
| do_database_check( $dbh, \%updates, $log, $verify, $dry_run );
 | |
| 
 | |
| #-------------------------------------------------------------------------------
 | |
| #  Perform the moderation pass
 | |
| #-------------------------------------------------------------------------------
 | |
| do_moderation( $dbh, \%updates, $template, $log, $verify, $dry_run );
 | |
| 
 | |
| #
 | |
| # Dump the data structure if requested
 | |
| #
 | |
| _debug($DEBUG > 1, '%updates: ' . Dumper(\%updates));
 | |
| 
 | |
| #-------------------------------------------------------------------------------
 | |
| # For the comments which are valid and approved, add them to the database
 | |
| #-------------------------------------------------------------------------------
 | |
| print '-' x 80, "\n" if $verbose > 0;
 | |
| do_update( $dbh, \%updates, $log, $dry_run, $verbose );
 | |
| 
 | |
| #-------------------------------------------------------------------------------
 | |
| # Walk the captured data reporting what happened (would happen) moving the
 | |
| # files to the appropriate places and telling the server to do things to the
 | |
| # originals (if appropriate)
 | |
| #-------------------------------------------------------------------------------
 | |
| foreach my $file ( sort_by_timestamp( \%updates ) ) {
 | |
|     #
 | |
|     # Deal with each file depending on the selected action (from the set
 | |
|     # 'Approve', 'Ban' and 'Reject')
 | |
|     #
 | |
|     if ( $updates{$file}->{action} =~ $mkeys_re ) {
 | |
|         #
 | |
|         # Get the elements we need for the function arguments below.
 | |
|         # NOTE: Apologies! This is obscure. The action key will be 'Approve',
 | |
|         # 'Ban' or 'Reject', and the lookup will return the name of the
 | |
|         # destination (for messages), an anonymous subroutine reference which determines
 | |
|         # the directory, and a keyword to send to the web server to dispose of
 | |
|         # the comment. The subref must be called with $jsonmode as the
 | |
|         # argument to get the destination which is different depending on
 | |
|         # whether we're in mail or JSON mode. This really needs to be made
 | |
|         # simpler and easier to understand when this code is revisited!
 | |
|         #
 | |
|         my ( $name, $destsub, $serverac )
 | |
|             = ( @{ $moderation_keys{ $updates{$file}->{action} } } );
 | |
|         my $dest = &$destsub($jsonmode);
 | |
| 
 | |
|         #
 | |
|         # Print moderation settings
 | |
|         #
 | |
|         _debug($DEBUG > 1,
 | |
|             '$name: ' . $name,
 | |
|             '$dest: ' . $dest,
 | |
|             '$serverac: ' . $serverac,
 | |
|         );
 | |
| 
 | |
|         #
 | |
|         # Report what we're doing with the file and do it if appropriate
 | |
|         #
 | |
|         report_action( $updates{$file}, $name, $dest, $log,
 | |
|             $dry_run, $verbose );
 | |
| 
 | |
|         #
 | |
|         # Tell the server what to do, but only if this is the live instance
 | |
|         # and we're not in dry-run mode
 | |
|         #
 | |
|         if ($live) {
 | |
|             unless (
 | |
|                 call_back(
 | |
|                     $callback_template, $updates{$file}->{JSON}->{key},
 | |
|                     $serverac, $cb_user, $cb_password, $log, $dry_run, $verbose
 | |
|                 )
 | |
|                 )
 | |
|             {
 | |
|                 print "Server update failed\n" if $verbose > 0;
 | |
|             }
 | |
|         }
 | |
|         else {
 | |
|             unless ($dry_run) {
 | |
|                 print "Not live; no server action taken\n" if $verbose > 0;
 | |
|             }
 | |
|         }
 | |
|     }
 | |
|     elsif ( $updates{$file}->{action} eq 'Ignore' ) {
 | |
|         #
 | |
|         # The action here is 'Ignore' which means just leave it alone. Since
 | |
|         # only the moderator can choose this, or the failure to find the
 | |
|         # episode in the database, we know it's not a broken mail message or
 | |
|         # bad JSON.
 | |
|         #
 | |
|         if ( $verbose > 0 ) {
 | |
|             print "File '$file' ignored\n";
 | |
|             print_errors( $updates{$file}->{errors} );
 | |
|         }
 | |
|     }
 | |
|     elsif ( $updates{$file}->{skip} > 0 ) {
 | |
|         #
 | |
|         # The comment was found to be already in the database so we are
 | |
|         # skipping it. We want to report this and move the file to the
 | |
|         # 'processed' directory.
 | |
|         #
 | |
|         $shortfile = $updates{$file}->{shortfile};
 | |
| 
 | |
|         unless ($dry_run) {
 | |
|             if ( $verbose > 0 ) {
 | |
|                 print "Moving '$shortfile' to 'processed'\n";
 | |
|                 print "Reason: comment is already in the database\n";
 | |
|                 print_errors( $updates{$file}->{errors} );
 | |
|             }
 | |
|             $log->info("Moving $file to 'processed'");
 | |
|             unless ( moveFile( $file, ($jsonmode ? $jprocessed : $processed) ) ) {
 | |
|                 warn "Unable to move $file\n";
 | |
|                 $log->error("Failed to move $file");
 | |
|             }
 | |
|         }
 | |
|         else {
 | |
|             if ( $verbose > 0 ) {
 | |
|                 print "Would move '$shortfile' to 'processed' (dry run)\n";
 | |
|                 print "Reason: comment is already in the database\n";
 | |
|                 print_errors( $updates{$file}->{errors} );
 | |
|             }
 | |
|         }
 | |
|     }
 | |
|     else {
 | |
|         #
 | |
|         # We aborted this one early in the processing and validation, so it's
 | |
|         # really messed up. We're going to reject it outright with error
 | |
|         # messages. We can't tell the server because we don't have the
 | |
|         # key to do it.
 | |
|         #
 | |
|         $shortfile = $updates{$file}->{shortfile};
 | |
| 
 | |
|         unless ($dry_run) {
 | |
|             if ( $verbose > 0 ) {
 | |
|                 print "Moving $shortfile to 'rejected'\n";
 | |
|                 print "Reason: failed validation\n";
 | |
|                 print_errors( $updates{$file}->{errors} );
 | |
|             }
 | |
|             $log->info("Moving $file to 'rejected'");
 | |
|             unless ( moveFile( $file, ($jsonmode ? $jrejected : $rejected) ) ) {
 | |
|                 warn "Unable to move $file\n";
 | |
|                 $log->error("Failed to move $file");
 | |
|             }
 | |
|         }
 | |
|         else {
 | |
|             if ( $verbose > 0 ) {
 | |
|                 print "Would move $shortfile to 'rejected' (dry run)\n";
 | |
|                 print "Reason: failed validation\n";
 | |
|                 print_errors( $updates{$file}->{errors} );
 | |
|             }
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     print '.' x 80, "\n" if $verbose > 0;
 | |
| }
 | |
| 
 | |
| exit;
 | |
| 
 | |
| #===  FUNCTION  ================================================================
 | |
| #         NAME: do_JSON_process
 | |
| #      PURPOSE: Process any pending JSON files found in a directory search
 | |
| #   PARAMETERS: $files          arrayref containing the JSON files found
 | |
| #               $updates        hashref to contain the details collected from
 | |
| #                               the JSON files
 | |
| #               $log            log handle
 | |
| #               $dry_run        Boolean flagging dry-run mode
 | |
| #               $verbose        Integer level of verbosity
 | |
| #      RETURNS: Nothing
 | |
| #  DESCRIPTION: The files containing JSON comments have been found earlier and
 | |
| #               passed to this function. They are processed in sorted order.
 | |
| #               The JSON is decoded into a hash and is validated, and the
 | |
| #               elements cleaned up. The JSON is stored for later in the
 | |
| #               $updates structure. If all tests and checks are passed the
 | |
| #               entry for this file in the $updates structure is marked as
 | |
| #               valid. If anything fails the entry is marked as invalid for
 | |
| #               the next stages of processing.
 | |
| #       THROWS: No exceptions
 | |
| #     COMMENTS: None
 | |
| #     SEE ALSO: N/A
 | |
| #===============================================================================
 | |
| sub do_JSON_process {
 | |
|     my ( $files, $updates, $log, $dry_run, $verbose ) = @_;
 | |
| 
 | |
|     my ( $shortfile, $dt );
 | |
|     my $json = JSON->new;
 | |
| 
 | |
|     #
 | |
|     # Examine the files we found
 | |
|     #
 | |
|     foreach my $file ( sort(@$files) ) {
 | |
|         print ">> $file\n" if $verbose > 0;
 | |
|         $log->notice("Processing $file") unless $dry_run;
 | |
| 
 | |
|         #
 | |
|         # Initialise for this file, marking it as invalid by default.
 | |
|         #
 | |
|         $updates->{$file}              = {};
 | |
|         $updates->{$file}->{valid}     = 0;
 | |
|         $updates->{$file}->{moderated} = 0;
 | |
|         $updates->{$file}->{errors}    = [];
 | |
|         $updates->{$file}->{action}    = '';
 | |
|         $updates->{$file}->{file}      = $file;       # For convenience
 | |
|         $updates->{$file}->{shortfile} = $shortfile = abbreviate_filename($file);
 | |
| 
 | |
|        #
 | |
|        # Open the nominated file and load its contents into a scalar. Because
 | |
|        # 'read_text' does its own internal error handling we need try/catch to
 | |
|        # intercept it and provide our own.
 | |
|        #
 | |
|         my $json_text;
 | |
|         try {
 | |
|             $json_text = read_text($file);
 | |
|         }
 | |
| 
 | |
|         catch ($e){
 | |
|             warn "Failed to read JSON file $file\n";
 | |
|             next;
 | |
|         }
 | |
| 
 | |
|         #
 | |
|         # Decode and process the JSON, returning FALSE if it fails so we get
 | |
|         # a log entry (unless it's dry run mode)
 | |
|         #
 | |
|         unless (
 | |
|             convert_JSON(
 | |
|                 $json, $json_text, $updates, $file,
 | |
|                 $log,  $dry_run,   $verbose
 | |
|             )
 | |
|             )
 | |
|         {
 | |
|             $log->warning("JSON decode failure; aborting '$shortfile'")
 | |
|                 unless $dry_run;
 | |
|         }
 | |
| 
 | |
|     }
 | |
| 
 | |
| }
 | |
| 
 | |
| #===  FUNCTION  ================================================================
 | |
| #         NAME: do_mail_process
 | |
| #      PURPOSE: Process any pending email found in a directory search
 | |
| #   PARAMETERS: $files          arrayref containing the mail files found
 | |
| #               $updates        hashref to contain the details collected from
 | |
| #                               the mail files
 | |
| #               $log            log handle
 | |
| #               $dry_run        Boolean flagging dry-run mode
 | |
| #               $verbose        Integer level of verbosity
 | |
| #      RETURNS: Nothing
 | |
| #  DESCRIPTION: The files containing email have been found earlier and passed
 | |
| #               to this function. They are processed in sorted order. Each
 | |
| #               file is parsed as a mail file and the various destination
 | |
| #               addresses collected. (TODO Check these against permitted
 | |
| #               addresses). Each message is expected to be in MIME format with
 | |
| #               an attachment, which is extracted. The target mimetype
 | |
| #               (application/json) is checked for (TODO This is a global and
 | |
| #               shouldn't be). The JSON it contains is decoded into a hash and
 | |
| #               is validated, and the elements cleaned up. The JSON is stored
 | |
| #               for later in the $updates structure. If all tests and checks
 | |
| #               are passed the entry for this file in the $updates structure
 | |
| #               is marked as valid. If anything fails the entry is marked as
 | |
| #               invalid for the next stages of processing.
 | |
| #       THROWS: No exceptions
 | |
| #     COMMENTS: None
 | |
| #     SEE ALSO: N/A
 | |
| #===============================================================================
 | |
| sub do_mail_process {
 | |
|     my ( $files, $updates, $log, $dry_run, $verbose ) = @_;
 | |
| 
 | |
|     my ( $shortfile, $dt );
 | |
| 
 | |
|     #
 | |
|     # Prepare for parsing mail and JSON
 | |
|     #
 | |
|     my $parser = MIME::Parser->new;
 | |
|     $parser->output_under("/tmp");
 | |
| 
 | |
|     my $json = JSON->new;
 | |
| 
 | |
|     #
 | |
|     # Examine the files we found
 | |
|     #
 | |
|     foreach my $file ( sort(@$files) ) {
 | |
|         print ">> $file\n" if $verbose > 0;
 | |
|         $log->notice("Processing $file") unless $dry_run;
 | |
| 
 | |
|         #
 | |
|         # Initialise for this file, marking it as invalid by default.
 | |
|         #
 | |
|         $updates->{$file} = {};
 | |
|         $updates->{$file}->{valid} = 0;
 | |
|         $updates->{$file}->{moderated} = 0;
 | |
|         $updates->{$file}->{errors} = [];
 | |
|         $updates->{$file}->{action} = '';
 | |
|         $updates->{$file}->{file} = $file;            # For convenience
 | |
|         $updates->{$file}->{shortfile} = $shortfile = abbreviate_filename($file);
 | |
| 
 | |
|         #
 | |
|         # Open the current file and load its contents into a Mail::Internet object
 | |
|         # TODO: Consider using IO::All
 | |
|         #
 | |
|         open( my $mfh, '<', $file )
 | |
|             or die "Failed to open input file '$file' : $!\n";
 | |
| 
 | |
|         my $mi_obj = Mail::Internet->new($mfh);
 | |
| 
 | |
|         close($mfh)
 | |
|             or warn "Failed to close input file '$file' : $!\n";
 | |
| 
 | |
|         #
 | |
|         # This should be to comments@hackerpublicradio.org in one of these headers
 | |
|         #
 | |
|         my @addresses = (
 | |
|             Mail::Address->parse( $mi_obj->head->get('Resent-to') ),
 | |
|             Mail::Address->parse( $mi_obj->head->get('Resent-cc') )
 | |
|         );
 | |
|         if ( !@addresses ) {
 | |
|             @addresses = (
 | |
|                 Mail::Address->parse( $mi_obj->head->get('To') ),
 | |
|                 Mail::Address->parse( $mi_obj->head->get('Cc') )
 | |
|             );
 | |
|         }
 | |
| 
 | |
|         #
 | |
|         # Report the headers we're interested in if requested
 | |
|         #
 | |
|         if ($verbose > 1) {
 | |
|             foreach my $addr (@addresses) {
 | |
|                 my $dest = lc( $addr->address );
 | |
|                 print "To: $dest\n";
 | |
|             }
 | |
|             print '~ ' x 40,"\n";
 | |
|         }
 | |
| 
 | |
|         #
 | |
|         # TODO Check the message is actually for us
 | |
|         #
 | |
| 
 | |
|         my ( $ct, $cte, @body, $body, $results );
 | |
| 
 | |
|         #
 | |
|         # Parse the message in the assumption it's MIME. Report any errors and
 | |
|         # warnings.
 | |
|         #
 | |
|         my $entity = $parser->parse_data( $mi_obj->as_string() );
 | |
|         unless ($entity) {
 | |
|             $results = $parser->results;
 | |
|             foreach my $pmsg ( $results->msgs ) {
 | |
|                 print STDERR "$pmsg\n";
 | |
|             }
 | |
|         }
 | |
| 
 | |
|         #
 | |
|         # If we got multiple MIME parts then look for the first 'application/json'
 | |
|         # element and save it as the body we'll work on.
 | |
|         #
 | |
|         if ( scalar( $entity->parts ) > 0 ) {
 | |
|             #
 | |
|             # MIME message
 | |
|             #
 | |
|             if ( $DEBUG > 1 ) {
 | |
|                 _debug(1, 'Message is MIME with multiple parts');
 | |
|                 foreach my $ent ( $entity->parts ) {
 | |
|                     _debug(1, "Type: " . $ent->mime_type);
 | |
|                 }
 | |
|             }
 | |
| 
 | |
|             #
 | |
|             # Get the first MIME part of the type we want and turn it into an
 | |
|             # array.
 | |
|             # TODO: Might it be better to make a string here since later we simply
 | |
|             # convert the array to a string again.
 | |
|             #
 | |
|             foreach my $ent ( $entity->parts ) {
 | |
|                 if ( $ent->mime_type eq $target_mimetype ) {
 | |
|                     $ct = $ent->mime_type;
 | |
|                 #
 | |
|                 # FIXME: Using 'bodyhandle' is supposed to decode the entity
 | |
|                 # body, but this is untested. Also, we have to use 'as_string'
 | |
|                 # then split on "\n" and stick the newlines back on the array
 | |
|                 # elements, which seems a bit weird. Have to monitor this to
 | |
|                 # see how it behaves.
 | |
|                 #
 | |
|                 #@body = @{ $ent->body() };
 | |
|                     @body = map {"$_\n"}
 | |
|                         split( "\n", $ent->bodyhandle->as_string );
 | |
|                     _debug($DEBUG > 1,
 | |
|                         'Length of body = ' . scalar(@body) . ' lines'
 | |
|                     );
 | |
| 
 | |
|                     #
 | |
|                     # If we got a non-empty body then exit the loop
 | |
|                     #
 | |
|                     last if @body;
 | |
|                 }
 | |
|             }
 | |
| 
 | |
|             #
 | |
|             # We found no suitable part so there's nothing to process here. We'll
 | |
|             # let the later phases detect this though
 | |
|             #
 | |
|             unless (@body) {
 | |
|                 saveError( "MIME message has no valid '$target_mimetype' element",
 | |
|                     $updates->{$file}->{errors} , $log, $dry_run, $verbose );
 | |
|                 next;
 | |
|             }
 | |
|         }
 | |
|         else {
 | |
|             #
 | |
|             # The message has no MIME parts
 | |
|             #
 | |
|             saveError( "Message has no attachment",
 | |
|                 $updates->{$file}->{errors}, $log, $dry_run, $verbose );
 | |
|             next;
 | |
|         }
 | |
| 
 | |
|         #
 | |
|         # Display the body unless we're not very verbose
 | |
|         #
 | |
|         $body = join( "", @body );
 | |
|         print "$body\n" if $verbose > 2;
 | |
| 
 | |
|         #
 | |
|         # Decode and process the JSON, returning FALSE if it fails so we get
 | |
|         # a log entry (unless it's dry run mode)
 | |
|         #
 | |
|         unless (
 | |
|             convert_JSON(
 | |
|                 $json, $body, $updates, $file,
 | |
|                 $log,  $dry_run,   $verbose
 | |
|             )
 | |
|             )
 | |
|         {
 | |
|             $log->warning("JSON decode failure; aborting '$shortfile'")
 | |
|                 unless $dry_run;
 | |
|         }
 | |
| 
 | |
|     }
 | |
|     continue {
 | |
|         print '-' x 80, "\n" if $verbose > 0;
 | |
|     }
 | |
| 
 | |
|     return;
 | |
| }
 | |
| 
 | |
| #===  FUNCTION  ================================================================
 | |
| #         NAME: do_database_check
 | |
| #      PURPOSE: Look to see if the collected comments are already in the
 | |
| #               database
 | |
| #   PARAMETERS: $dbh            database handle
 | |
| #               $updates        structure (hashref) holding collected
 | |
| #                               information about the files and comments in
 | |
| #                               them
 | |
| #               $log            log handle
 | |
| #               $verify         Boolean flagging the need to verify
 | |
| #               TODO: Is this right?
 | |
| #               $dry_run        Boolean flagging dry-run mode
 | |
| #      RETURNS: Nothing
 | |
| #  DESCRIPTION: Looks for each comment in %$updates in the database. The
 | |
| #               search criteria are the comment timestamp (in database format)
 | |
| #               and the comment author name. The timestamp is expected to have
 | |
| #               been converted from full ISO format (2024-01-10T11:16:11Z) to
 | |
| #               the format MySQL likes (2024-01-10 11:16:11). the original
 | |
| #               date is in the element 'orig_timestamp'. If there is a match
 | |
| #               in the database then the comment is to be skipped, and we mark
 | |
| #               the element appropriately and display and log a message. If no
 | |
| #               match we can proceed without skipping;
 | |
| #       THROWS: No exceptions
 | |
| #     COMMENTS: None
 | |
| #     SEE ALSO: N/A
 | |
| #===============================================================================
 | |
| sub do_database_check {
 | |
|     my ( $dbh, $updates, $log, $verify, $dry_run ) = @_;
 | |
| 
 | |
|     my ( $sth1, $h1, $shortfile, $message );
 | |
| 
 | |
|     #
 | |
|     # Look for a comment with details matching an incoming one. If found
 | |
|     # someone else has approved it so we need to skip it.
 | |
|     #
 | |
|     $sth1 = $dbh->prepare(
 | |
|         q{SELECT id FROM comments
 | |
|         WHERE comment_timestamp = ?
 | |
|         AND comment_author_name = ?}
 | |
|     ) or die $DBI::errstr;
 | |
|     if ( $dbh->err ) {
 | |
|         warn $dbh->errstr;
 | |
|     }
 | |
| 
 | |
|     foreach my $file ( sort_by_timestamp($updates) ) {
 | |
|         #
 | |
|         # Make a shortcut to the JSON part, and collect the short form of the
 | |
|         # filename.
 | |
|         #
 | |
|         my $ptr = $updates->{$file}->{JSON};
 | |
|         #my $errors = $updates->{$file}->{errors};
 | |
|         $shortfile = $updates{$file}->{shortfile};
 | |
| 
 | |
|         #
 | |
|         # Look for the comment in the database
 | |
|         #
 | |
|         $sth1->execute( $ptr->{comment_timestamp},
 | |
|             $ptr->{comment_author_name} );
 | |
|         if ( $dbh->err ) {
 | |
|             warn $dbh->errstr;
 | |
|         }
 | |
| 
 | |
|         #
 | |
|         # Did we find it?
 | |
|         #
 | |
|         if ( $h1 = $sth1->fetchrow_hashref() ) {
 | |
|             #
 | |
|             # Found in the database. Mark this comment to be skipped and save
 | |
|             # an error report
 | |
|             #
 | |
|             #$updates->{$file}->{valid} = 0;
 | |
|             $updates->{$file}->{skip} = 1;
 | |
| 
 | |
|             $message = "Comment in file '$shortfile' is already in the database";
 | |
|             print "$message\n" if $verbose > 0;
 | |
|             $log->warn($message) unless $dry_run;
 | |
|         }
 | |
|         else {
 | |
|             #
 | |
|             # Not found
 | |
|             #
 | |
|             $updates->{$file}->{skip} = 0;
 | |
|         }
 | |
| 
 | |
|     }
 | |
| 
 | |
| }
 | |
| 
 | |
| #===  FUNCTION  ================================================================
 | |
| #         NAME: do_moderation
 | |
| #      PURPOSE: Moderate all the valid comments in the 'updates' structure
 | |
| #   PARAMETERS: $dbh            database handle
 | |
| #               $updates        structure (hashref) holding collected
 | |
| #                               information about the files and comments in
 | |
| #                               them
 | |
| #               $tt_template    path to the template we're using
 | |
| #               $log            log handle
 | |
| #               $verify         Boolean flagging the need to verify
 | |
| #               $dry_run        Boolean flagging dry-run mode
 | |
| #      RETURNS: Nothing
 | |
| #  DESCRIPTION: Performs the moderation pass on the accumulated comments in
 | |
| #               the $updates structure. This is a hash (referenced by
 | |
| #               $updates) containing the filename as the key. Each of these
 | |
| #               lead to a hashref containing the JSON data (in a hashref), an
 | |
| #               arrayref of error messages and various flags denoting whether
 | |
| #               the comment is valid, has been moderated and what action the
 | |
| #               moderator took.
 | |
| #               The flags and error messages are manipulated here depending on
 | |
| #               what choice is made during the moderation process.
 | |
| #               The contents of the JSON hash (or a subset thereof) and the
 | |
| #               data about the show returned from a database query are
 | |
| #               displayed for the moderator using a template, and a menu of
 | |
| #               'Approve', 'Ban', 'Reject', Ignore' is presented.
 | |
| #               On exit the structure referenced by $updates has been modified
 | |
| #               to signal what should be done with each of the comments.
 | |
| #       THROWS: No exceptions
 | |
| #     COMMENTS: TODO: References globals and should not.
 | |
| #     SEE ALSO: N/A
 | |
| #===============================================================================
 | |
| sub do_moderation {
 | |
|     my ( $dbh, $updates, $tt_template, $log, $verify, $dry_run ) = @_;
 | |
| 
 | |
|     my ( $sth1, $h1, $cdt, $age );
 | |
|     my $agelimit = 60 * 60 * 24;
 | |
| 
 | |
|     #
 | |
|     # We'll get details of the show each comment is associated with using this
 | |
|     # query. It will be passed to the TT² template.
 | |
|     #
 | |
|     $sth1 = $dbh->prepare(
 | |
|         q{SELECT e.id, e.date, e.title, h.host FROM eps e
 | |
|         JOIN hosts h ON e.hostid = h.hostid
 | |
|         WHERE e.id = ?}
 | |
|     ) or die $DBI::errstr;
 | |
|     if ( $dbh->err ) {
 | |
|         warn $dbh->errstr;
 | |
|     }
 | |
| 
 | |
|     #
 | |
|     # We'll display each comment with a TT² template
 | |
|     #
 | |
|     my $tt = Template->new(
 | |
|         {   ABSOLUTE     => 1,
 | |
|             ENCODING     => 'utf8',
 | |
|             INCLUDE_PATH => $basedir,
 | |
|         }
 | |
|     );
 | |
|     my $vars = {};
 | |
|     my $choice;
 | |
| 
 | |
|     #
 | |
|     # Process the valid comments, and not marked as 'skip'
 | |
|     #
 | |
|     foreach my $file ( sort_by_timestamp($updates) ) {
 | |
|         if ( $updates->{$file}->{valid} > 0 && $updates->{$file}->{skip} == 0 ) {
 | |
|             #
 | |
|             # Make shortcuts
 | |
|             #
 | |
|             my $ptr = $updates->{$file}->{JSON};
 | |
|             my $errors = $updates->{$file}->{errors};
 | |
| 
 | |
|             #
 | |
|             # Check the age of the comment if we're verifying, and ignore if
 | |
|             # less than the age limit. The original timestamp should have been
 | |
|             # reserved even though we had to reformat for MySQL/MariaDB.
 | |
|             #
 | |
|             if ($verify) {
 | |
|                 $cdt = DateTime::Format::ISO8601->parse_datetime(
 | |
|                     $ptr->{orig_timestamp}
 | |
|                 );
 | |
|                 $age = DateTime->now->epoch - $cdt->epoch;
 | |
|                 if ($age < $agelimit) {
 | |
|                     print "Choice 'Ignore' auto-selected due to comment age\n";
 | |
|                     $updates->{$file}->{action} = 'Ignore';
 | |
|                     $log->info("Ignored comment in $file")
 | |
|                         unless $dry_run;
 | |
|                     $updates->{$file}->{moderated} = 0;
 | |
|                     next;
 | |
|                 }
 | |
|             }
 | |
| 
 | |
|             #
 | |
|             # Get the show details from the database
 | |
|             #
 | |
|             $sth1->execute( $ptr->{eps_id} );
 | |
|             if ( $dbh->err ) {
 | |
|                 warn $dbh->errstr;
 | |
|             }
 | |
| 
 | |
|             #
 | |
|             # Do we know this episode?
 | |
|             # NOTE: If the database we're using is the clone this might fail
 | |
|             # because it's out of step with the live one
 | |
|             #
 | |
|             if ( $h1 = $sth1->fetchrow_hashref() ) {
 | |
|                 #
 | |
|                 # Populate the variables for the template
 | |
|                 #
 | |
|                 $vars->{file}    = $file;
 | |
|                 $vars->{db}      = $h1;
 | |
|                 $vars->{comment} = $ptr;
 | |
| 
 | |
|                 #
 | |
|                 # Call the template and print the result
 | |
|                 #
 | |
|                 my $document;
 | |
|                 $tt->process( $tt_template, $vars, \$document,
 | |
|                     { binmode => ':utf8' } )
 | |
|                     || die $tt->error(), "\n";
 | |
|                 print $document;
 | |
| 
 | |
|                 #
 | |
|                 # Prompt for moderation. Note: these prompts are used to flag
 | |
|                 # the choice in the %updates hash, so don't change them unless
 | |
|                 # you also change them elsewhere.
 | |
|                 #
 | |
|                 $choice = prompt(
 | |
|                     'Choose...',
 | |
|                     -menu => [ 'Approve', 'Ban', 'Reject', 'Ignore' ],
 | |
|                     '>'
 | |
|                 );
 | |
|                 if ($choice) {
 | |
|                     print "Choice '$choice' selected\n";
 | |
|                     if ( $choice eq 'Approve' ) {
 | |
|                         $updates->{$file}->{action} = $choice;
 | |
|                         $log->info("Approved comment in $file")
 | |
|                             unless $dry_run;
 | |
|                         $updates->{$file}->{moderated} = 1;
 | |
|                     }
 | |
|                     elsif ( $choice eq 'Ban' ) {
 | |
|                         $updates->{$file}->{action} = $choice;
 | |
|                         saveError(
 | |
|                             "Comment in '$file' banned by the moderator",
 | |
|                             $errors,
 | |
|                             $log, $dry_run, 0
 | |
|                         );
 | |
|                         $updates->{$file}->{valid}     = 0;
 | |
|                         $updates->{$file}->{moderated} = 1;
 | |
|                     }
 | |
|                     elsif ( $choice eq 'Reject' ) {
 | |
|                         $updates->{$file}->{action} = $choice;
 | |
|                         saveError(
 | |
|                             "Comment in '$file' rejected by the moderator",
 | |
|                             $errors,
 | |
|                             $log, $dry_run, 0
 | |
|                         );
 | |
|                         $updates->{$file}->{valid}     = 0;
 | |
|                         $updates->{$file}->{moderated} = 1;
 | |
|                     }
 | |
|                     elsif ( $choice eq 'Ignore' ) {
 | |
|                         $updates->{$file}->{action} = $choice;
 | |
|                         $log->info("Ignored comment in $file")
 | |
|                             unless $dry_run;
 | |
|                         $updates->{$file}->{moderated} = 0;
 | |
|                     }
 | |
|                 }
 | |
|                 else {
 | |
|                     print "\n";
 | |
|                     undef $choice; # Seem to need this after aborting 'prompt'
 | |
|                     print "Ignored this comment\n";
 | |
|                     $log->info("Ignored comment in $file (default action)")
 | |
|                         unless $dry_run;
 | |
|                     $updates->{$file}->{moderated} = 0;
 | |
|                 }
 | |
|                 #print "\n";
 | |
|             }
 | |
|             else {
 | |
|                 #
 | |
|                 # We couldn't find the episode in the database
 | |
|                 #
 | |
|                 printf "Episode %s is not known\n", $ptr->{eps_id};
 | |
|                 $updates->{$file}->{action} = 'Ignore';
 | |
|                 saveError(
 | |
|                     "Episode referenced in '$file' is not known",
 | |
|                     $errors,
 | |
|                     $log, $dry_run, 0
 | |
|                 );
 | |
|                 $updates->{$file}->{moderated} = 0;
 | |
|             }
 | |
|         }
 | |
|     }
 | |
| 
 | |
| }
 | |
| 
 | |
| #===  FUNCTION  ================================================================
 | |
| #         NAME: do_update
 | |
| #      PURPOSE: Add valid, approved comments to the database (or pretend to do
 | |
| #               so if in dry-run mode)
 | |
| #   PARAMETERS: $dbh            database handle
 | |
| #               $updates        structure (hashref) holding collected
 | |
| #                               information about the files and comments in
 | |
| #                               them
 | |
| #               $log            log handle
 | |
| #               $dry_run        Boolean flagging dry-run mode
 | |
| #               $verbose        Integer level of verbosity
 | |
| #      RETURNS: Nothing
 | |
| #  DESCRIPTION: Performs a database update for all valid approved comments in
 | |
| #               the structure referenced by $updates unless in dry-run mode
 | |
| #               where what would be done will be reported.
 | |
| #       THROWS: No exceptions
 | |
| #     COMMENTS: TODO: References globals and should not.
 | |
| #     SEE ALSO: N/A
 | |
| #===============================================================================
 | |
| sub do_update {
 | |
|     my ( $dbh, $updates, $log, $dry_run, $verbose ) = @_;
 | |
| 
 | |
|     my $sql = SQL::Abstract->new;
 | |
|     my ( $stmt, @bind, $show, $count );
 | |
| 
 | |
|     $count = 0;
 | |
| 
 | |
|     foreach my $file ( sort_by_timestamp($updates) ) {
 | |
|         if (   $updates->{$file}->{valid} > 0
 | |
|             && $updates->{$file}->{action} eq 'Approve' )
 | |
|         {
 | |
|             $count++;
 | |
| 
 | |
|             $show = $updates->{$file}->{JSON}->{eps_id};
 | |
| 
 | |
|             my $shortfile = $updates{$file}->{shortfile};
 | |
| 
 | |
|             #
 | |
|             # Build the statement
 | |
|             # NOTE: The JSON keys we wish to use match the column names in the
 | |
|             # 'comments' table. The second argument to 'insert' needs to be
 | |
|             # a hashref, so we use @json_keys to list the keys we need, with
 | |
|             # the whole expression returning an anonymous hash (hashref)
 | |
|             #
 | |
|             ( $stmt, @bind )
 | |
|                 = $sql->insert( 'comments',
 | |
|                 { %{ $updates->{$file}->{JSON} }{@json_keys} } );
 | |
| 
 | |
|             #
 | |
|             # Display the statement we constructed if requested
 | |
|             #
 | |
|             _debug($DEBUG > 1,
 | |
|                 "$stmt\n'",
 | |
|                 join( "','", @bind )
 | |
|             );
 | |
| 
 | |
|             #
 | |
|             # Take action or report potential changes
 | |
|             #
 | |
|             unless ($dry_run) {
 | |
|                 #
 | |
|                 # Apply the SQL
 | |
|                 #
 | |
|                 $dbh->do( $stmt, undef, @bind );
 | |
|                 if ( $dbh->err ) {
 | |
|                     #
 | |
|                     # Something broke during the database update. Warn about
 | |
|                     # it, save the error and make the action 'Ignore' so the
 | |
|                     # file is kept for further investigation (or to be
 | |
|                     # edited). Marking the file as invalid may be overkill.
 | |
|                     #
 | |
|                     warn $dbh->errstr;
 | |
|                     saveError(
 | |
|                         $dbh->errstr,
 | |
|                         $updates{$file}->{errors},
 | |
|                         $log, $dry_run, $verbose
 | |
|                     );
 | |
| #                    $updates{$file}->{valid} = 0;
 | |
|                     $updates->{$file}->{action} = 'Ignore';
 | |
|                 }
 | |
|                 else {
 | |
|                     $log->info("$file: inserted comment for show $show");
 | |
| 
 | |
|                     if ( $verbose > 0 ) {
 | |
|                         print "Updated show $show from file $shortfile\n";
 | |
|                     }
 | |
|                 }
 | |
|             }
 | |
|             else {
 | |
|                 print
 | |
|                     "Would have added comment for show $show from file $shortfile\n"
 | |
|                     if $verbose > 0;
 | |
|             }
 | |
| 
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     if ($verbose > 0 && $count > 0) {
 | |
|         print "\n", '-' x 80, "\n";
 | |
|     }
 | |
| }
 | |
| 
 | |
| #===  FUNCTION  ================================================================
 | |
| #         NAME: convert_JSON
 | |
| #      PURPOSE: Converts the JSON from a file or an email into an internal
 | |
| #               data structure
 | |
| #   PARAMETERS: $json           JSON object
 | |
| #               $jtext          JSON in text format
 | |
| #               $updates        hashref to contain the details collected from
 | |
| #                               the JSON and elsewhere
 | |
| #               $file           the file we're processing (hash key)
 | |
| #               $log            log handle
 | |
| #               $dry_run        Boolean flagging dry-run mode
 | |
| #               $verbose        Integer level of verbosity
 | |
| #      RETURNS: 
 | |
| #  DESCRIPTION: 
 | |
| #       THROWS: No exceptions
 | |
| #     COMMENTS: None
 | |
| #     SEE ALSO: N/A
 | |
| #===============================================================================
 | |
| sub convert_JSON {
 | |
|     my ( $json, $jtext, $updates, $file, $log, $dry_run, $verbose ) = @_;
 | |
| 
 | |
|     my ($comment, $dt, $shortfile);
 | |
| 
 | |
|     $shortfile = $updates{$file}->{shortfile};
 | |
| 
 | |
|     #
 | |
|     # Decode the JSON into a hashref
 | |
|     #
 | |
|     try {
 | |
|         $comment = $json->decode($jtext);
 | |
|     }
 | |
| 
 | |
|     #
 | |
|     # If the JSON decode failed we have a problem
 | |
|     #
 | |
|     catch ($e){
 | |
|         saveError(
 | |
|             "JSON in file '$shortfile' failed to decode",
 | |
|             $updates->{$file}->{errors},
 | |
|             $log, $dry_run, $verbose
 | |
|         );
 | |
|         return 0;
 | |
|     }
 | |
| 
 | |
|     #
 | |
|     # Validate the comment fields: do we have all we expected?
 | |
|     #
 | |
|     unless (validate_comment($comment)) {
 | |
|         saveError(
 | |
|             "JSON in file '$shortfile' failed to validate",
 | |
|             $updates->{$file}->{errors},
 | |
|             $log, $dry_run, $verbose
 | |
|         );
 | |
|         return 0;
 | |
|     }
 | |
| 
 | |
|     #
 | |
|     # Perform some cleaning
 | |
|     #
 | |
|     foreach my $key (@cleanable_keys) {
 | |
|         $comment->{$key} = clean_comment_field( $comment->{$key} );
 | |
|     }
 | |
| 
 | |
|     #
 | |
|     # ISO 8601 timestamps cause MySQL problems (or have started to) so we
 | |
|     # parse them and convert to an acceptable format. We keep the original
 | |
|     # format in $verify is set and we have to check its age.
 | |
|     #
 | |
|     try {
 | |
|         $dt = DateTime::Format::ISO8601->parse_datetime(
 | |
|             $comment->{comment_timestamp} );
 | |
|     }
 | |
|     catch ($e){
 | |
|         die "Failed to parse comment timestamp\n";
 | |
|     }
 | |
|     $comment->{orig_timestamp} = $comment->{comment_timestamp};
 | |
|     $comment->{comment_timestamp} = $dt->ymd . ' ' . $dt->hms;
 | |
| 
 | |
|     #
 | |
|     # Stash the JSON for later
 | |
|     #
 | |
|     $updates->{$file}->{JSON} = $comment;
 | |
| 
 | |
|     #
 | |
|     # This file is valid we think
 | |
|     #
 | |
|     $updates->{$file}->{valid} = 1;
 | |
|     $log->info("$file seems valid") unless $dry_run;
 | |
| 
 | |
|     return 1;
 | |
| 
 | |
| }
 | |
| 
 | |
| #===  FUNCTION  ================================================================
 | |
| #         NAME: report_action
 | |
| #      PURPOSE: Does all of the necessary reporting as the email or JSON files
 | |
| #               are being acted upon
 | |
| #   PARAMETERS: $struct         hashref holding collected comment data for
 | |
| #                               a given mail file
 | |
| #               $destname       name of destination
 | |
| #               $dest           destination directory
 | |
| #               $log            log handle
 | |
| #               $dry_run        Boolean flagging dry-run mode
 | |
| #               $verbose        Integer level of verbosity
 | |
| #      RETURNS: Nothing
 | |
| #  DESCRIPTION: In dry run mode reports what would have been done along with
 | |
| #               any errors, but only in verbose mode. Otherwise reports the
 | |
| #               action (in verbose mode) and logs it too. It also moves the
 | |
| #               mail file to the requested directory.
 | |
| #       THROWS: No exceptions
 | |
| #     COMMENTS: None
 | |
| #     SEE ALSO: N/A
 | |
| #===============================================================================
 | |
| sub report_action {
 | |
|     my ( $struct, $destname, $dest, $log, $dry_run, $verbose ) = @_;
 | |
| 
 | |
|     #
 | |
|     # Get values with a hashref slice
 | |
|     #
 | |
|     my ( $file, $shortfile ) = @$struct{ 'file', 'shortfile' };
 | |
| 
 | |
|     unless ($dry_run) {
 | |
|         if ($verbose > 0) {
 | |
|             print "Moving '$shortfile' to '$destname'\n";
 | |
|             print_errors($struct->{$file}->{errors});
 | |
|         }
 | |
|         $log->info("Moving '$file' to '$destname'");
 | |
|         unless ( moveFile( $file, $dest ) ) {
 | |
|             warn "Unable to move $shortfile\n";
 | |
|             $log->error("Failed to move $shortfile");
 | |
|         }
 | |
|     }
 | |
|     else {
 | |
|         if ($verbose > 0) {
 | |
|             print "Would move '$shortfile' to '$destname' (dry run)\n";
 | |
|             print_errors($struct->{$file}->{errors});
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     return;
 | |
| }
 | |
| 
 | |
| #===  FUNCTION  ================================================================
 | |
| #         NAME: call_back
 | |
| #      PURPOSE: Invokes a PHP interface on the HPR server through which
 | |
| #               a comment can be marked as fully processed
 | |
| #   PARAMETERS: $format         sprintf template for building the URL
 | |
| #               $key            the key from the JSON
 | |
| #               $action         the action to be taken
 | |
| #               $user           the username from the configuration
 | |
| #               $pass           the password from the configuration
 | |
| #               $log            log handle
 | |
| #               $dry_run        Boolean flagging dry-run mode
 | |
| #               $verbose        Integer level of verbosity
 | |
| #      RETURNS: True (1) if the call performed as it should, otherwise false
 | |
| #               (0)
 | |
| #  DESCRIPTION: Checks that the correct parameters have been supplied, then
 | |
| #               builds the URL we're going to use from the format string, key
 | |
| #               and action. If in dry-run mode this URL is reported and
 | |
| #               nothing more. Otherwise the URL is queried with an HTTP 'GET'.
 | |
| #               If the response shows success then the action is considered to
 | |
| #               have been taken. The returned message is displayed (in verbose
 | |
| #               mode) and success is logged. If the 'GET' failed then
 | |
| #               a warning message is generated along with a warning log entry.
 | |
| #       THROWS: No exceptions
 | |
| #     COMMENTS: None
 | |
| #     SEE ALSO: N/A
 | |
| #===============================================================================
 | |
| sub call_back {
 | |
|     my ( $format, $key, $action, $user, $pass, $log, $dry_run, $verbose ) = @_;
 | |
| 
 | |
|     my ( $ua, $url, $response, $code, $message, $content );
 | |
| 
 | |
|     #
 | |
|     # Check calling parameters
 | |
|     #
 | |
|     unless ($key) {
 | |
|         warn "Invalid call to 'call_back' subroutine; missing key\n";
 | |
|         return 0;
 | |
|     }
 | |
| 
 | |
|     unless ( $action =~ /^(approve|block|delete)$/ ) {
 | |
|         warn "Invalid call to 'call_back' subroutine; invalid action\n";
 | |
|         return 0;
 | |
|     }
 | |
| 
 | |
|     #
 | |
|     # Build the URL
 | |
|     #
 | |
|     $url = sprintf( $format, $key, $action );
 | |
| 
 | |
|     #
 | |
|     # Perform the call to clean up the JSON files, etc, or just say what would
 | |
|     # have been done
 | |
|     #
 | |
|     unless ($dry_run) {
 | |
|         print "Making the server call:\nGET $url\n" if $verbose > 0;
 | |
| 
 | |
|         $ua = LWP::UserAgent->new;
 | |
|         $ua->timeout(180);                  # Module default is 180 sec = 3 mins
 | |
|         $ua->credentials("hub.hackerpublicradio.org:443", "cms", $user, $pass);
 | |
| 
 | |
|         $response = $ua->get($url);
 | |
|         if ( $response->is_success ) {
 | |
|             #
 | |
|             # Server call succeeded
 | |
|             #
 | |
|             $code = $response->code;
 | |
|             $message = $response->message;
 | |
|             $content = $response->decoded_content;
 | |
|             if ($verbose > 0) {
 | |
|                 print "Success: $code/$message\n";
 | |
|                 print "Content: $content\n" if $content;
 | |
|             }
 | |
|             $log->info("Successful server message $key/$action: " .
 | |
|                 $response->status_line);
 | |
|             return 1;
 | |
|         }
 | |
|         else {
 | |
|             #
 | |
|             # Server call failed
 | |
|             #
 | |
|             warn $response->status_line . "\n";
 | |
|             $log->error("Failed server message $key/$action: " .
 | |
|                 $response->status_line);
 | |
|             # Try to log as much as possible
 | |
|             $log->error( $response->decoded_content )
 | |
|                 if $response->decoded_content;
 | |
|             return 0;
 | |
|         }
 | |
|     }
 | |
|     else {
 | |
|         #
 | |
|         # Always a success in dry-run mode
 | |
|         #
 | |
|         print "Would have made the server call:\nGET $url\n" if $verbose > 0;
 | |
|         return 1;
 | |
|     }
 | |
| 
 | |
| }
 | |
| 
 | |
| #===  FUNCTION  ================================================================
 | |
| #         NAME: print_errors
 | |
| #      PURPOSE: Prints the contents of the error array if there's anything in
 | |
| #               it (i.e. not empty)
 | |
| #   PARAMETERS: $errors         arrayref possibly containing errors
 | |
| #      RETURNS: Nothing
 | |
| #  DESCRIPTION: Simplifies printing in the case when there's nothing to print
 | |
| #       THROWS: No exceptions
 | |
| #     COMMENTS: None
 | |
| #     SEE ALSO: N/A
 | |
| #===============================================================================
 | |
| sub print_errors {
 | |
|     my ($errors) = @_;
 | |
| 
 | |
|     return unless defined($errors);
 | |
| 
 | |
|     print "Errors: ", join( "\n", @{$errors} ), "\n"
 | |
|         if @{$errors};
 | |
| }
 | |
| 
 | |
| #===  FUNCTION  ================================================================
 | |
| #         NAME: saveError
 | |
| #      PURPOSE: Saves an error message in an error structure and logs it if in
 | |
| #               live mode
 | |
| #   PARAMETERS: $message        error message text
 | |
| #               $struct         structure (arrayref) to hold the text
 | |
| #               $log            log handle
 | |
| #               $dry_run        Boolean flagging dry-run mode
 | |
| #               $verbose        Integer level of verbosity
 | |
| #      RETURNS: Nothing
 | |
| #  DESCRIPTION: Simplifies the saving of an error message and logging it
 | |
| #       THROWS: No exceptions
 | |
| #     COMMENTS: None
 | |
| #     SEE ALSO: N/A
 | |
| #===============================================================================
 | |
| sub saveError {
 | |
|     my ( $message, $struct, $log, $dry_run, $verbose ) = @_;
 | |
| 
 | |
|     $log->warn($message) unless $dry_run;
 | |
|     push( @{$struct}, $message );
 | |
|     print "$message\n" if $verbose > 0;
 | |
| }
 | |
| 
 | |
| #===  FUNCTION  ================================================================
 | |
| #         NAME: validate_comment
 | |
| #      PURPOSE: Checks that all of the required fields are present in the
 | |
| #               JSON comment
 | |
| #   PARAMETERS: $comment        - a pointer to the JSON hash
 | |
| #      RETURNS: True (1) if valid, otherwise false (0)
 | |
| #  DESCRIPTION:
 | |
| #       THROWS: No exceptions
 | |
| #     COMMENTS: None
 | |
| #     SEE ALSO: N/A
 | |
| #===============================================================================
 | |
| sub validate_comment {
 | |
|     my ($comment) = @_;
 | |
| 
 | |
|     foreach my $key (@json_keys) {
 | |
|         return 0 unless defined($comment->{$key});
 | |
|     }
 | |
| 
 | |
|     return 1;
 | |
| }
 | |
| 
 | |
| #===  FUNCTION  ================================================================
 | |
| #         NAME: count_tags
 | |
| #      PURPOSE: Counts the number of start tags in what might be HTML
 | |
| #   PARAMETERS: $string         String to examine
 | |
| #      RETURNS: An integer count of the number of start tags found
 | |
| #  DESCRIPTION: Uses HTML::Parser to parse the input string. A handler is
 | |
| #               defined which accumulates start and end tags in an array. The
 | |
| #               number of start tags found is returned to the caller.
 | |
| #       THROWS: No exceptions
 | |
| #     COMMENTS: None
 | |
| #     SEE ALSO: N/A
 | |
| #===============================================================================
 | |
| sub count_tags {
 | |
|     my ($string) = @_;
 | |
| 
 | |
|     my @accum;
 | |
|     chomp($string);
 | |
|     return 0 unless $string;
 | |
| 
 | |
|     my $p = HTML::Parser->new(
 | |
|         api_version => 3,
 | |
|         handlers    => {
 | |
|             start => [ \@accum, "event,text" ],
 | |
|             end   => [ \@accum, "event,text" ]
 | |
|         }
 | |
|     );
 | |
|     $p->parse($string);
 | |
|     $p->eof;
 | |
| 
 | |
|     return scalar(@accum);
 | |
| }
 | |
| 
 | |
| #===  FUNCTION  ================================================================
 | |
| #         NAME: clean_comment_field
 | |
| #      PURPOSE: Cleans a string field from the comment
 | |
| #   PARAMETERS: $field - the field to be cleaned
 | |
| #      RETURNS: The cleaned string
 | |
| #  DESCRIPTION: We want to remove all HTML. We also want to add HTML entities
 | |
| #               where these may be needed. If there are already entities in
 | |
| #               the data we will decode them first then re-encode so we don't
 | |
| #               get double encoding.
 | |
| #       THROWS: No exceptions
 | |
| #     COMMENTS: None
 | |
| #     SEE ALSO: N/A
 | |
| #===============================================================================
 | |
| sub clean_comment_field {
 | |
|     my ($field) = @_;
 | |
| 
 | |
|     #
 | |
|     # Remove all HTML
 | |
|     #
 | |
|     my $hr = HTML::Restrict->new();
 | |
|     $field = $hr->process($field);
 | |
| 
 | |
|     #
 | |
|     # Entities. Make sure all existing entities are decoded first, then encode
 | |
|     # the result. Note that the unsafe characters are the reverse of what's
 | |
|     # listed. The second argument to encode_entities uses regular expression
 | |
|     # character class syntax (as if in [] in a RE). The set is all non-plain
 | |
|     # ASCII exclusing newline, carriage return and '<>&'.
 | |
|     #
 | |
|     decode_entities($field);
 | |
|     $field
 | |
|         = encode_entities( $field, '^\n\r\x20-\x25\x27-\x3b\x3d\x3f-\x7e' );
 | |
| 
 | |
|     return $field;
 | |
| }
 | |
| 
 | |
| #===  FUNCTION  ================================================================
 | |
| #         NAME: moveFile
 | |
| #      PURPOSE: Moves a file (renames it) taking account of whether the name
 | |
| #               exists at the destination
 | |
| #   PARAMETERS: $file           file to move
 | |
| #               $dest           destination of the file
 | |
| #      RETURNS: True (1) if the move succeeded, otherwise false (0)
 | |
| #  DESCRIPTION: A wrapper around the File::Copy 'move' function. If the target
 | |
| #               file exists then a version number is appended to the name so
 | |
| #               that the original is not clobbered.
 | |
| #       THROWS: No exceptions
 | |
| #     COMMENTS: None
 | |
| #     SEE ALSO: N/A
 | |
| #===============================================================================
 | |
| sub moveFile {
 | |
|     my ( $file, $dest ) = @_;
 | |
| 
 | |
|     my ( $target, $basename, $prefix, $vsn );
 | |
| 
 | |
|     return 0 unless -e $file;
 | |
| 
 | |
|     #
 | |
|     # Handle a destination directory
 | |
|     #
 | |
|     if ( -d $dest ) {
 | |
|         ( $basename = $file ) =~ s|.*/||mx;
 | |
|         $target = "$dest/$basename";
 | |
|     }
 | |
|     else {
 | |
|         $target = $dest;
 | |
|     }
 | |
| 
 | |
|     #
 | |
|     # If the target exists we'll have to do some work, otherwise just do the
 | |
|     # move
 | |
|     #
 | |
|     if ( -e $target ) {
 | |
|         if ( ( $prefix, $vsn ) = ( $target =~ /(\.)(\d+)$/ ) ) {
 | |
|             while ( -e "$prefix$vsn" ) {
 | |
|                 $vsn++;
 | |
|                 $target = "$prefix$vsn";
 | |
|             }
 | |
|             return move( $file, $target );
 | |
|         }
 | |
|         else {
 | |
|             $vsn    = 1;
 | |
|             $prefix = $target;
 | |
|             $target = "$prefix.$vsn";
 | |
|             while ( -e "$prefix.$vsn" ) {
 | |
|                 $vsn++;
 | |
|                 $target = "$prefix.$vsn";
 | |
|             }
 | |
|             return move( $file, $target );
 | |
|         }
 | |
|     }
 | |
|     else {
 | |
|         return move( $file, $dest );
 | |
|     }
 | |
| }
 | |
| 
 | |
| #===  FUNCTION  ================================================================
 | |
| #         NAME: abbreviate_filename
 | |
| #      PURPOSE: Abbreviates a long pathname by shrinking the directory names.
 | |
| #   PARAMETERS: $filename       The name of the file
 | |
| #      RETURNS: A string containing the abbreviated name
 | |
| #  DESCRIPTION: Splits the filename, assembles the directories as just their
 | |
| #               first character, and then appends the filename. A bit
 | |
| #               primitive but it works.
 | |
| #       THROWS: No exceptions
 | |
| #     COMMENTS: None
 | |
| #     SEE ALSO: N/A
 | |
| #===============================================================================
 | |
| sub abbreviate_filename {
 | |
|     my ($filename) = @_;
 | |
| 
 | |
|     my @elements = split('/',$filename);
 | |
|     my @shortened = map {substr($_,0,1)} @elements[0..$#elements-1];
 | |
| 
 | |
|     return join('/',@shortened,$elements[-1]);
 | |
| }
 | |
| 
 | |
| #===  FUNCTION  ================================================================
 | |
| #         NAME: sort_by_timestamp
 | |
| #      PURPOSE: Sorts the main hash by timestamp rather than the normal sort
 | |
| #               by key
 | |
| #   PARAMETERS: $struct         hashref pointing to the main structure
 | |
| #      RETURNS: A list of hash keys in order of timestamp
 | |
| #  DESCRIPTION: Generates a list of keys by sorting based on the comment
 | |
| #               timestamp in the JSON hash. Performs the sort by doing
 | |
| #               a string comparison since the dates are in ISO8601
 | |
| #               'YYYY-MM-DDTHH:MM:SSZ' format (e.g. 2017-10-06T06:22:55Z).
 | |
| #               This seems to be safe.
 | |
| #       THROWS: No exceptions
 | |
| #     COMMENTS: None
 | |
| #     SEE ALSO: N/A
 | |
| #===============================================================================
 | |
| sub sort_by_timestamp {
 | |
|     my ($struct) = @_;
 | |
| 
 | |
|     my @result = sort {
 | |
|         $struct->{$a}->{JSON}->{comment_timestamp}
 | |
|             cmp $struct->{$b}->{JSON}->{comment_timestamp}
 | |
|     } keys(%$struct);
 | |
| 
 | |
|     return @result;
 | |
| }
 | |
| 
 | |
| #===  FUNCTION  ================================================================
 | |
| #         NAME: _debug
 | |
| #      PURPOSE: Prints debug reports
 | |
| #   PARAMETERS: $active         Boolean: 1 for print, 0 for no print
 | |
| #               @messages       List of messages to print
 | |
| #      RETURNS: Nothing
 | |
| #  DESCRIPTION: Outputs messages to STDERR if $active is true. For each
 | |
| #               message it removes any trailing newline and then adds one in
 | |
| #               the 'print' so the caller doesn't have to bother. Prepends
 | |
| #               each message with 'D> ' to show it's a debug message.
 | |
| #       THROWS: No exceptions
 | |
| #     COMMENTS: None
 | |
| #     SEE ALSO: N/A
 | |
| #===============================================================================
 | |
| sub _debug {
 | |
|     my ( $active, @messages ) = @_;
 | |
| 
 | |
|     return unless $active;
 | |
|     foreach my $msg (@messages) {
 | |
|         chomp($msg);
 | |
|         print STDERR "D> $msg\n";
 | |
|     }
 | |
| }
 | |
| 
 | |
| #===  FUNCTION  ================================================================
 | |
| #         NAME: Options
 | |
| #      PURPOSE: Processes command-line options
 | |
| #   PARAMETERS: $optref     Hash reference to hold the options
 | |
| #      RETURNS: Undef
 | |
| #  DESCRIPTION:
 | |
| #       THROWS: no exceptions
 | |
| #     COMMENTS: none
 | |
| #     SEE ALSO: n/a
 | |
| #===============================================================================
 | |
| sub Options {
 | |
|     my ($optref) = @_;
 | |
| 
 | |
|     my @options = (
 | |
|         "help",     "documentation|manpage",
 | |
|         "debug=i",  "dry-run!",
 | |
|         "verbose+", "verify!",
 | |
|         "live!",    "json!",
 | |
|         "config=s",
 | |
|     );
 | |
| 
 | |
|     if ( !GetOptions( $optref, @options ) ) {
 | |
|         pod2usage( -msg => "$PROG version $VERSION\n", -exitval => 1 );
 | |
|     }
 | |
| 
 | |
|     return;
 | |
| }
 | |
| 
 | |
| __END__
 | |
| 
 | |
| 
 | |
| #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 | |
| #  Application Documentation
 | |
| #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 | |
| #{{{
 | |
| 
 | |
| =head1 NAME
 | |
| 
 | |
| process_comments
 | |
| 
 | |
| =over 4
 | |
| 
 | |
| Process incoming comment files as email messages or JSON files
 | |
| 
 | |
| =back
 | |
| 
 | |
| =head1 VERSION
 | |
| 
 | |
| This documentation refers to process_comments version 0.3.3
 | |
| 
 | |
| 
 | |
| =head1 USAGE
 | |
| 
 | |
|     ./process_comments [-help] [-doc] [-debug=N] [-[no]dry-run]
 | |
|         [-verbose ...] [-[no]live] [-[no]json] [-config=FILE]
 | |
| 
 | |
|     ./process_comments -dry-run
 | |
|     ./process_comments -debug=3 -dry-run
 | |
|     ./process_comments -verbose -verbose
 | |
|     ./process_comments -help
 | |
|     ./process_comments -documentation
 | |
|     ./process_comments -json
 | |
|     ./process_comments -verb -live -config=.process_comments_live.cfg
 | |
| 
 | |
| =head1 OPTIONS
 | |
| 
 | |
| =over 8
 | |
| 
 | |
| =item B<-help>
 | |
| 
 | |
| Prints a brief help message describing the usage of the program, and then exits.
 | |
| 
 | |
| =item B<-documentation> or B<-manpage>
 | |
| 
 | |
| Prints the entire embedded documentation for the program, then exits.
 | |
| 
 | |
| =item B<-debug=N>
 | |
| 
 | |
| Enables debugging mode when N > 0 (zero is the default). The levels are:
 | |
| 
 | |
| =over 4
 | |
| 
 | |
| =item B<1>
 | |
| 
 | |
| Reports on the options chosen. Running with the B<-dry-run> option prevents
 | |
| problems with comments if any options have been incorrectly used.
 | |
| 
 | |
| =item B<2>
 | |
| 
 | |
| Prints all of the information described at the previous level.
 | |
| 
 | |
| Prints the names of files found in the mail or JSON spool areas.
 | |
| 
 | |
| Prints the internal details of the email, listing the MIME parts (if there are any).
 | |
| 
 | |
| Prints the length of the MIME part  matching the desired type, in lines.
 | |
| 
 | |
| Prints the entirety of the internal structure holding details of the mail file
 | |
| and the comment it contains. This follows the moderation pass.
 | |
| 
 | |
| Prints the SQL that has been constructed to update the database.
 | |
| 
 | |
| =item B<3>
 | |
| 
 | |
| Prints all of the information described at the previous levels.
 | |
| 
 | |
| Prints the contents of the B<%config> hash built from the configuration file.
 | |
| 
 | |
| =back
 | |
| 
 | |
| =item B<-[no]dry-run>
 | |
| 
 | |
| Controls the program's B<dry-run> mode. It is off by default. In dry-run mode
 | |
| the program reports what it would do but makes no changes. When off the
 | |
| program makes all the changes it is designed to perform.
 | |
| 
 | |
| =item B<-verbose>
 | |
| 
 | |
| This option may be repeated. For each repetition the level of verbosity is
 | |
| increased. By default no verbosity is in effect and the program prints out the
 | |
| minimal amount of information.
 | |
| 
 | |
| Verbosity levels:
 | |
| 
 | |
| =over 4
 | |
| 
 | |
| =item B<1>
 | |
| 
 | |
| Prints the name of each mail (or JSON) file as it's processed.
 | |
| 
 | |
| Prints any error messages during message validation, which are also being
 | |
| logged (unless in dry-run mode) and saved for reporting later.
 | |
| 
 | |
| Prints a notification if the comment is added to the database (or that this
 | |
| would have happened in dry-run mode).
 | |
| 
 | |
| Prints messages about the moving of each mail (or JSON) file from the
 | |
| processing area, along with any errors accumulated for that file. In dry-run
 | |
| mode simply indicates what would have happened.
 | |
| 
 | |
| Prints the response code received from the server when invoking the interface
 | |
| for updating comment files there. If in dry-run mode the message produced
 | |
| merely indicates what would have happened.
 | |
| 
 | |
| If validation failed earlier on then further information is produced about the
 | |
| final actions taken on these files.
 | |
| 
 | |
| =item B<2>
 | |
| 
 | |
| Prints the addresses each mail message is being sent to (unless in JSON mode).
 | |
| 
 | |
| =item B<3>
 | |
| 
 | |
| Prints the JSON contents of each mail message (or of each JSON file).
 | |
| 
 | |
| =back
 | |
| 
 | |
| =item B<-[no]verify>
 | |
| 
 | |
| This option controls whether the script imposes a delay on comments. The idea
 | |
| is that if the comment feature is used to rant on a subject or to pass
 | |
| misinformation, delaying them will help to defuse the situation.
 | |
| 
 | |
| The default state is B<-noverify>; a delay is not imposed. Selecting
 | |
| B<-verify> means that comments have to be at least 24 hours old before they
 | |
| are processed. The length of the delay cannot currently be changed without
 | |
| altering the script.
 | |
| 
 | |
| This feature of the script has hardly been used and is perhaps against the
 | |
| spirit of HPR, so it will probably be removed in a future version.
 | |
| 
 | |
| =item B<-[no]live>
 | |
| 
 | |
| This option determines whether the program runs in live mode or not. The
 | |
| default varies depending on which system it is being run on.
 | |
| 
 | |
| IT SHOULD NOT USUALLY BE NECESSARY TO USE THIS!
 | |
| 
 | |
| In live mode the program makes changes to the live database and sends messages
 | |
| to the live web interface when a comment has been processed. With live mode
 | |
| off the program assumes it is writing to a clone of the database and it does
 | |
| not inform the webserver that a comment has been processed.
 | |
| 
 | |
| The default for the copy of the program on borg is that live mode is ON.
 | |
| Otherwise the default is that live mode is OFF. The setting is determined by
 | |
| the sed script called B<fixup.sed> on borg. This needs to be run whenever
 | |
| a new version of the program is released. This is done as follows:
 | |
| 
 | |
|     sed -i -f fixup.sed process_comments
 | |
| 
 | |
| =item B<-[no]json>
 | |
| 
 | |
| This option selects JSON mode, which makes the script behave in a different
 | |
| way from the default mode (B<-nojson> or MAIL mode) where it processes email
 | |
| containing comments.
 | |
| 
 | |
| In JSON mode the script looks in a sub-directory called B<json/> where it
 | |
| expects to find JSON files. The normal way in which these files arrive in this
 | |
| directory is by using B<scp> to copy them from the HPR server (the directory
 | |
| is B</home/hpr/comments>). This is a provision in case the normal route of
 | |
| sending out email messages has failed for some reason. It also saves the user
 | |
| from setting up the mail handling infrastructure that would otherwise be
 | |
| needed.
 | |
| 
 | |
| In JSON mode the mail handling logic is not invoked, files are searched for in
 | |
| the B<json/> directory and each file is processed, moderation is requested and
 | |
| the comment is added to the database. In `B<-live>` mode the server is informed
 | |
| that the comment has been processed.
 | |
| 
 | |
| The B<json/> directory needs to have three sub-directories: B<processed>,
 | |
| B<banned> and B<rejected>. The script will place the processed files into
 | |
| these sub-directories according to the moderation choice made. This makes it
 | |
| easier to see what actions were taken and helps avoid repeated processing of
 | |
| the same comment.
 | |
| 
 | |
| =item B<-config=FILE>
 | |
| 
 | |
| This option defines a configuration file other than the default
 | |
| B<.hpr_db.cfg>. The file must be formatted as described below in the section
 | |
| B<CONFIGURATION AND ENVIRONMENT>.
 | |
| 
 | |
| =back
 | |
| 
 | |
| =head1 DESCRIPTION
 | |
| 
 | |
| A script to process new comments, moderate them and add them to the HPR
 | |
| database.
 | |
| 
 | |
| In the new HPR comment system (released September 2017) a new web form is
 | |
| presented in association with each show. The form can be used to submit
 | |
| a comment on the show in question and takes some standard fields: the name of
 | |
| the commenter, the title of the comment and the body of the comment itself.
 | |
| 
 | |
| Once the comment has been submitted its contents are formatted as a JSON
 | |
| object and are sent as a mail attachment to the address
 | |
| B<comments@hackerpublicradio.org>.
 | |
| 
 | |
| Recipients of these mail messages can then perform actions on the comments
 | |
| they contain to cause them to be added to the HPR database. These actions are:
 | |
| approve the comment, block it (because it is inappropriate or some form of
 | |
| Spam and we want to prevent any further messages from the associated IP
 | |
| address), or reject it (delete it). There is also an ignore option which skips
 | |
| the current comment in this run of the script so it can be revisited later.
 | |
| 
 | |
| This script can process an entire email message which has been saved to a file
 | |
| or a file containing the JSON object (as in the email attachment). When
 | |
| processing email it is expected that it will be found in a maildrop directory,
 | |
| and when finished the messages will be placed in sub-directories according to
 | |
| what actions were carried out. A similar logic is used for JSON files; they
 | |
| are expected to be in a drop area and are moved to sub-directories after
 | |
| processing.
 | |
| 
 | |
| The directories used as spool areas are defined in the configuration file(s)
 | |
| described below.
 | |
| 
 | |
| =head2 CHANGES
 | |
| 
 | |
| =head3 2024
 | |
| 
 | |
| An alternative method of comment approval has been added using PHP on the
 | |
| server B<https://hub.hackerpublicradio.org>. This script was originally
 | |
| designed around receiving email messages containing JSON or collecting JSON
 | |
| files. The assumption was that it was the sole agent processing these messages
 | |
| or files and would therefore be the only way that comments could enter the
 | |
| database.
 | |
| 
 | |
| With the new server route it became possible for an emailed comment to be
 | |
| received when that comment was already in the database.
 | |
| 
 | |
| As of version B<0.3.2> the script has been enhanced to check whether comments
 | |
| received as mail messages (or JSON files) have already been processed. If this
 | |
| is found to be the case the incoming data is skipped and stored in the
 | |
| 'B<processed>' directory (because the comment has been approved elsewhere).
 | |
| 
 | |
| There is still scope for confusion - if for example, a comment has been
 | |
| rejected on the server, the script will see it as a potential addition to the
 | |
| database. The moderators need to coordinate their decisions!
 | |
| 
 | |
| The ultimate solution will be to have just one way of moderating and
 | |
| installing comments which is available to any of the "Janitors".
 | |
| 
 | |
| =head2 MAIL HANDLING
 | |
| 
 | |
| One way of handling incoming mail is to use a mail client which is capable of
 | |
| saving messages sent to the above address in the spool area mentioned earlier.
 | |
| For example, Thunderbird can do this by use of a filter and a plugin. Other
 | |
| MUA's will have similar capabilities.
 | |
| 
 | |
| When this script is run on the mail spool area it will process all of the
 | |
| files it finds. For each file it will check its validity in various ways,
 | |
| display the comment then offer a moderation menu. The moderation options are
 | |
| described below.
 | |
| 
 | |
| =head3 APPROVE
 | |
| 
 | |
| If a comment is approved then it will be added to the database, the associated
 | |
| mail file will be moved to a sub-directory (by default called 'B<processed>'),
 | |
| and the HPR server will be notified of this action.
 | |
| 
 | |
| =head3 BAN
 | |
| 
 | |
| If a comment is banned then it will not be added to the database. The mail
 | |
| file will be moved to the sub-directory 'B<banned>' and the HPR server will be
 | |
| informed that the IP address associated with the comment should be placed on
 | |
| a black list.
 | |
| 
 | |
| =head3 REJECT
 | |
| 
 | |
| If a comment is rejected it is not written to the database, the mail file is
 | |
| moved to the sub-directory 'B<rejected>' and the HPR server informed that the
 | |
| comment can be deleted.
 | |
| 
 | |
| =head3 IGNORE
 | |
| 
 | |
| If a comment is ignored it is simply left in the mail spool and no further
 | |
| processing done on it. It will be eligible for processing again when the
 | |
| script is next run.
 | |
| 
 | |
| =head2 JSON FILE HANDLING
 | |
| 
 | |
| As described under the description of the B<-[no]json> option, the script
 | |
| allows the processing of a multiple JSON files, each containing a single
 | |
| comment. These files are expected to be in another spool area (defined int he
 | |
| configuration file). The JSON is checked and all of the comment fields are
 | |
| validated, then the moderation process is begun.
 | |
| 
 | |
| Moderation in this case consists of the same steps as described above except
 | |
| that no mail file actions are taken and the JSON file is moved to
 | |
| a sub-directory after processing.
 | |
| 
 | |
| =head1 DIAGNOSTICS
 | |
| 
 | |
| =over 4
 | |
| 
 | |
| =item B<Unable to find configuration file ...>
 | |
| 
 | |
| Type: fatal
 | |
| 
 | |
| The nominated configuration file referenced in B<-config=FILE> was not found.
 | |
| 
 | |
| =item B<No mail found; nothing to do>
 | |
| 
 | |
| Type: fatal
 | |
| 
 | |
| No mail files were found in the mail spool area requiring processing.
 | |
| 
 | |
| =item B<No JSON files found; nothing to do>
 | |
| 
 | |
| Type: fatal
 | |
| 
 | |
| No JSON files were found in the JSON spool area requiring processing.
 | |
| 
 | |
| =item B<Failed to read JSON file '...' ...>
 | |
| 
 | |
| Type: fatal
 | |
| 
 | |
| A JSON file in the spool area could not be read with a JSON parser.
 | |
| 
 | |
| =item B<Failed to parse comment timestamp ...>
 | |
| 
 | |
| Type: fatal
 | |
| 
 | |
| The timestamp must be converted to a format compatible with MySQL/MariaDB but
 | |
| during this process the parse failed.
 | |
| 
 | |
| =item B<Failed to open input file '...' ...>
 | |
| 
 | |
| Type: fatal
 | |
| 
 | |
| A mail file in the spool area could not be opened.
 | |
| 
 | |
| =item B<Failed to move ...>
 | |
| 
 | |
| Type: warning
 | |
| 
 | |
| A mail file could not be moved to the relevant sub-directory.
 | |
| 
 | |
| =item B<Failed to close input file '...' ...>
 | |
| 
 | |
| Type: warning
 | |
| 
 | |
| A mail file in the spool area could not be closed.
 | |
| 
 | |
| =item B<Various error messages from the database subsystem>
 | |
| 
 | |
| Type: fatal, warning
 | |
| 
 | |
| An action on the database has been flagged as an error.
 | |
| 
 | |
| =item B<Various error messages from the Template toolkit>
 | |
| 
 | |
| Type: fatal
 | |
| 
 | |
| An action relating to the template used for the display of the comment has
 | |
| been flagged as an error.
 | |
| 
 | |
| =item B<Invalid call to 'call_back' subroutine; missing key>
 | |
| 
 | |
| Type: warning
 | |
| 
 | |
| The routine 'call_back' was called incorrectly. The key was missing.
 | |
| 
 | |
| =item B<Invalid call to 'call_back' subroutine; invalid action>
 | |
| 
 | |
| Type: warning
 | |
| 
 | |
| The routine 'call_back' was called incorrectly. The action was invalid.
 | |
| 
 | |
| =item B<Error from remote server indicating failure>
 | |
| 
 | |
| Type: warning
 | |
| 
 | |
| While attempting to send an action to the remote server with the 'call_back'
 | |
| subroutine an error message was received.
 | |
| 
 | |
| =back
 | |
| 
 | |
| =head1 CONFIGURATION AND ENVIRONMENT
 | |
| 
 | |
| =head2 CONFIGURATION
 | |
| 
 | |
| The configuration file design was changed for version B<0.2.7> and beyond.
 | |
| 
 | |
| The script expects a configuration file to provide it with various program
 | |
| settings, database credentials and HPR CMS credentials.
 | |
| 
 | |
| The default configuration file name is B<.process_comments.cfg> and is for use
 | |
| with the local clone of the hpr database.
 | |
| 
 | |
| This file has the general structure:
 | |
| 
 | |
|  <<include .process_comments_settings.cfg>>
 | |
| 
 | |
|  <<include .hpr_db.cfg>>
 | |
| 
 | |
|  <cms>
 | |
|      user = "dummy:dummy"
 | |
|  </cms>
 | |
| 
 | |
| It uses B<Config::General> 'B<include>' directives to include the file
 | |
| B<.process_comments_settings.cfg> which holds the program settings. It then
 | |
| includes the file B<.hpr_db.cfg> which containss credentials for the cloned
 | |
| database. Finally it defines dummy credentials for the HPR CMS since this
 | |
| feature is not used by default.
 | |
| 
 | |
| The more useful configuration is in B<.process_comments_live.cfg>:
 | |
| 
 | |
|  <<include .process_comments_settings.cfg>>
 | |
| 
 | |
|  <<include .hpr_livedb.cfg>>
 | |
| 
 | |
|  <cms>
 | |
|     <<include .hpradmin_curlrc>>
 | |
|  </cms>
 | |
| 
 | |
| The basic format of the main configuration data after the inclusions have been
 | |
| performed is then:
 | |
| 
 | |
|  <settings>
 | |
|     # Holds program settings
 | |
|  </settings>
 | |
|  <database>
 | |
|     # Holds database settings
 | |
|  </database>
 | |
|  <cms>
 | |
|     # Holds CMS credentials
 | |
|  </cms>
 | |
| 
 | |
| =head3 CONTENTS OF B<.process_comments_settings.cfg>:
 | |
| 
 | |
| This section is enclosed in '<settings>' tags:
 | |
| 
 | |
|  <settings>
 | |
|      PROG       = process_comments
 | |
| 
 | |
|      #
 | |
|      # Defaults
 | |
|      #
 | |
|      basedir    = "$HOME/HPR/Comment_system"
 | |
|      configfile = "$basedir/.hpr_db.cfg"
 | |
|      logfile    = "$basedir/logs/${PROG}.log"
 | |
|      template   = "$basedir/${PROG}.tpl"
 | |
| 
 | |
|      #
 | |
|      # Mail message stash area
 | |
|      #
 | |
|      maildrop   = "$HOME/HPR/CommentDrop"
 | |
|      processed  = "$maildrop/processed"
 | |
|      rejected   = "$maildrop/rejected"
 | |
|      banned     = "$maildrop/banned"
 | |
| 
 | |
|      #
 | |
|      # JSON stash area
 | |
|      #
 | |
|      jsondir    = "$basedir/json"
 | |
|      jprocessed = "$jsondir/processed"
 | |
|      jrejected  = "$jsondir/rejected"
 | |
|      jbanned    = "$jsondir/banned"
 | |
| 
 | |
|      #
 | |
|      # How to tell the server the comment's processed
 | |
|      #
 | |
|      callback_template = \
 | |
|              "https://hackerpublicradio.org/comment_process.php?key=%s&action=%s"
 | |
| 
 | |
|  </settings>
 | |
| 
 | |
| =head3 CONTENTS OF B<.hpr_db.cfg> AND B<.hpr_livedb.cfg>
 | |
| 
 | |
| The default file providing this part is just for use with the default
 | |
| database. The section is enclosed in '<database>' tags:
 | |
| 
 | |
|  <database>
 | |
|      host = 127.0.0.1
 | |
|      port = PORT
 | |
|      name = DATABASE
 | |
|      user = USERNAME
 | |
|      password = PASSWORD
 | |
|  </database>
 | |
| 
 | |
| These settings can alternatively be used to connect to an SSH tunnel which has
 | |
| been connected  from a remote system (like borg) to the live database.
 | |
| Assuming the port chosen for this is 3307 something like the following could
 | |
| be used:
 | |
| 
 | |
|  <database>
 | |
|      host = 127.0.0.1
 | |
|      port = 3307
 | |
|      name = hpr_hpr
 | |
|      user = hpr_hpr
 | |
|      password = "**censored**"
 | |
|  </database>
 | |
| 
 | |
| A typical Bash script for opening a tunnel might be:
 | |
| 
 | |
|  #!/bin/bash
 | |
|  SSHPORT=22
 | |
|  LOCALPORT=3307
 | |
|  REMOTEPORT=3306
 | |
|  ssh -p ${SSHPORT} -f -N -L localhost:${LOCALPORT}:localhost:${REMOTEPORT} hpr@hackerpublicradio.org
 | |
| 
 | |
| =head3 CONTENTS OF B<.hpradmin_curlrc>
 | |
| 
 | |
| This is a file used by the 'curl' command to contain credentials to access the
 | |
| CMS on the HPR server. The format needs to be:
 | |
| 
 | |
|  user = hpradmin:**censored**
 | |
| 
 | |
| This is one of the formats allowed by 'curl' and is also compatible with the
 | |
| B<Config::General> module used by this script.
 | |
| 
 | |
| =head2 TEMPLATE
 | |
| 
 | |
| The program displays the comment that is currently being processed for
 | |
| moderation. It uses a template along with the Perl B<Template> module to do
 | |
| this. By default this template is called B<process_comments.tpl>. This can be
 | |
| changed by changing the line in the "settings" configuration file which sets
 | |
| the 'template' variable.
 | |
| 
 | |
| The template is provided with the following data:
 | |
| 
 | |
|     file        a scalar containing the name of the file being processed
 | |
| 
 | |
|     db          a hash containing the details of the show to which the
 | |
|                 comment relates, returned from a database query:
 | |
|                 id              the episode number
 | |
|                 date            the date of the episode
 | |
|                 title           the episode title
 | |
|                 host            the host name
 | |
| 
 | |
|     comment     a hash containing the fields from the comment:
 | |
|                 eps_id                  the episode number
 | |
|                 comment_timestamp       date and time of the comment
 | |
|                 comment_author_name     comment author
 | |
|                 comment_title           comment title
 | |
|                 comment_text            comment text
 | |
|                 justification           justification for posting (if
 | |
|                                         relevant)
 | |
|                 key                     unique comment key
 | |
| 
 | |
| =head1 DEPENDENCIES
 | |
| 
 | |
|     Carp
 | |
|     Config::General
 | |
|     DBI
 | |
|     Data::Dumper
 | |
|     DateTime::Format::ISO8601
 | |
|     Encode
 | |
|     File::Copy
 | |
|     File::Find::Rule
 | |
|     File::Slurper
 | |
|     Getopt::Long
 | |
|     HTML::Entities
 | |
|     HTML::Restrict
 | |
|     IO::Prompter
 | |
|     JSON
 | |
|     LWP::UserAgent
 | |
|     List::Util
 | |
|     Log::Handler
 | |
|     MIME::Parser
 | |
|     Mail::Address
 | |
|     Mail::Field
 | |
|     Mail::Internet
 | |
|     Pod::Usage
 | |
|     SQL::Abstract
 | |
|     Template
 | |
| 
 | |
| The Perl B<experimental:try> option is used instead of the original
 | |
| B<Try::Tiny> package.
 | |
| 
 | |
| =head1 BUGS AND LIMITATIONS
 | |
| 
 | |
| There are no known bugs in this module.
 | |
| Please report problems to Dave Morriss (Dave.Morriss@gmail.com)
 | |
| Patches are welcome.
 | |
| 
 | |
| =head1 AUTHOR
 | |
| 
 | |
| Dave Morriss (Dave.Morriss@gmail.com)
 | |
| 
 | |
| =head1 LICENCE AND COPYRIGHT
 | |
| 
 | |
| Copyright (c) 2017 - 2024 Dave Morriss (Dave.Morriss@gmail.com). All rights
 | |
| reserved.
 | |
| 
 | |
| This module is free software; you can redistribute it and/or
 | |
| modify it under the same terms as Perl itself. See perldoc perlartistic.
 | |
| 
 | |
| This program is distributed in the hope that it will be useful,
 | |
| but WITHOUT ANY WARRANTY; without even the implied warranty of
 | |
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 | |
| 
 | |
| =cut
 | |
| 
 | |
| #}}}
 | |
| 
 | |
| # [zo to open fold, zc to close]
 | |
| 
 | |
| # vim: syntax=perl:ts=8:sw=4:et:ai:tw=78:fo=tcrqn21:fdm=marker
 |