#!/usr/bin/env perl #=============================================================================== # # FILE: edit_shownotes # # USAGE: ./edit_shownotes [-help] [-debug=N] [-field=NAME] -episode=N # # DESCRIPTION: Perform edits on metadata in show notes # # OPTIONS: --- # REQUIREMENTS: --- # BUGS: --- # NOTES: 2022-12-20: originally written to update the shownotes.txt # file as well as shownotes.json. Now we are phasing out the # former and using just the JSON, so the former file is not # being edited. # AUTHOR: Dave Morriss (djm), Dave.Morriss@gmail.com # VERSION: 0.0.5 # CREATED: 2022-12-07 13:05:40 # REVISION: 2024-04-27 22:41:24 # #=============================================================================== use v5.36; use strict; use warnings; use utf8; use feature qw{ postderef say signatures state try }; no warnings qw{ experimental::postderef experimental::signatures experimental::try }; use Getopt::Long; use Pod::Usage qw{pod2usage}; use Term::ANSIColor; use File::Temp; use File::Slurper qw{ read_text write_text }; use File::Copy; use JSON; use Log::Handler; use Data::Dumper; # # Version number (manually incremented) # our $VERSION = '0.0.5'; # # Script and directory names # ( my $PROG = $0 ) =~ s|.*/||mx; ( my $DIR = $0 ) =~ s|/?[^/]*$||mx; $DIR = '.' unless $DIR; #------------------------------------------------------------------------------- # Declarations #------------------------------------------------------------------------------- # # Constants and other declarations # my $basedir = "$ENV{HOME}/HPR/Show_Submission"; my $cache = "$basedir/shownotes"; my $logdir = "$basedir/logs"; my $logfile = "$logdir/${PROG}.log"; my ( $fh, $showno, $showid ); my ( $jsonfile, $jsonfile_bk, $json_text, $JSON_content ); my ( $before, $field_contents ); #my ( $txtfile, $txtfile_bk, $txtstring, @txtbuffer ); # # Text colours # my $red = color('red'); my $green = color('green'); my $yellow = color('yellow'); my $magenta = color('magenta'); my $bold = color('bold'); my $reset = color('reset'); # # How to find (and edit) the fields in shownotes.{txt,json}. # The fields are 'tags', 'title' and 'summary'. # If the JSON file is being edited the primary field offered to the editor is # the first in the array (e.g. 'episode.Tags') and the other one simply echoes # it. # NOTE: 2022-12-20 - stopped modifying the shownotes.txt file # # If the TXT file is being updated (from the primary JSON version edited # earlier) we find it in the array holding the text file using the 'regex' # value, and build a new line using the 'label' value. # This will be cleaned up when the TXT variant is dropped. # my %fields = ( 'tags' => { 'json' => [ 'episode.Tags', 'metadata.POST.tags' ], # 'txt' => { regex => qr{^Tags:}, label => 'Tags', }, }, 'title' => { 'json' => [ 'episode.Title', 'metadata.POST.title' ], # 'txt' => { regex => qr{^Title:}, label => 'Title', }, }, 'summary' => { 'json' => [ 'episode.Summary', 'metadata.POST.summary' ], # 'txt' => { regex => qr{^Summary:}, label => 'Summary', }, }, ); # # Enable Unicode mode # binmode STDOUT, ":encoding(UTF-8)"; binmode STDERR, ":encoding(UTF-8)"; #------------------------------------------------------------------------------- # Options and arguments #------------------------------------------------------------------------------- my $DEF_DEBUG = 0; my $DEF_FIELD = 'tags'; # # Process options # my %options; Options( \%options ); # # Default help # pod2usage( -msg => "$PROG version $VERSION\n", -verbose => 0, -exitval => 1 ) if ( $options{'help'} ); # # Detailed help # pod2usage( -msg => "$PROG version $VERSION\n", -verbose => 2, -exitval => 1, ) if ( $options{'documentation'} ); # # Collect options # my $DEBUG = ( defined( $options{debug} ) ? $options{debug} : $DEF_DEBUG ); my $field = ( defined( $options{field} ) ? $options{field} : $DEF_FIELD ); my $test_dir = $options{test_dir}; $showno = $options{episode}; pod2usage( -msg => "$PROG version $VERSION\nMissing mandatory option -episode\n", -verbose => 0, -exitval => 1 ) unless $showno; # # Make an id in 'hpr1234' format # $showid = sprintf('hpr%04d',$showno); # # Check that the -field=FIELD option is valid # die "Invalid field specification: $field\n" unless ($field =~ /^(tags|title|summary)$/); # # The files we'll parse and/or edit (and that we'll backup to). Check the # former exist. # If we've received a test directory we'll run in test mode # if ($test_dir) { # # We need a directory here so remove any file component. # die "Missing path $test_dir\n" unless ( -e $test_dir ); unless (-d $test_dir) { # # Trim a file path to what is likely to be a directory only, and test # it exists. Add a trailing '/' if none. # $test_dir =~ s|/?[^/]*$||mx; die "Missing directory $test_dir\n" unless ( -d $test_dir ); $test_dir .= '/' unless ($test_dir =~ qr{/$}); } # # Look in the test place # $jsonfile = "$test_dir/shownotes.json"; # $txtfile = "$test_dir/shownotes.txt"; $jsonfile_bk = "$test_dir/shownotes.json.orig"; # $txtfile_bk = "$test_dir/shownotes.txt.orig"; } else { # # Look in the default place # $jsonfile = "$cache/$showid/shownotes.json"; # $txtfile = "$cache/$showid/shownotes.txt"; $jsonfile_bk = "$cache/$showid/shownotes.json.orig"; # $txtfile_bk = "$cache/$showid/shownotes.txt.orig"; } die colored( "Unable to find JSON file $jsonfile", 'red' ) . "\n" unless ( -e $jsonfile ); #die colored( "Unable to find text file $txtfile", 'red' ) . "\n" # unless ( -e $txtfile ); #------------------------------------------------------------------------------- # Set up logging keeping the default log layout except for the date. The format # is "%T [%L] %m" where '%T' is the timestamp, '%L' is the log level and '%m is # the message. We fiddle with this to implement a 'Test' mode. #------------------------------------------------------------------------------- my $log = Log::Handler->new(); $log->add( file => { timeformat => "%Y/%m/%d %H:%M:%S", message_layout => (defined($test_dir) ? "%T [%L] TEST %m" : "%T [%L] %m"), filename => $logfile, minlevel => 0, maxlevel => 7, } ); #------------------------------------------------------------------------------- # Read the JSON input file and parse it into $JSON_content (hashref) #------------------------------------------------------------------------------- my $json = JSON->new->utf8; open( $fh, '<:encoding(UTF-8)', $jsonfile ); $json_text = <$fh>; close($fh); # # Careful! Bad JSON can crash the script here! # try { $JSON_content = decode_json($json_text); } catch ($e) { die colored( "Failed to decode the JSON in $jsonfile", 'red' ) . "\n" } $log->info( $showno, "Collecting $field from JSON" ); _debug( $DEBUG > 2, Dumper($JSON_content) ); #_debug( $DEBUG > 2, "\$field: $field" ); #_debug( $DEBUG > 2, Dumper(\%fields) ); #------------------------------------------------------------------------------- # Get the contents of the primary JSON field. The group will all be the same, # as will the text versions, so we edit the primary and propagate to the # others. #------------------------------------------------------------------------------- $log->info( $showno, "Collecting from JSON with path '$fields{$field}->{json}->[0]'" ); # # Use the path in the %fields hash to get the prinmary node in the JSON hash. # We are using 'get_JSON' rather than going to the field directly because # we're hunting for different parts of the JSON structure depending on the # -field=NAME option and using a jq-like path stored in %fields. # $field_contents = get_JSON( $JSON_content, $fields{$field}->{json}->[0] ); #say( defined($field_contents) ? $field_contents : 'undefined' ); die colored( "No contents in field '$field'. Can't continue", 'red' ) . "\n" unless ( defined($field_contents) ); $log->info( $showno, "Contents of field: $field_contents" ); $before = $field_contents; # # Run the editor on what we collected # if ( $field eq 'tags' ) { $field_contents = check_field( $field, run_editor($field_contents), 200, qr{(\n)} ); } elsif ( $field eq 'title' ) { $field_contents = check_field( $field, run_editor($field_contents), 100, qr{(\n)} ); } elsif ( $field eq 'summary' ) { $field_contents = check_field( $field, run_editor($field_contents), 100, qr{(\n)} ); } die colored( "Nothing was changed. Exiting", 'red' ) . "\n" if ( $field_contents eq $before ); $log->info( $showno, "New contents of field: $field_contents" ); #------------------------------------------------------------------------------- # Load the text version of the metadata # NOTE: 2022-12-20 - stopped modifying the shownotes.txt file #------------------------------------------------------------------------------- #open( $fh, '<:encoding(UTF-8)', $txtfile ); #@txtbuffer = <$fh>; #close($fh); #------------------------------------------------------------------------------- # Make the changes to all the relevant points in the two data structures. # NOTE: 2022-12-20 - stopped modifying the shownotes.txt file #------------------------------------------------------------------------------- put_JSON( $JSON_content, $fields{$field}->{json}->[0], $field_contents ); put_JSON( $JSON_content, $fields{$field}->{json}->[1], $field_contents ); #_debug( $DEBUG > 1, Dumper($JSON_content) ); $log->info( $showno, "Updated JSON '$field' fields" ); #@txtbuffer = update_txt( \@txtbuffer, $fields{$field}->{txt}->{regex}, # $fields{$field}->{txt}->{label} . ":\t$field_contents\n" ); #_debug( $DEBUG > 1, "Text: " . Dumper( \@txtbuffer ) ); #$log->info( $showno, "Updated text '$field' fields" ); #------------------------------------------------------------------------------- # Turn the data structures back into files, backing up the originals and # writing the new. #------------------------------------------------------------------------------- unless ( -e $jsonfile_bk ) { copy ($jsonfile, $jsonfile_bk) or die "Unable to backup $jsonfile\n"; $log->info( $showno, "JSON backup created" ); } $json_text = encode_json($JSON_content); write_text($jsonfile,$json_text); $log->info( $showno, "JSON file updated" ); #unless ( -e $txtfile_bk ) { # copy ($txtfile, $txtfile_bk) or die "Unable to backup $txtfile\n"; # $log->info( $showno, "Text backup created" ); #} #$txtstring = join('',@txtbuffer); #write_text($txtfile,$txtstring); # #$log->info( $showno, "Text file updated" ); exit; #=== FUNCTION ================================================================ # NAME: get_JSON # PURPOSE: Traverse a data structure using dotted notation and return the # value at the end point # PARAMETERS: $hash hashref to traverse # $path a path in the form of '.key1.key2.[0]' which # should walk to the key 'key1', then 'key2' # within it, and open the array expected to be # there and get element 0. # RETURNS: The value found or undef # DESCRIPTION: Uses 'traverse_JSON' to find the $path in the $hash, returning # a pointer (reference) to it. This can then be de-referenced to # return the item being referenced - unless it's undefined in # which case that value is returned. Being undefined means that # the path did not match the structure of the hash. # THROWS: No exceptions # COMMENTS: None # SEE ALSO: N/A #=============================================================================== sub get_JSON { my ( $hash, $path ) = @_; my $pointer = traverse_JSON( $hash, $path ); # # If undef then need to return undef, not try and deref it! # if (defined($pointer)) { return $$pointer; } return; } #=== FUNCTION ================================================================ # NAME: put_JSON # PURPOSE: Traverse a data structure using dotted notation and update the # value at the end point # PARAMETERS: $hash hashref to traverse # $path a path in the form of '.key1.key2.[0]' which # should walk to the key 'key1', then 'key2' # within it, and open the array expected to be # there and rewrite element 0. # $new_value the new value to place at the point in the # hash # RETURNS: The previous value if found, or undef # DESCRIPTION: Uses 'traverse_JSON' to find the $path in the $hash, returning # a pointer (reference) to it. This can then be de-referenced to # return the item being referenced - unless it's undefined in # which case that value is returned. Being undefined means that # the path did not match the structure of the hash. Finally, if # the reference is valid then it's used to save the contents of # $new_value. # THROWS: No exceptions # COMMENTS: None # SEE ALSO: N/A #=============================================================================== sub put_JSON { my ( $hash, $path, $new_value ) = @_; my $pointer = traverse_JSON( $hash, $path ); my $old_value; # # If $pointer is defined then dereference it as the return value. Also, # use it to write the new value. # if (defined($pointer)) { $old_value = $$pointer; $$pointer = $new_value; } return $old_value; } #=== FUNCTION ================================================================ # NAME: traverse_JSON # PURPOSE: Traverse a data structure using dotted notation # PARAMETERS: $hash hashref to traverse # $path a path in the form of '.key1.key2.[0]' which # should walk to the key 'key1', then 'key2' # within it, and open the array expected to be # there and return a pointer to element 0. # RETURNS: A reference to the end point of the path, or undef if not # found. # DESCRIPTION: Given a path like '.metadata.POST.title' (where the leading # '.' is optional because it's ignored), this is used to # traverse a Perl version of a JSON data structure to return # a *pointer* (reference) to a node. # # This is done by processing the path step by step and using # 'name' elements as hash keys to get to the next level or # return a terminal hash element. We also cater for '.[0]' # format path elements which means to use the zeroth member of # an array. Thus, if the tags were represented as an array rather # than a scalar CSV string, we'd be able to use a path like: # 'episode.Tags.[0]' to return the first tag. # # The data structure traversal is performed by using a pointer # to a part of it, and since nested hashes are constructed by # using hashrefs as hash element values, we get back references # by default. # # Once the terminal node has been reached it may be a scalar # (very likely in this application), so we make and return # a reference to it so that the caller can use this reference in # a standard way to return or change the contents. # # This function is meant to be more generic than this # application requires. The JSON we're dealing with here doesn't # have arrays (at the moment). This description is # application-specifc however! # THROWS: No exceptions # COMMENTS: None # SEE ALSO: N/A #=============================================================================== sub traverse_JSON { my ( $hash, $path ) = @_; my ( $pointer, $index ); my @elements = split( '\.', $path ); # # Because $hash is a hashref we can just copy it # $pointer = $hash; # # Traverse the hash using the path # foreach my $element (@elements) { next unless defined($element); # # Plain names are keys or scalar end nodes, bracketed things are # arrays (which can be scalars of course) # unless ( ($index) = ( $element =~ /\[([^]])\]/ ) ) { # Hash key if ( exists( $pointer->{$element} ) ) { if ( ref( $pointer->{$element} ) eq '' ) { # # It's not a refererence so make a reference to it # $pointer = \$pointer->{$element}; } else { # # It's a reference # $pointer = $pointer->{$element}; } } else { # # Doesn't exist! # return; #undef } } else { # Array if ( exists( $pointer->[$index] ) ) { if ( ref( $pointer->[$index] ) eq '' ) { # # It's not a refererence so make a reference to it # $pointer = \$pointer->[$index]; } else { # # It's a reference # $pointer = $pointer->[$index]; } } else { # # Doesn't exist! # return; # undef } } } return $pointer; } #=== FUNCTION ================================================================ # NAME: update_txt # PURPOSE: Update the data structure from 'shownotes.txt' in the light of # requested changes. # PARAMETERS: $array arrayref to the contents of the shownotes.txt # file # $regex regular expression to find the line that needs # to be changed (there's just one main one). # $new_value new value to replace the original line # RETURNS: Modified array # DESCRIPTION: Finds the element in the arrayref ($array) which matches the # regular expression ($regex). When found it is replaced by the # new value ($new_value). The modified buffer is returned as # a list. Uses $result to determine the value of each line since # it is not permitted to change the $_ variable in the 'map' # statement. # THROWS: No exceptions # COMMENTS: None # SEE ALSO: N/A #=============================================================================== sub update_txt { my ( $array, $regex, $new_value ) = @_; my $result; my @newbuffer = map { $result = ( $_ =~ /$regex/ ? $new_value : $_ ); $result } @$array; return @newbuffer; } #=== FUNCTION ================================================================ # NAME: run_editor # PURPOSE: Run an interactive vim editor on a string # PARAMETERS: $string String to edit # $options An arrayref containing options for vim # (optional) Example '+set paste'. Each option # (such as '-c startinsert') needs to be # a separate array element. # RETURNS: Edited string # DESCRIPTION: Makes a temporary file with File::Temp ensuring that the file # is in utf8 mode. Writes the edit string to the file and invokes # the 'vim' editor on it. The resulting file is then read back # into a string and returned to the caller, again taking care to # retain utf8 mode. # THROWS: No exceptions # COMMENTS: File::Slurp and UTF-8 don't go well together. Moved to # File::Slurper instead # SEE ALSO: N/A #=============================================================================== sub run_editor { my ( $string, $options ) = @_; # # Build an arguments array for 'system' # my @args; push( @args, @$options ) if $options; # # Make a temporary file # my $tfh = File::Temp->new; binmode $tfh, ":encoding(UTF-8)"; my $tfn = $tfh->filename; print $tfh $string if $string; $tfh->close; # # Add the filename to the arguments # push( @args, $tfn ); die "Edit failed\n" unless ( system( ( 'vim', @args ) ) == 0 ); return read_text($tfn); } #=== FUNCTION ================================================================ # NAME: check_field # PURPOSE: Checks the a field is not too long and doesn't contain certain # characters # PARAMETERS: $name name of field # $field string to be checked # $maxlen maximum string length # $regex regex containing illegal characters to be removed # RETURNS: The input string truncated and with any illegal characters # removed. # DESCRIPTION: Runs a substitution on the string then truncates the result if # it is too long. # THROWS: No exceptions # COMMENTS: None # SEE ALSO: N/A #=============================================================================== sub check_field { my ( $name, $field, $maxlen, $regex ) = @_; return unless $field; $field =~ s/$regex//g; if ( length($field) > $maxlen ) { warn "Field '$name' too long (" . length($field) . "); truncated to " . $maxlen . "\n"; $field = substr( $field, 0, $maxlen ); } return $field; } #=== FUNCTION ================================================================ # NAME: coalesce # PURPOSE: To find the first defined argument and return it # PARAMETERS: Arbitrary number of arguments # RETURNS: The first defined argument or undef if there are none # DESCRIPTION: # THROWS: No exceptions # COMMENTS: None # SEE ALSO: N/A #=============================================================================== sub coalesce { foreach (@_) { return $_ if defined($_); } return undef; ## no critic } #=== FUNCTION ================================================================ # NAME: _debug # PURPOSE: Prints debug reports # PARAMETERS: $active Boolean: 1 for print, 0 for no print # $message Message to print # RETURNS: Nothing # DESCRIPTION: Outputs a message if $active is true. It removes any trailing # newline and then adds one in the 'print' to the caller doesn't # have to bother. Prepends the message with 'D> ' to show it's # a debug message. # THROWS: No exceptions # COMMENTS: None # SEE ALSO: N/A #=============================================================================== sub _debug { my ( $active, $message ) = @_; chomp($message); print STDERR "D> $message\n" if $active; } #=== FUNCTION ================================================================ # NAME: Options # PURPOSE: Processes command-line options # PARAMETERS: $optref Hash reference to hold the options # RETURNS: Undef # DESCRIPTION: # THROWS: no exceptions # COMMENTS: none # SEE ALSO: n/a #=============================================================================== sub Options { my ($optref) = @_; my @options = ( "help", "documentation|manpage", "debug=i", "field=s", "episode=i", "test_dir=s", ); if ( !GetOptions( $optref, @options ) ) { pod2usage( -msg => "$PROG version $VERSION\n", -verbose => 0, -exitval => 1 ); } return; } __END__ #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% # Application Documentation #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% #{{{ =head1 NAME edit_shownotes - edit JSON and text shownote metadata =head1 VERSION This documentation refers to edit_shownotes version 0.0.5 =head1 USAGE ./edit_shownotes [-help] [-documentation|manpage] [-debug=N] [-field=FIELD] -episode=N ./edit_shownotes -field=tags -episode=3754 ./edit_shownotes -field=summary -episode=3762 \ -test_dir=$PWD/tests/shownotes/hpr3762 =head1 OPTIONS =over 8 =item B<-help> Prints a brief help message describing the usage of the program, and then exits. =item B<-manpage> or B<-documentation> Prints the entire documentation for the script. =item B<-field=FIELD> This optional parameter defaults to B<-field=tags>. The only permitted fields are 'tags', 'title' and 'summary'. =item B<-episode=N> This mandatory option specifies the number of the HPR episode that is being parsed. It needs to be numeric even though it is often used with an 'hpr' prefix internally. Leading zeroes are not required (when these are appropriate). =item B<-test_dir=PATH> This option is for testing. Normally the script determines the path to the file(s) it will parse and update. It actually finds the field specified by the B<-field-FIELD> option in the JSON file B and calls the editor to change this, then propagates any changes to the other JSON instance and the one in B. This option has been provided to make it simpler to make copies of the files relating to an incoming show and use them to test changes to the script. The current workflow expects the "live" files in B<$HOME/HPR/Show_Submission/shownotes/hprSHOW>, but there is another area where show files are placed for testing: B<$HOME/HPR/Show_Submission/tests/shownotes/hprSHOW>. It is simply a matter of making a copy and of referencing the directory path to this option. Changes will be made to this copy and log records written, but these will contain the word 'TEST' to show that the script is being run in this mode. =item B<-debug=N> Causes certain debugging information to be displayed. 0 (the default) no debug output 1 N/A 2 TBA 3 TBA =back =head1 DESCRIPTION This script is meant to be a component in a workflow dealing with the notes submitted by HPR hosts with the shows they are uploading. The data from the web form which is used by hosts submitting shows is received in two formats, with fixed names. The file B contains the form data in a label/value format, but also uses PHP dumps to show some of the metadata. It is no longer used by the current workflow, but is retained for historical reasons. The alternative file format is JSON, which is stored in B. This contains the same data as the other file but in a more standardised format. This script is designed to allow the editing of three specific fields in these two files. This is because these fields are not used in the show note preparation workflow (though they are referenced), but any changes can be propagated back to the HPR server to allow workflows there to use the corrected version(s). The fields which can be edited are duplicated in the two files. One of these is edited, and if appropriate, is propagated to the other duplicates. This simplifies the editing of the text format, and the JSON format, which is not simple to edit in a conventional way. =head1 DIAGNOSTICS A list of every error and warning message that the application can generate (even the ones that will "never happen"), with a full explanation of each problem, one or more likely causes, and any suggested remedies. If the application generates exit status codes (e.g. under Unix) then list the exit status associated with each error. =head1 CONFIGURATION AND ENVIRONMENT A full explanation of any configuration system(s) used by the application, including the names and locations of any configuration files, and the meaning of any environment variables or properties that can be set. These descriptions must also include details of any configuration language used =head1 DEPENDENCIES Data::Dumper File::Slurper File::Temp Getopt::Long JSON Log::Handler Pod::Usage Term::ANSIColor =head1 BUGS AND LIMITATIONS There are no known bugs in this module. Please report problems to Dave Morriss (Dave.Morriss@gmail.com) Patches are welcome. =head1 AUTHOR Dave Morriss (Dave.Morriss@gmail.com) =head1 LICENCE AND COPYRIGHT Copyright (c) 2022 Dave Morriss (Dave.Morriss@gmail.com). All rights reserved. This module is free software; you can redistribute it and/or modify it under the same terms as Perl itself. See perldoc perlartistic. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. =cut #}}} # [zo to open fold, zc to close] # vim: syntax=perl:ts=8:sw=4:et:ai:tw=78:fo=tcrqn21:fdm=marker