| 
									
										
										
										
											2024-06-04 16:35:44 +01:00
										 |  |  | #!/usr/bin/env perl | 
					
						
							|  |  |  | #=============================================================================== | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | #         FILE: parse_JSON | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | #        USAGE: ./parse_JSON [-help] [-manpage] -episode=N -infile=FILE | 
					
						
							|  |  |  | #                       [-shownotes[=FILE]] [-format=FILE] [-release=FILE] | 
					
						
							|  |  |  | #                       [-assets=FILE] [-debug=N] [-[no]silent] [-[no]test] | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | #  DESCRIPTION: Script to parse the JSON now being generated for HPR | 
					
						
							|  |  |  | #               shows. Replaces the original 'parse_shownotes' and is a fair | 
					
						
							|  |  |  | #               bit simpler because the parsing of the original PHP dump isn't | 
					
						
							|  |  |  | #               needed. However, it has grown in complexity as features for | 
					
						
							|  |  |  | #               handling assets have been added. Downstream scripts now need | 
					
						
							|  |  |  | #               information about pictures and assets, and all archive files | 
					
						
							|  |  |  | #               need to be unpacked and placed in a directory for uploading to | 
					
						
							|  |  |  | #               the HPR server. | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | #      OPTIONS: --- | 
					
						
							|  |  |  | # REQUIREMENTS: --- | 
					
						
							|  |  |  | #         BUGS: --- | 
					
						
							|  |  |  | #        NOTES: --- | 
					
						
							|  |  |  | #       AUTHOR: Dave Morriss (djm), Dave.Morriss@gmail.com | 
					
						
							| 
									
										
										
										
											2024-12-01 20:45:20 +00:00
										 |  |  | #      VERSION: 0.0.17 | 
					
						
							| 
									
										
										
										
											2024-06-04 16:35:44 +01:00
										 |  |  | #      CREATED: 2020-11-28 10:52:02 | 
					
						
							| 
									
										
										
										
											2024-12-01 20:45:20 +00:00
										 |  |  | #     REVISION: 2024-10-04 18:37:29 | 
					
						
							| 
									
										
										
										
											2024-06-04 16:35:44 +01:00
										 |  |  | # | 
					
						
							|  |  |  | #=============================================================================== | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-12-01 20:45:20 +00:00
										 |  |  | use v5.36; | 
					
						
							| 
									
										
										
										
											2024-06-04 16:35:44 +01:00
										 |  |  | use utf8; | 
					
						
							| 
									
										
										
										
											2024-12-01 20:45:20 +00:00
										 |  |  | use feature qw{ postderef say signatures state try }; | 
					
						
							|  |  |  | no warnings | 
					
						
							|  |  |  |     qw{ experimental::postderef experimental::signatures experimental::try }; | 
					
						
							| 
									
										
										
										
											2024-06-04 16:35:44 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | use Getopt::Long; | 
					
						
							|  |  |  | use Pod::Usage; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | use Term::ANSIColor; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | use HTML::Parser (); | 
					
						
							|  |  |  | use HTML::LinkExtor; | 
					
						
							|  |  |  | use Text::CSV; | 
					
						
							|  |  |  | use Text::SpellChecker; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | use JSON; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | use List::MoreUtils qw{any}; | 
					
						
							|  |  |  | use Archive::Any; | 
					
						
							|  |  |  | use MIME::Types; | 
					
						
							|  |  |  | use IO::Compress::Zip qw(:all); | 
					
						
							|  |  |  | use File::Copy; | 
					
						
							|  |  |  | use File::Spec::Functions; | 
					
						
							|  |  |  | use Path::Class; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | use Log::Handler; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | use Data::Dumper; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | # Version number (manually incremented) | 
					
						
							|  |  |  | # | 
					
						
							| 
									
										
										
										
											2024-12-01 20:45:20 +00:00
										 |  |  | our $VERSION = '0.0.17'; | 
					
						
							| 
									
										
										
										
											2024-06-04 16:35:44 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | # Script and directory names | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | ( my $PROG = $0 ) =~ s|.*/||mx; | 
					
						
							|  |  |  | ( my $DIR  = $0 ) =~ s|/?[^/]*$||mx; | 
					
						
							|  |  |  | $DIR = '.' unless $DIR; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #------------------------------------------------------------------------------- | 
					
						
							|  |  |  | # Declarations | 
					
						
							|  |  |  | #------------------------------------------------------------------------------- | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | # Constants and other declarations | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | my $basedir = "$ENV{HOME}/HPR/Show_Submission"; | 
					
						
							|  |  |  | my $logdir  = "$basedir/logs"; | 
					
						
							|  |  |  | my $logfile = "$logdir/${PROG}.log"; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | my ( $snlevel, $showno, $summarylength, $notelength ); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | our $MARKUP_DETECTED = 0; | 
					
						
							| 
									
										
										
										
											2024-12-01 20:45:20 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | # Maximum and minimum number of characters (bytes) | 
					
						
							|  |  |  | # | 
					
						
							| 
									
										
										
										
											2024-06-04 16:35:44 +01:00
										 |  |  | my $MAXNOTELEN = 4000; | 
					
						
							| 
									
										
										
										
											2024-12-01 20:45:20 +00:00
										 |  |  | my $MINNOTELEN = 10; | 
					
						
							| 
									
										
										
										
											2024-06-04 16:35:44 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | # Printing: general output format | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | my $ofmt = "%-17s %s\n"; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | # URL base for relative URLs pointing at assets | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | my $base_url = 'https://hackerpublicradio.org/'; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | # Text colours | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | my $red     = color('red'); | 
					
						
							|  |  |  | my $green   = color('green'); | 
					
						
							|  |  |  | my $yellow  = color('yellow'); | 
					
						
							|  |  |  | my $magenta = color('magenta'); | 
					
						
							|  |  |  | my $bold    = color('bold'); | 
					
						
							|  |  |  | my $reset   = color('reset'); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | my %snatts = ( | 
					
						
							|  |  |  |     html_percent => 0, | 
					
						
							|  |  |  |     html_tags    => 0, | 
					
						
							|  |  |  |     is_markup    => 0, | 
					
						
							|  |  |  | ); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | # A list of signature expressions used to try and guess what markup format was | 
					
						
							|  |  |  | # used. | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | # {{{ | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | # Note that the regexes are applied to a string which is a concatenation of | 
					
						
							|  |  |  | # the notes, so care needs to be taken with anchors. After processing the JSON | 
					
						
							|  |  |  | # the lines of the notes will be separated by newline characters. Use an 'm' | 
					
						
							|  |  |  | # qualifier to make a '^' anchor treat it as multi-line. | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | my @markup_types = ( | 
					
						
							|  |  |  |     # Has [...](...) and  entities | 
					
						
							|  |  |  |     {   regex   => qr{!?\[[^\]]+\]\(https?:[^)]+\)}, | 
					
						
							|  |  |  |         type    => ['markdown'], | 
					
						
							|  |  |  |         comment => "Markdown link or image" | 
					
						
							|  |  |  |     }, | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # Has [...][nnn] entities | 
					
						
							|  |  |  |     {   regex   => qr{\[[^\]]+\]\[[^\]]+\]}, | 
					
						
							|  |  |  |         type    => ['markdown'], | 
					
						
							|  |  |  |         comment => "Markdown reference" | 
					
						
							|  |  |  |     }, | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # Has lines beginning with 1-6 hashes (Atx-style header) | 
					
						
							|  |  |  |     {   regex   => qr{^#{1,6} \w}m, | 
					
						
							|  |  |  |         type    => ['markdown'], | 
					
						
							|  |  |  |         comment => "Markdown Atx-style header" | 
					
						
							|  |  |  |     }, | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # A line of stuff followed by a line of = or - (Setex-style header) | 
					
						
							|  |  |  |     {   regex   => qr{^(\s|\w)+.^[=-]+$}sm, | 
					
						
							|  |  |  |         type    => ['markdown'], | 
					
						
							|  |  |  |         comment => "Markdown Setex-style header" | 
					
						
							|  |  |  |     }, | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # Has lines starting with 0+ spaces hyphen 1+ spaces | 
					
						
							|  |  |  |     {   regex   => qr{^\s*[-*]\s+\w}m, | 
					
						
							|  |  |  |         type    => [ 'markdown', 'reStructuredText', 'org-mode' ], | 
					
						
							|  |  |  |         comment => "Markdown or reStructuredText list element" | 
					
						
							|  |  |  |     }, | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # Has lines beginning with '\d.\s' (number(s) dot space(s)) | 
					
						
							|  |  |  |     {   regex   => qr{^\s*\d+\.\s+\w}m, | 
					
						
							|  |  |  |         type    => [ 'markdown', 'reStructuredText' ], | 
					
						
							|  |  |  |         comment => | 
					
						
							|  |  |  |             "Markdown, reStructuredText or org-mode ordered list element" | 
					
						
							|  |  |  |     }, | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # Has lines which begin with 3+ ` or ~ | 
					
						
							|  |  |  |     {   regex   => qr{^[`~]{3,}$}m, | 
					
						
							|  |  |  |         type    => ['markdown'], | 
					
						
							|  |  |  |         comment => "Markdown fenced block delimiter" | 
					
						
							|  |  |  |     }, | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # Has words enclosed in 1 or 2 * | 
					
						
							|  |  |  |     {   regex   => qr{\*{1,2}\w+\*{1,2}}, | 
					
						
							|  |  |  |         type    => [ 'markdown', 'reStructuredText' ], | 
					
						
							|  |  |  |         comment => "Markdown or reStructuredText emphasis or bold" | 
					
						
							|  |  |  |     }, | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # Has words enclosed in ` | 
					
						
							|  |  |  |     {   regex   => qr{`\w+`}, | 
					
						
							|  |  |  |         type    => ['markdown'], | 
					
						
							|  |  |  |         comment => "Markdown verbatim text" | 
					
						
							|  |  |  |     }, | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # Has a line of 3 or more *, - or _ possibly with intervening spaces | 
					
						
							|  |  |  |     {   regex   => qr{^[*_-]\s?[*_-]\s?[*_-][*_ -]*$}m, | 
					
						
							|  |  |  |         type    => ['markdown'], | 
					
						
							|  |  |  |         comment => "Markdown horizontal rule" | 
					
						
							|  |  |  |     }, | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # Has bare http/https URLs | 
					
						
							|  |  |  |     {   regex   => qr{[^(]\bhttps?:}, | 
					
						
							|  |  |  |         type    => ['txt2tags'], | 
					
						
							|  |  |  |         comment => "Bare http(s) url" | 
					
						
							|  |  |  |     }, | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # Has [...]_ entities (footnote, citation reference) | 
					
						
							|  |  |  |     {   regex   => qr{\[(\w+|\d+|#|\*)\]_}, | 
					
						
							|  |  |  |         type    => ['reStructuredText'], | 
					
						
							|  |  |  |         comment => "reStructuredText footnote, citation or reference" | 
					
						
							|  |  |  |     }, | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # Has `Text <URL>`_ entities (hyperlinks) | 
					
						
							|  |  |  |     {   regex   => qr{`\w+<https?:[^>]+>`_}, | 
					
						
							|  |  |  |         type    => ['reStructuredText'], | 
					
						
							|  |  |  |         comment => "reStructuredText hyperlink" | 
					
						
							|  |  |  |     }, | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # Has superscript or subscript markup | 
					
						
							|  |  |  |     {   regex   => qr{(\^\w+\^|\~\w+\~)}, | 
					
						
							|  |  |  |         type    => ['markdown'], | 
					
						
							|  |  |  |         comment => "Markdown superscript or subscript" | 
					
						
							|  |  |  |     }, | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # STARS KEYWORD PRIORITY TITLE TAGS is a header definition in Org mode | 
					
						
							|  |  |  |     {   regex => | 
					
						
							|  |  |  |             qr{^\*+\s(TODO|FEEDBACK|VERIFY|\||DONE|DELEGATED)?\s?(\[[#]\w\])?\s?\w+}m, | 
					
						
							|  |  |  |         type    => ['org-mode'], | 
					
						
							|  |  |  |         comment => "Org mode header" | 
					
						
							|  |  |  |     }, | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     #    {   regex   => qr{}, | 
					
						
							|  |  |  |     #        type    => ['markdown'], | 
					
						
							|  |  |  |     #        comment => "" | 
					
						
							|  |  |  |     #    }, | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | ); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # }}} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | my @markup_found; | 
					
						
							|  |  |  | my $markup_choice; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-12-01 20:45:20 +00:00
										 |  |  | my ( $showdir,     $assetdir, $statusfile ); | 
					
						
							| 
									
										
										
										
											2024-06-04 16:35:44 +01:00
										 |  |  | my ( %media_files, $media_files ); | 
					
						
							|  |  |  | my ( %assets,      @assets,    @audio, $sanitised ); | 
					
						
							|  |  |  | my ( $has_audio,   $has_extra, $has_archives ); | 
					
						
							|  |  |  | my ( @archives,    @extracted ); | 
					
						
							|  |  |  | my ( $astate,      $has_pictures, @pictures, $pictures ); | 
					
						
							| 
									
										
										
										
											2024-12-01 20:45:20 +00:00
										 |  |  | my ( $has_markup,  @markup,       $markup ); | 
					
						
							|  |  |  | my ( $json_change, @json_changes, %spellchecks ); | 
					
						
							| 
									
										
										
										
											2024-06-04 16:35:44 +01:00
										 |  |  | my @pstates = ( | 
					
						
							|  |  |  |     'No pictures found',                # 0 | 
					
						
							|  |  |  |     'Pictures that need management',    # 1 | 
					
						
							|  |  |  |     'Managed pictures',                 # 2 | 
					
						
							|  |  |  | ); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | # Enable Unicode mode | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | binmode STDOUT, ":encoding(UTF-8)"; | 
					
						
							|  |  |  | binmode STDERR, ":encoding(UTF-8)"; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #------------------------------------------------------------------------------- | 
					
						
							|  |  |  | # Options and arguments | 
					
						
							|  |  |  | #------------------------------------------------------------------------------- | 
					
						
							|  |  |  | my $DEF_DEBUG = 0; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | # Process options | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | my %options; | 
					
						
							|  |  |  | Options( \%options ); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | # Default help | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | pod2usage( | 
					
						
							|  |  |  |     -msg     => "$PROG version $VERSION\n", | 
					
						
							|  |  |  |     -verbose => 0, | 
					
						
							|  |  |  |     -exitval => 1 | 
					
						
							|  |  |  | ) if ( $options{'help'} ); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | # Detailed help | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | pod2usage( | 
					
						
							|  |  |  |     -msg       => "$PROG version $VERSION\n", | 
					
						
							|  |  |  |     -verbose   => 2, | 
					
						
							|  |  |  |     -exitval   => 1, | 
					
						
							|  |  |  |     -noperldoc => 0 | 
					
						
							|  |  |  | ) if ( $options{'documentation'} ); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | # Collect options | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | my $DEBUG  = ( defined( $options{debug} )  ? $options{debug}  : $DEF_DEBUG ); | 
					
						
							|  |  |  | my $silent = ( defined( $options{silent} ) ? $options{silent} : 0 ); | 
					
						
							|  |  |  | my $test   = ( defined( $options{test} )   ? $options{test}   : 0 ); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | $showno = $options{episode}; | 
					
						
							|  |  |  | pod2usage( | 
					
						
							|  |  |  |     -msg     => "$PROG version $VERSION\nMissing mandatory option -episode\n", | 
					
						
							|  |  |  |     -verbose => 0, | 
					
						
							|  |  |  |     -exitval => 1 | 
					
						
							|  |  |  | ) unless $showno; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | my $infile = $options{infile}; | 
					
						
							|  |  |  | pod2usage( | 
					
						
							|  |  |  |     -msg     => "$PROG version $VERSION\nMissing mandatory option -infile\n", | 
					
						
							|  |  |  |     -verbose => 0, | 
					
						
							|  |  |  |     -exitval => 1 | 
					
						
							|  |  |  | ) unless $infile; | 
					
						
							|  |  |  | die "Unable to access input file $infile: $!\n" unless -r $infile; | 
					
						
							|  |  |  | die "Input file $infile is empty\n" if -z $infile; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | # Where to put various files. The second group of options must all have | 
					
						
							|  |  |  | # filenames to be created, but the first can define its own name if not given. | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | my $shownotes = $options{shownotes}; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | my $formatfile  = $options{format}; | 
					
						
							|  |  |  | my $releasefile = $options{release}; | 
					
						
							|  |  |  | my $assetfile   = $options{assets}; | 
					
						
							|  |  |  | my $picturefile = $options{pictures}; | 
					
						
							|  |  |  | my $zipfile     = $options{zip}; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | # Work on the show notes output file, allowing defaults and substitution points | 
					
						
							|  |  |  | # for convenience. | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | if ( defined($shownotes) ) { | 
					
						
							|  |  |  |     $shownotes = output_file_name( $shownotes, $showno, 'hpr%d.out' ); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | # Directories and file specific to this show | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | $showdir  = "$basedir/shownotes/hpr$showno"; | 
					
						
							|  |  |  | $assetdir = "$showdir/uploads"; | 
					
						
							|  |  |  | $statusfile = "$showdir/.status"; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #------------------------------------------------------------------------------- | 
					
						
							|  |  |  | # Set up logging keeping the default log layout except for the date. The format | 
					
						
							|  |  |  | # is "%T [%L] %m" where '%T' is the timestamp, '%L' is the log level and '%m is | 
					
						
							|  |  |  | # the message. | 
					
						
							|  |  |  | #------------------------------------------------------------------------------- | 
					
						
							|  |  |  | my $log = Log::Handler->new(); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | $log->add( | 
					
						
							|  |  |  |     file => { | 
					
						
							|  |  |  |         timeformat     => "%Y/%m/%d %H:%M:%S", | 
					
						
							|  |  |  |         message_layout => ($test ? 'T ' : '') . "%T [%L] %m", | 
					
						
							|  |  |  |         filename       => $logfile, | 
					
						
							|  |  |  |         minlevel       => 0,                     # emergency, emerg | 
					
						
							|  |  |  |         maxlevel       => 7,                     # debug | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  | ); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #------------------------------------------------------------------------------- | 
					
						
							|  |  |  | #  Read the input file and parse it into $content (hashref) | 
					
						
							|  |  |  | #------------------------------------------------------------------------------- | 
					
						
							|  |  |  | my $json = JSON->new->utf8; | 
					
						
							|  |  |  | open( my $fh, '<:encoding(UTF-8)', $infile ); | 
					
						
							|  |  |  | my $json_text = <$fh>; | 
					
						
							|  |  |  | close($fh); | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-12-01 20:45:20 +00:00
										 |  |  | my $content; | 
					
						
							|  |  |  | try { | 
					
						
							|  |  |  |     $content = decode_json($json_text); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | catch ($e) { | 
					
						
							|  |  |  |     die colored( "Failed to decode the JSON in $infile", 'red' ) . "\n" | 
					
						
							|  |  |  | } | 
					
						
							| 
									
										
										
										
											2024-06-04 16:35:44 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | $log->info( $showno, "[$VERSION] Processing $infile" ); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | _debug( $DEBUG > 2, Dumper($content) ); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #------------------------------------------------------------------------------- | 
					
						
							|  |  |  | # Validate the JSON structure in case it's changed for some reason | 
					
						
							|  |  |  | #------------------------------------------------------------------------------- | 
					
						
							|  |  |  | die "The JSON doesn't have the right structure\n" | 
					
						
							|  |  |  |     unless ( exists( $content->{episode} ) | 
					
						
							|  |  |  |     && exists( $content->{metadata} ) | 
					
						
							|  |  |  |     && exists( $content->{host} ) ); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | print STDERR '-' x 80, "\n"; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #------------------------------------------------------------------------------- | 
					
						
							|  |  |  | # Report details from the parsed data. The percentages of HTML in the Host | 
					
						
							|  |  |  | # Profile and the notes themselves are computed using an HTML parser that counts | 
					
						
							|  |  |  | # the tags. | 
					
						
							|  |  |  | #------------------------------------------------------------------------------- | 
					
						
							|  |  |  | printf STDERR $ofmt, "Show:", $content->{metadata}{Episode_Number}; | 
					
						
							|  |  |  | printf STDERR $ofmt, "Date:", $content->{metadata}{Episode_Date}; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-12-01 20:45:20 +00:00
										 |  |  | # | 
					
						
							|  |  |  | # Trim off leading and trailing spaces in these fields | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | $json_change = 0; | 
					
						
							|  |  |  | for my $key ( 'Title', 'Summary', 'Tags' ) { | 
					
						
							|  |  |  |     my $str = trim($content->{episode}{$key}); | 
					
						
							|  |  |  |     if ($str ne $content->{episode}{$key}) { | 
					
						
							|  |  |  |         $content->{episode}{$key} = $str; | 
					
						
							|  |  |  |         $json_change = 1; | 
					
						
							|  |  |  |         push(@json_changes,$key); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-06-04 16:35:44 +01:00
										 |  |  | # | 
					
						
							|  |  |  | # Detect Unicode in the Title, Summary or Tags and flag their presence. | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | for my $key ( 'Title', 'Summary', 'Tags' ) { | 
					
						
							|  |  |  |     my $uflag = ( $content->{episode}{$key} =~ /[^\x{00}-\x{7F}]/ ); | 
					
						
							|  |  |  |     printf STDERR $ofmt, "$key:", | 
					
						
							|  |  |  |         colour_if( | 
					
						
							|  |  |  |             $uflag, | 
					
						
							|  |  |  | #            'bold yellow on_magenta', | 
					
						
							|  |  |  |             'black on_bright_yellow', | 
					
						
							|  |  |  |             $content->{episode}{$key} | 
					
						
							|  |  |  |         ); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-12-01 20:45:20 +00:00
										 |  |  | alert( $ofmt, "JSON to be updated; changes to: " . | 
					
						
							|  |  |  |     join(',',@json_changes) ) if $json_change; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-06-04 16:35:44 +01:00
										 |  |  | # | 
					
						
							|  |  |  | # Check summary length. The field might be filled and something might have | 
					
						
							|  |  |  | # been lost. | 
					
						
							|  |  |  | # TODO: Check on other lengths? | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | $summarylength = length($content->{episode}{Summary}); | 
					
						
							|  |  |  | if ($summarylength eq 100) { | 
					
						
							|  |  |  |     printf STDERR $ofmt, "- Summary: check length"; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | # Perform a spelling check on these fields. Collect any that are detected | 
					
						
							|  |  |  | # then, if any were found, report them. | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | for my $key ( 'Title', 'Summary', 'Tags' ) { | 
					
						
							|  |  |  |     my @errors = spellcheck( $content->{episode}{$key} ); | 
					
						
							|  |  |  |     $spellchecks{$key} = \@errors if @errors; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | _debug( $DEBUG > 2, '%spellchecks: ' . Dumper( \%spellchecks ) ); | 
					
						
							|  |  |  | if (%spellchecks) { | 
					
						
							|  |  |  |     print STDERR colored( "Spelling checks:\n", 'bold yellow' ); | 
					
						
							|  |  |  |     for my $key ( 'Title', 'Summary', 'Tags' ) { | 
					
						
							|  |  |  |         printf STDERR $ofmt, "- $key:", join( ',', @{ $spellchecks{$key} } ) | 
					
						
							|  |  |  |             if ( exists( $spellchecks{$key} ) ); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | # Report on HTML in profile and notes | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | printf STDERR $ofmt, "Profile:", | 
					
						
							|  |  |  |     count_tags( $content->{host}{Host_Profile} ) | 
					
						
							|  |  |  |     . " HTML start/end tags found"; | 
					
						
							|  |  |  | $snlevel = count_tags( $content->{episode}{Show_Notes} ); | 
					
						
							|  |  |  | printf STDERR $ofmt, "Notes:", $snlevel . " HTML start/end tags found"; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | # Check the note length because if they are too long we might want to split | 
					
						
							|  |  |  | # them | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | $notelength = length( $content->{episode}{Show_Notes} ); | 
					
						
							|  |  |  | if ( $notelength > $MAXNOTELEN ) { | 
					
						
							|  |  |  |     printf STDERR $ofmt, "Notes:", | 
					
						
							| 
									
										
										
										
											2024-12-01 20:45:20 +00:00
										 |  |  |         colored( "Notes are longer than $MAXNOTELEN bytes ($notelength)", | 
					
						
							|  |  |  |         'bold yellow on_magenta' ); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | elsif ( $notelength <= $MINNOTELEN ) { | 
					
						
							|  |  |  |     printf STDERR $ofmt, "Notes:", | 
					
						
							|  |  |  |         colored( "Notes are shorter than $MINNOTELEN bytes ($notelength)", | 
					
						
							| 
									
										
										
										
											2024-06-04 16:35:44 +01:00
										 |  |  |         'bold yellow on_magenta' ); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | # Look for Unicode in the notes and flag it if found | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | if ( find_Unicode( $content->{episode}{Show_Notes} ) ) { | 
					
						
							|  |  |  |     printf STDERR $ofmt, "Notes:", | 
					
						
							|  |  |  |         colored( "Unicode characters found", 'bold yellow on_magenta' ); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #------------------------------------------------------------------------------- | 
					
						
							|  |  |  | # Always check that the given show number matches what we parsed | 
					
						
							|  |  |  | #------------------------------------------------------------------------------- | 
					
						
							|  |  |  | die "The episode specified ($showno) doesn't match the one in the JSON " | 
					
						
							|  |  |  |     . "($content->{metadata}{Episode_Number})\n" | 
					
						
							|  |  |  |     unless ( $content->{metadata}{Episode_Number} == $showno ); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | # A zero host id means this is a new host. HOWEVER, it can also mean an | 
					
						
							|  |  |  | # existing host gave a mail address we don't have in the database. to take | 
					
						
							|  |  |  | # care! | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | if ( $content->{host}{Host_ID} eq 0 ) { | 
					
						
							|  |  |  |     print STDERR colored( | 
					
						
							|  |  |  |         sprintf( $ofmt, "New host:", $content->{host}{Host_Name} ), | 
					
						
							|  |  |  |         'bold yellow' ); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | else { | 
					
						
							|  |  |  |     printf STDERR $ofmt, "Host name:", $content->{host}{Host_Name}; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | # Save HTML tag count | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | $snatts{html_tags} = $snlevel; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #------------------------------------------------------------------------------- | 
					
						
							|  |  |  | # Have a go at the HTML tag percentage | 
					
						
							|  |  |  | #------------------------------------------------------------------------------- | 
					
						
							|  |  |  | $snatts{html_percent} | 
					
						
							|  |  |  |     = html_level( $content->{episode}{Show_Notes}, $snlevel ); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | if ( $snlevel > 0 ) { | 
					
						
							|  |  |  |     $log->info( $showno, "Notes contain HTML" ); | 
					
						
							|  |  |  |     push( @markup_found, 'HTML' ); | 
					
						
							|  |  |  |     if ( $snatts{html_percent} > 5 ) { | 
					
						
							|  |  |  |         $log->info( $showno, "Notes probably are HTML" ); | 
					
						
							|  |  |  |         push( @markup_found, 'HTML' ); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     else { | 
					
						
							|  |  |  |         push( | 
					
						
							|  |  |  |             @markup_found, | 
					
						
							|  |  |  |             @{  detect_markup( | 
					
						
							|  |  |  |                     $content->{episode}{Show_Notes}, \@markup_types, | 
					
						
							|  |  |  |                     $ofmt | 
					
						
							|  |  |  |                 ) | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  |         ); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | else { | 
					
						
							|  |  |  |     $log->info( $showno, "Notes contain no HTML" ); | 
					
						
							|  |  |  |     push( | 
					
						
							|  |  |  |         @markup_found, | 
					
						
							|  |  |  |         @{  detect_markup( $content->{episode}{Show_Notes}, | 
					
						
							|  |  |  |                 \@markup_types, $ofmt ) | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |     ); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #------------------------------------------------------------------------------- | 
					
						
							|  |  |  | # Report what markup was used (not really necessary any more but left in | 
					
						
							|  |  |  | # because it's interesting). | 
					
						
							|  |  |  | #------------------------------------------------------------------------------- | 
					
						
							|  |  |  | # 2022-02-18 Now we have added 'org-mode' which doesn't match any formats sent | 
					
						
							|  |  |  | # from the web form, so we highlight this to help with the editing process. | 
					
						
							|  |  |  | # It's possible to use Pandoc to generate Markdown for editing or just convert | 
					
						
							|  |  |  | # straight to HTML if the result looks good. | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | $markup_choice = choose_markup( \@markup_found ); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | printf STDERR $ofmt, "Declared format:", | 
					
						
							|  |  |  |     colour_if( | 
					
						
							|  |  |  |         $content->{metadata}{Shownotes_Format} ne 'plain_text', | 
					
						
							|  |  |  |         'bold yellow on_magenta', | 
					
						
							|  |  |  |         $content->{metadata}{Shownotes_Format} | 
					
						
							|  |  |  |     ); | 
					
						
							|  |  |  | printf STDERR $ofmt, "Found:", summarise_markup( \@markup_found ); | 
					
						
							|  |  |  | #printf STDERR $ofmt, "Recommendation:", | 
					
						
							|  |  |  | #   colour_if( | 
					
						
							|  |  |  | #       $markup_choice eq 'org-mode', | 
					
						
							|  |  |  | #       'bold yellow on_magenta', | 
					
						
							|  |  |  | #       $markup_choice | 
					
						
							|  |  |  | #   ); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | printf STDERR $ofmt, "Recommendation:", | 
					
						
							|  |  |  |     colour_switch( | 
					
						
							|  |  |  |         [ | 
					
						
							|  |  |  |             $markup_choice eq 'org-mode', | 
					
						
							|  |  |  |             $markup_choice eq 'markdown', | 
					
						
							|  |  |  |             $markup_choice eq 'txt2tags', | 
					
						
							|  |  |  |             $markup_choice eq 'none', | 
					
						
							|  |  |  |             $markup_choice =~ / or /, | 
					
						
							|  |  |  |         ], | 
					
						
							|  |  |  |         [ | 
					
						
							|  |  |  |             'bold yellow on_magenta', | 
					
						
							|  |  |  |             'yellow', | 
					
						
							|  |  |  |             'yellow', | 
					
						
							|  |  |  |             'red', | 
					
						
							|  |  |  |             'yellow', | 
					
						
							|  |  |  |         ], | 
					
						
							|  |  |  |         $markup_choice | 
					
						
							|  |  |  |     ); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #------------------------------------------------------------------------------- | 
					
						
							|  |  |  | # What about the media? | 
					
						
							|  |  |  | #------------------------------------------------------------------------------- | 
					
						
							|  |  |  | # Examine the delivered media details. Start by finding the maximum index | 
					
						
							|  |  |  | # (media_files is a JSON hash containing keys like 'name' where the values | 
					
						
							|  |  |  | # are arrays). The elements are linked by their positions. | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | my $maxind = $#{ $content->{metadata}{FILES}->{media_files}->{name} }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | # Step through the media file names and their sizes building a Perl hash keyed by the | 
					
						
							|  |  |  | # name containing the file size of each | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | for ( my $i = 0; $i <= $maxind; $i++ ) { | 
					
						
							|  |  |  |     $media_files{ $content->{metadata}{FILES}->{media_files}->{name}->[$i] } | 
					
						
							|  |  |  |         = $content->{metadata}{FILES}->{media_files}->{size}->[$i]; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | # If a media file is zero size, remove it from the list | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | for my $key ( keys(%media_files) ) { | 
					
						
							|  |  |  |     if ( $media_files{$key} eq 0 ) { | 
					
						
							|  |  |  |         delete( $media_files{$key} ); | 
					
						
							|  |  |  |         $maxind--; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | # Generate a string containing the names of the (non-empty) media files | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | if (%media_files) { | 
					
						
							|  |  |  |     $media_files = join( ", ", keys(%media_files) ); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #------------------------------------------------------------------------------- | 
					
						
							|  |  |  | # Determine the 'asset' state of the show | 
					
						
							|  |  |  | #------------------------------------------------------------------------------- | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | # (The information is in %media_files although strictly there could be other | 
					
						
							|  |  |  | # files like scripts, and pictures.) The audio is in %media_files unless the | 
					
						
							|  |  |  | # host has sent in an URL for downloading. | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | if (%media_files) { | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     # Build a hash (%assets) keyed by filename with the value being a hashref | 
					
						
							|  |  |  |     # with keys 'type' and 'size'. | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     for ( my $i = 0; $i <= $maxind; $i++ ) { | 
					
						
							|  |  |  |         my $name = $content->{metadata}{FILES}->{media_files}->{name}->[$i]; | 
					
						
							|  |  |  |         $assets{$name} = { | 
					
						
							|  |  |  |             type => $content->{metadata}{FILES}->{media_files}->{type}->[$i], | 
					
						
							|  |  |  |             size => $content->{metadata}{FILES}->{media_files}->{size}->[$i], | 
					
						
							|  |  |  |         }; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     # Process the assets | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     if (%assets) { | 
					
						
							|  |  |  |         # | 
					
						
							|  |  |  |         # Save just the file names in an array (@assets) for convenience. | 
					
						
							|  |  |  |         # | 
					
						
							|  |  |  |         @assets = keys(%assets); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         #------------------------------------------------------------------------------- | 
					
						
							|  |  |  |         # Check that there's audio in the media. | 
					
						
							|  |  |  |         #------------------------------------------------------------------------------- | 
					
						
							|  |  |  |         # | 
					
						
							|  |  |  |         # First look for any assets marked as audio or video (this one because | 
					
						
							|  |  |  |         # some audio has been seen to be marked this way). We haven't expanded | 
					
						
							|  |  |  |         # archive files yet because we don't want to eliminate those files | 
					
						
							|  |  |  |         # from @assets. | 
					
						
							|  |  |  |         # | 
					
						
							|  |  |  |         $has_audio = @audio | 
					
						
							|  |  |  |             = grep { $assets{$_}->{type} =~ /^(audio|video)/i } keys(%assets); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         # | 
					
						
							|  |  |  |         # Now, if no audio has yet been found, look for audio file extensions | 
					
						
							|  |  |  |         # | 
					
						
							|  |  |  |         unless ($has_audio) { | 
					
						
							|  |  |  |             $has_audio = @audio | 
					
						
							|  |  |  |                 = grep { $_ =~ /\.(aac|flac|mp3|ogg|spx|wav)$/i } | 
					
						
							|  |  |  |                 keys(%assets); | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         # | 
					
						
							|  |  |  |         # Check there is not more than one audio file | 
					
						
							|  |  |  |         # | 
					
						
							|  |  |  |         if ($#audio > 1) { | 
					
						
							|  |  |  |             print colored( | 
					
						
							|  |  |  |                 "**Warning** More than one audio file uploaded", 'red'), "\n"; | 
					
						
							|  |  |  |             $log->warning( $showno, "More than one audio file uploaded" ); | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         # | 
					
						
							|  |  |  |         # We have audio. Use the @audio array to remove the matches from | 
					
						
							|  |  |  |         # @assets. | 
					
						
							|  |  |  |         # | 
					
						
							|  |  |  |         if ($has_audio) { | 
					
						
							|  |  |  |             @assets = array_difference( \@assets, \@audio ); | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |         if (@assets) { | 
					
						
							|  |  |  |             _debug( $DEBUG > 2, '@assets: ' . Dumper( \@assets ) ); | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-12-01 20:45:20 +00:00
										 |  |  |         #------------------------------------------------------------------------------- | 
					
						
							|  |  |  |         # Look for un-processed markup in the assets hash. For the moment we | 
					
						
							|  |  |  |         # only look for Markdown. | 
					
						
							|  |  |  |         #------------------------------------------------------------------------------- | 
					
						
							|  |  |  |         $has_markup = @markup | 
					
						
							|  |  |  |             = grep { $assets{$_}->{type} =~ m{^text/markdown$} } keys(%assets); | 
					
						
							|  |  |  |         $markup = join( ', ', @markup ); | 
					
						
							|  |  |  |         if (@markup) { | 
					
						
							|  |  |  |             _debug( $DEBUG > 2, '@markup ' . Dumper( \@markup ) ); | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-06-04 16:35:44 +01:00
										 |  |  |         #------------------------------------------------------------------------------- | 
					
						
							|  |  |  |         # Look for archive files in the assets | 
					
						
							|  |  |  |         #------------------------------------------------------------------------------- | 
					
						
							|  |  |  |         if ( find_archive( \%assets, \@archives ) > 0 ) { | 
					
						
							|  |  |  |             _debug( $DEBUG > 1, '@archives: ' . Dumper( \@archives ) ); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             # | 
					
						
							|  |  |  |             # Extract the contents of any archives into the show directory and | 
					
						
							|  |  |  |             # write details into %assets and @extracted. | 
					
						
							|  |  |  |             # | 
					
						
							|  |  |  |             $has_archives | 
					
						
							|  |  |  |                 = ( extract_archives( \%assets, \@archives, \@extracted, $showdir ) > 0 ); | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |         else { | 
					
						
							|  |  |  |             # | 
					
						
							|  |  |  |             # No archives, but we may have assets. Keep copies of the assets | 
					
						
							|  |  |  |             # in case we need to make destructive changes. | 
					
						
							|  |  |  |             # | 
					
						
							|  |  |  |             if (@assets && $zipfile) { | 
					
						
							|  |  |  |                 my @input = map {"$showdir/$_"} @assets; | 
					
						
							|  |  |  |                 _debug( $DEBUG > 2, 'Zip creation - @input: ' . Dumper( | 
					
						
							|  |  |  |                         \@input ) ); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |                 zip \@input => $zipfile | 
					
						
							|  |  |  |                     or die "Zip creation failed: $ZipError\n"; | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         #------------------------------------------------------------------------------- | 
					
						
							|  |  |  |         # Refresh assets after expanding archives | 
					
						
							|  |  |  |         #------------------------------------------------------------------------------- | 
					
						
							|  |  |  |         # | 
					
						
							|  |  |  |         # Remove archive files and add extracted files to the assets. We may | 
					
						
							|  |  |  |         # have a different number than we started with because any archives found | 
					
						
							|  |  |  |         # will have been expanded and the archive names removed. | 
					
						
							|  |  |  |         # | 
					
						
							|  |  |  |         @assets = array_difference(\@assets,\@archives); | 
					
						
							|  |  |  |         push(@assets, @extracted); | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-12-01 20:45:20 +00:00
										 |  |  |         # | 
					
						
							|  |  |  |         # Remove markup files from the assets so we don't upload them | 
					
						
							|  |  |  |         # | 
					
						
							|  |  |  |         @assets = array_difference(\@assets,\@markup); | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-06-04 16:35:44 +01:00
										 |  |  |         # | 
					
						
							|  |  |  |         # Remove directory stuff from @assets elements and %assets keys | 
					
						
							|  |  |  |         # | 
					
						
							|  |  |  |         # foreach my $file (@assets) { | 
					
						
							|  |  |  |         #     ( my $key = $file ) =~ s{^.*/}{}; | 
					
						
							|  |  |  |         #     $assets{$key} = delete( $assets{$file} ); | 
					
						
							|  |  |  |         #     $file = $key; | 
					
						
							|  |  |  |         # } | 
					
						
							|  |  |  |         prepare_assets( \%assets, \@assets, $showdir, $assetdir ); | 
					
						
							|  |  |  |         if (@assets) { | 
					
						
							|  |  |  |             _debug( $DEBUG > 1, 'After prepare_assets, %assets: ' . Dumper( \%assets ) ); | 
					
						
							|  |  |  |             _debug( $DEBUG > 1, 'After prepare_assets, @assets: ' . Dumper( \@assets ) ); | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         #------------------------------------------------------------------------------- | 
					
						
							|  |  |  |         # Sanitise any filenames that need it | 
					
						
							|  |  |  |         #------------------------------------------------------------------------------- | 
					
						
							|  |  |  |         $sanitised = sanitise_filenames( \%assets, \@assets, $assetdir ); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         #------------------------------------------------------------------------------- | 
					
						
							|  |  |  |         # Look for any image files in the assets hash | 
					
						
							|  |  |  |         #------------------------------------------------------------------------------- | 
					
						
							|  |  |  |         $has_pictures = @pictures | 
					
						
							|  |  |  |             = grep { $assets{$_}->{type} =~ /^image/ } keys(%assets); | 
					
						
							|  |  |  |         $pictures = join( ', ', @pictures ); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         # if (@archives = | 
					
						
							|  |  |  |         #     grep { $assets{$_}->{type} =~ q{^application/zip} } keys(%assets)) { | 
					
						
							|  |  |  |         #     my $archive = Archive::Any->new( "$showdir/$archives[0]" ); | 
					
						
							|  |  |  |         #     @archived = $archive->files; | 
					
						
							|  |  |  |         #     $has_pictures += scalar(@archived); | 
					
						
							|  |  |  |         # } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         # print "D> Pictures found\n" if $has_pictures; | 
					
						
							|  |  |  |         # if ($has_pictures && @archived && $DEBUG > 2) { | 
					
						
							|  |  |  |         #    if ($has_pictures && $DEBUG > 2) { | 
					
						
							|  |  |  |         #        _debug (1, '@archives: ' . Dumper(\@archives)); | 
					
						
							|  |  |  |         #        _debug (1, '@archived: ' . Dumper(\@archived)); | 
					
						
							|  |  |  |         #    } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         if ( $has_pictures && $DEBUG > 2 ) { | 
					
						
							|  |  |  |             _debug( 1, '@pictures: ' . Dumper( \@pictures ) ); | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         # | 
					
						
							|  |  |  |         # Look for files not matching audio or image | 
					
						
							|  |  |  |         # | 
					
						
							|  |  |  |         $has_extra = grep { $assets{$_}->{type} !~ /^(image|audio|video)/ } | 
					
						
							|  |  |  |             keys(%assets); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         _debug( $DEBUG > 2, '%assets: ' . Dumper( \%assets ) ) if (%assets); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         $log->info( $showno, "Media files: $media_files" ) if ($media_files); | 
					
						
							|  |  |  |         $log->info( $showno, "Pictures: " . join( ', ', @pictures ) ) | 
					
						
							|  |  |  |             if (@pictures); | 
					
						
							| 
									
										
										
										
											2024-12-01 20:45:20 +00:00
										 |  |  |         $log->info( $showno, "Markup " . join( ', ', @markup ) ) | 
					
						
							|  |  |  |             if (@markup); | 
					
						
							| 
									
										
										
										
											2024-06-04 16:35:44 +01:00
										 |  |  |         $log->info( $showno, "Assets: " . join( ', ', @assets ) ) | 
					
						
							|  |  |  |             if (@assets); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | else { | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     # No media files at all! | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     $has_archives = $has_pictures = $has_audio = $has_extra = 0; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #------------------------------------------------------------------------------- | 
					
						
							|  |  |  | # Sanity checks | 
					
						
							|  |  |  | #------------------------------------------------------------------------------- | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | # 1. Check for included media or a download URL. Alert if neither | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | if ($media_files) { | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     # Have media, but do we have audio? | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     # printf STDERR $ofmt, "Media files:", $media_files; | 
					
						
							|  |  |  |     printf STDERR "%s\n", | 
					
						
							|  |  |  |         textFormat( $media_files, 'Media files:', 'L', 18, 80 ); | 
					
						
							|  |  |  |     unless ($has_audio) { | 
					
						
							|  |  |  |         alert( $ofmt, "No audio!" ); | 
					
						
							|  |  |  |         $log->error( $showno, "No audio!" ); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | elsif ( length( $content->{metadata}{url} ) > 0 ) { | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     # No media, but do we have a download URL? | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     printf STDERR $ofmt, "Download from:", $content->{metadata}{url}; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | else { | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     # No media and no download URL, so no audio | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     alert( $ofmt, "No audio!" ); | 
					
						
							|  |  |  |     $log->error( $showno, "No audio!" ); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | # 2. Alert if no tags | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | if ( length( $content->{episode}{Tags} ) == 0 ) { | 
					
						
							|  |  |  |     alert( $ofmt, "No tags!" ); | 
					
						
							|  |  |  |     $log->error( $showno, "No tags!" ); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | # 3. Declared html5 but no HTML found | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | if ( $snlevel == 0 && $content->{metadata}{Shownotes_Format} =~ /html5/i ) { | 
					
						
							|  |  |  |     alert( $ofmt, "Declared format 'html5' with no HTML in the notes!" ); | 
					
						
							|  |  |  |     $log->error( $showno, | 
					
						
							|  |  |  |         "Declared format 'html5' with no HTML in the notes!" ); | 
					
						
							|  |  |  |     #    printf STDERR $ofmt, "", | 
					
						
							|  |  |  |     #        "${yellow}Forced format to 'plain_text'.${reset}"; | 
					
						
							|  |  |  |     printf STDERR colored( | 
					
						
							|  |  |  |         sprintf( $ofmt, "", "Forced format to 'plain_text'." ), | 
					
						
							|  |  |  |         'bold yellow' ); | 
					
						
							|  |  |  |     $content->{metadata}{Shownotes_Format} = 'plain_text'; | 
					
						
							|  |  |  |     # TODO: propagate the format change into the JSON | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | # 4. Tags aren't CSV. Not much of a check since so many formats are acceptable | 
					
						
							|  |  |  | #    here but not in the database. | 
					
						
							|  |  |  | #    If there's only one tag write an alert reporting the length in case the | 
					
						
							|  |  |  | #    sender forgot the commas. | 
					
						
							|  |  |  | #    TODO: Reconsider after using for a while. May be no point in doing this. | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | if ( length( $content->{episode}{Tags} ) > 0 ) { | 
					
						
							|  |  |  |     my $csv = Text::CSV->new( { sep_char => ',', allow_whitespace => 1 } ) | 
					
						
							|  |  |  |         or warn "Problem with Text::CSV: $@"; | 
					
						
							|  |  |  |     my $status = $csv->parse( $content->{episode}{'Tags'} ); | 
					
						
							|  |  |  |     unless ($status) { | 
					
						
							|  |  |  |         alert( $ofmt, $csv->error_input() . "\n" . $csv->error_diag() ); | 
					
						
							|  |  |  |         $log->error( $showno, "Tag format is invalid" ); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     my @cols = $csv->fields(); | 
					
						
							|  |  |  |     if ( scalar(@cols) eq 1 ) { | 
					
						
							|  |  |  |         alert( $ofmt, | 
					
						
							|  |  |  |             "Only one tag " . length( $cols[0] ) . " characters long!" ); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | # 5. Declared HTML, but does it have assets and if so are they properly | 
					
						
							|  |  |  | #    pointed to? | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | if ( $snlevel > 0 && $content->{metadata}{Shownotes_Format} =~ /html5/i ) { | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     # Perform a check on the notes in memory, looking for asset links and | 
					
						
							|  |  |  |     # checking they are kosher | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     unless ( check_html( $content->{episode}{Show_Notes}, $base_url ) ) { | 
					
						
							|  |  |  |         print colored( | 
					
						
							|  |  |  |             "Apparently incorrect URLs detected in the notes", 'red' | 
					
						
							|  |  |  |             ), "\n"; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | # 6. Declared plain text, but analysis recommends HTML, so highlight this to | 
					
						
							|  |  |  | #    allow changes to be made. | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | #if ( $snlevel > 5 && $content->{metadata}{Shownotes_Format} =~ /plain_text/i ) { | 
					
						
							|  |  |  | if (   $markup_choice eq 'html5' | 
					
						
							|  |  |  |     && $content->{metadata}{Shownotes_Format} =~ /plain_text/i ) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |     alert( $ofmt, | 
					
						
							|  |  |  |         "Declared format 'plain_text' but notes seem to be HTML5!" ); | 
					
						
							|  |  |  |     $log->error( $showno, | 
					
						
							|  |  |  |         "Declared format 'plain_text' but notes seem to be HTML5!" ); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-12-01 20:45:20 +00:00
										 |  |  | # | 
					
						
							|  |  |  | # 7. The host has sent in markup version(s) of their external notes, so we | 
					
						
							|  |  |  | #    need to take special action. | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | if ($markup) { | 
					
						
							|  |  |  |     printf STDERR "%s\n", | 
					
						
							|  |  |  |         textFormat( $markup, 'Markup files:', 'L', 18, 80 ); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-06-04 16:35:44 +01:00
										 |  |  | #------------------------------------------------------------------------------- | 
					
						
							|  |  |  | # Determine the picture asset state | 
					
						
							|  |  |  | #------------------------------------------------------------------------------- | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | # If there are pictures and the format is plain text then the state is 'Pictures | 
					
						
							|  |  |  | # that need management' (1), otherwise 'Managed pictures' (2). If no pictures | 
					
						
							|  |  |  | # then state is 'No pictures found' (0). | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | if ($has_pictures) { | 
					
						
							|  |  |  |     $astate | 
					
						
							|  |  |  |         = ( $content->{metadata}{Shownotes_Format} eq 'plain_text' ? 1 : 2 ); | 
					
						
							|  |  |  |     # printf STDERR $ofmt, "Pictures:", $pictures; | 
					
						
							|  |  |  |     printf STDERR "%s\n", textFormat( $pictures, 'Pictures:', 'L', 18, 80 ); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | else { | 
					
						
							|  |  |  |     $astate = 0; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | printf STDERR $ofmt, "Picture state:", | 
					
						
							|  |  |  |     colour_if( $astate > 0, 'bold yellow on_magenta', $pstates[$astate] ); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #------------------------------------------------------------------------------- | 
					
						
							|  |  |  | # Flag other assets so they aren't missed | 
					
						
							|  |  |  | #------------------------------------------------------------------------------- | 
					
						
							|  |  |  | if ($has_extra) { | 
					
						
							|  |  |  |     printf STDERR $ofmt, "Asset state:", | 
					
						
							|  |  |  |         colored( ['bold yellow on_magenta'], 'Other files uploaded' ); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | if (@assets) { | 
					
						
							|  |  |  |     printf STDERR $ofmt, "Archives: ", join( ', ', @archives ) | 
					
						
							|  |  |  |         if $has_archives; | 
					
						
							|  |  |  |     # printf STDERR $ofmt, "All assets:", join( ', ', @assets ); | 
					
						
							|  |  |  |     printf STDERR "%s\n", | 
					
						
							|  |  |  |         textFormat( join( ', ', @assets ), 'All assets:', 'L', 18, 80 ); | 
					
						
							|  |  |  |     if ( $sanitised > 0 ) { | 
					
						
							|  |  |  |         printf STDERR $ofmt, 'Assets sanitised:', "$sanitised"; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | # End of content report | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | print STDERR '-' x 80, "\n"; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | # Log choices and problems | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | $log->info( $showno, | 
					
						
							|  |  |  |     "Declared format: " . $content->{metadata}{Shownotes_Format} ); | 
					
						
							|  |  |  | $log->info( $showno, "Recommended markup processing: $markup_choice" ); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #------------------------------------------------------------------------------- | 
					
						
							|  |  |  | # Output to relevant files | 
					
						
							|  |  |  | #------------------------------------------------------------------------------- | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | # Write the declared format to a file if requested | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | if ( defined($formatfile) ) { | 
					
						
							|  |  |  |     open( my $fmtfile, '>:encoding(UTF-8)', $formatfile ) | 
					
						
							|  |  |  |         or die "Unable to open output file $formatfile $!\n"; | 
					
						
							|  |  |  |     print $fmtfile $content->{metadata}{Shownotes_Format}, "\n"; | 
					
						
							|  |  |  |     close($fmtfile); | 
					
						
							|  |  |  |     print colored( "Format details written to $formatfile", 'green' ), "\n" | 
					
						
							|  |  |  |         unless $silent; | 
					
						
							|  |  |  |     $log->info( $showno, "Format details written to $formatfile" ); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | # Write the release date to a file if requested | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | if ( defined($releasefile) ) { | 
					
						
							|  |  |  |     open( my $relfile, '>:encoding(UTF-8)', $releasefile ) | 
					
						
							|  |  |  |         or die "Unable to open output file $releasefile $!\n"; | 
					
						
							|  |  |  |     print $relfile $content->{metadata}{Episode_Date}, "\n"; | 
					
						
							|  |  |  |     close($relfile); | 
					
						
							|  |  |  |     print colored( "Release date written to $releasefile", 'green' ), "\n" | 
					
						
							|  |  |  |         unless $silent; | 
					
						
							|  |  |  |     $log->info( $showno, "Release date written to $releasefile" ); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | # Write the picture file names to a file if requested and there are some | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | if ( @pictures && defined($picturefile) ) { | 
					
						
							|  |  |  |     open( my $picfile, '>:encoding(UTF-8)', $picturefile ) | 
					
						
							|  |  |  |         or die "Unable to open output file $picturefile $!\n"; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     # Sort the pictures alphabetically in case the host didn't order them | 
					
						
							|  |  |  |     # sensibly. | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     foreach my $picture (sort(@pictures)) { | 
					
						
							|  |  |  |         print $picfile "$picture\n"; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     close($picfile); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     print colored( "Picture names written to $picturefile", 'green' ), "\n" | 
					
						
							|  |  |  |         unless $silent; | 
					
						
							|  |  |  |     $log->info( $showno, "Picture names written to $picturefile" ); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | # Write the asset file names to a file if requested and there are some | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | if ( @assets && defined($assetfile) ) { | 
					
						
							|  |  |  |     open( my $assfile, '>:encoding(UTF-8)', $assetfile ) | 
					
						
							|  |  |  |         or die "Unable to open output file $assetfile $!\n"; | 
					
						
							|  |  |  |     foreach my $asset (@assets) { | 
					
						
							|  |  |  |         print $assfile "$asset\n"; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     close($assfile); | 
					
						
							|  |  |  |     print colored( "Asset names written to $assetfile", 'green' ), "\n" | 
					
						
							|  |  |  |         unless $silent; | 
					
						
							|  |  |  |     $log->info( $showno, "Asset names written to $assetfile" ); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #------------------------------------------------------------------------------- | 
					
						
							|  |  |  | # Write the shownotes component to a file if requested | 
					
						
							|  |  |  | #------------------------------------------------------------------------------- | 
					
						
							|  |  |  | if ($shownotes) { | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     # Remove carriage returns | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     $content->{episode}{Show_Notes} =~ s/\r//g; | 
					
						
							|  |  |  |     _debug( $DEBUG > 2, quote_crlf( $content->{episode}{Show_Notes} ) ); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     # Save the notes | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     open( my $out, '>:encoding(UTF-8)', $shownotes ) | 
					
						
							|  |  |  |         or die "Unable to open output file $shownotes: $!\n"; | 
					
						
							|  |  |  |     print $out $content->{episode}{Show_Notes}, "\n"; | 
					
						
							|  |  |  |     close($out); | 
					
						
							|  |  |  |     print colored( "Shownotes written to $shownotes", 'green' ), "\n" | 
					
						
							|  |  |  |         unless $silent; | 
					
						
							|  |  |  |     $log->info( $showno, "Shownotes written to $shownotes" ); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     # We're generating shownotes and we have declared HTML (or we're pretty | 
					
						
							|  |  |  |     # sure after parsing that they are HTML), so save the notes as HTML as | 
					
						
							|  |  |  |     # well. Now achieved simply by symlinking the two files if they are | 
					
						
							|  |  |  |     # different. This allows the 'do_edit` script to edit the *.out file and | 
					
						
							|  |  |  |     # thereby affect the *.html file. | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     # 2018-09-13: Removed the check for $markup_choice because it's adding | 
					
						
							|  |  |  |     # links inappropriately. | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     # 2020-05-31: Turned the symlink to a hard link | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     if ( $content->{metadata}{Shownotes_Format} =~ /html5/i ) { | 
					
						
							|  |  |  |         ( my $htmlfile = $shownotes ) =~ s/\.[^.]+$/.html/; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         unless ( $htmlfile eq $shownotes ) { | 
					
						
							|  |  |  |             if ( link( $shownotes, $htmlfile ) ) { | 
					
						
							|  |  |  |                 print colored( | 
					
						
							|  |  |  |                     "HTML notes - linked $htmlfile to $shownotes", 'green' | 
					
						
							|  |  |  |                     ), | 
					
						
							|  |  |  |                     "\n"; | 
					
						
							|  |  |  |                 $log->info( $showno, | 
					
						
							|  |  |  |                     "HTML notes - linked $htmlfile to $shownotes" ); | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  |             else { | 
					
						
							|  |  |  |                 print colored( | 
					
						
							|  |  |  |                     "**Warning** Failed to link $htmlfile to $shownotes", | 
					
						
							|  |  |  |                     'red' | 
					
						
							|  |  |  |                     ), | 
					
						
							|  |  |  |                     "\n"; | 
					
						
							|  |  |  |                 print colored( "Equivalent CLI command:", 'yellow' ), | 
					
						
							|  |  |  |                     "\n", | 
					
						
							|  |  |  |                     colored( "ln $shownotes $htmlfile", 'yellow' ), "\n"; | 
					
						
							|  |  |  |                 $log->warning( $showno, | 
					
						
							|  |  |  |                     "Failed to link $htmlfile to $shownotes" ); | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | else { | 
					
						
							|  |  |  |     print colored( "No shownotes written", 'green' ), "\n" unless $silent; | 
					
						
							|  |  |  |     $log->info( $showno, "No shownotes written" ); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | # Save the status | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | update_status($statusfile,'parsed'); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | exit; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #===  FUNCTION  ================================================================ | 
					
						
							|  |  |  | #         NAME: find_archive | 
					
						
							|  |  |  | #      PURPOSE: Look though the assets for files that are some kind of archive | 
					
						
							|  |  |  | #   PARAMETERS: $assets         hashref pointing to the asset filename list | 
					
						
							|  |  |  | #                               (contains size and type information) | 
					
						
							|  |  |  | #               $archfiles      arrayref to hold the archive files found | 
					
						
							|  |  |  | #      RETURNS: Number of archive files found | 
					
						
							|  |  |  | #  DESCRIPTION: | 
					
						
							|  |  |  | #       THROWS: No exceptions | 
					
						
							|  |  |  | #     COMMENTS: Uses 'any' from 'List::MoreUtils' | 
					
						
							|  |  |  | #     SEE ALSO: N/A | 
					
						
							|  |  |  | #=============================================================================== | 
					
						
							|  |  |  | sub find_archive { | 
					
						
							|  |  |  |     my ( $assets, $archfiles ) = @_; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     my ( $count, $type ); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     # Return if no assets given | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     return 0 unless %$assets; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     # Known archive types (from what's been seen so far). Declared as | 
					
						
							|  |  |  |     # a 'state' variable so it's only initialised once. | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     state @mime_types = ( | 
					
						
							|  |  |  |         #{{{ | 
					
						
							|  |  |  |         'application/x-tar', | 
					
						
							|  |  |  |         'application/x-bzip-compressed-tar', | 
					
						
							|  |  |  |         'application/x-compressed-tar', | 
					
						
							|  |  |  |         'application/x-gtar', | 
					
						
							|  |  |  |         'application/zip', | 
					
						
							|  |  |  |         'application/x-zip-compressed', | 
					
						
							|  |  |  |         'application/x-bzip2', | 
					
						
							|  |  |  |         'application/gzip', | 
					
						
							|  |  |  |         #}}} | 
					
						
							|  |  |  |     ); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     $count = 0; | 
					
						
							|  |  |  |     foreach my $file ( keys(%$assets) ) { | 
					
						
							|  |  |  |         $type = $assets->{$file}->{type}; | 
					
						
							|  |  |  |         if ( any { $_ eq $type } @mime_types ) { | 
					
						
							|  |  |  |             push( @$archfiles, $file ); | 
					
						
							|  |  |  |             $count++; | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return $count; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #===  FUNCTION  ================================================================ | 
					
						
							|  |  |  | #         NAME: extract_archives | 
					
						
							|  |  |  | #      PURPOSE: Given a list of archive files, extract their contents and save | 
					
						
							|  |  |  | #               the file names | 
					
						
							|  |  |  | #   PARAMETERS: $assets         hashref containing the filename list and some | 
					
						
							|  |  |  | #                               attributes. Read and written. | 
					
						
							|  |  |  | #               $archfiles      arrayref holding the archive files found (by | 
					
						
							|  |  |  | #                               find_archive). Read only. | 
					
						
							|  |  |  | #               $extracted      arrayref to hold the files extracted from the | 
					
						
							|  |  |  | #                               archives. | 
					
						
							|  |  |  | #               $showdir        path to find the archive file(s) | 
					
						
							|  |  |  | #      RETURNS: Number of files extracted. Alters %$assets to reflect the new | 
					
						
							|  |  |  | #               files and remove the extracted archive files. | 
					
						
							|  |  |  | #  DESCRIPTION: Just return with zero if there are no archives. | 
					
						
							|  |  |  | #               Process the archives one by one. Open the current one with | 
					
						
							|  |  |  | #               Archive::Any and save the file contents to return to the | 
					
						
							|  |  |  | #               caller. Extract the archive contents to the 'uploads' | 
					
						
							|  |  |  | #               directory in the directory for the show. | 
					
						
							|  |  |  | #               Remove all the archive file references from %$assets. Add in | 
					
						
							|  |  |  | #               the extracted files using MIME::Types to get the mime details | 
					
						
							|  |  |  | #               and 'stat' to get the sizes. Keep a cound of the extracted | 
					
						
							|  |  |  | #               files to return to the caller. | 
					
						
							|  |  |  | #       THROWS: No exceptions | 
					
						
							|  |  |  | #     COMMENTS: None | 
					
						
							|  |  |  | #     SEE ALSO: N/A | 
					
						
							|  |  |  | #=============================================================================== | 
					
						
							|  |  |  | sub extract_archives { | 
					
						
							|  |  |  |     my ( $assets, $archfiles, $extracted, $showdir ) = @_; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     my ( $aa, $mt, $typename, $filecount ); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     # Return if no archives given | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     return 0 unless @$archfiles; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     # Loop through the archives we were given. There could be more than one, | 
					
						
							|  |  |  |     # though it's likely to be rare. | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     foreach my $arch (@$archfiles) { | 
					
						
							|  |  |  |         #print "D> Archive file: $arch\n"; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         # | 
					
						
							|  |  |  |         # Open the archive file | 
					
						
							|  |  |  |         # | 
					
						
							|  |  |  |         $aa = Archive::Any->new("$showdir/$arch"); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         # | 
					
						
							|  |  |  |         # Get and store a list of the contents of the archive | 
					
						
							|  |  |  |         # | 
					
						
							|  |  |  |         push( @$extracted, $aa->files ); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         # | 
					
						
							|  |  |  |         # Extract everything to the show directory | 
					
						
							|  |  |  |         # | 
					
						
							|  |  |  |         #print "About to extract to $assetdir\n"; | 
					
						
							|  |  |  |         $aa->extract($showdir); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     #print "D> Extracted files: ",join("\nD> ", @$extracted),"\n"; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     # Now update the %$assets hash, first deleting the archive files we have | 
					
						
							|  |  |  |     # extracted the contents of. | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     foreach my $arch (@$archfiles) { | 
					
						
							|  |  |  |         delete( $assets->{$arch} ); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     # _debug(1,'%$assets after deletions: ' . Dumper($assets)); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     # Now add in the extracted files with their details | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     foreach my $asset (@$extracted) { | 
					
						
							|  |  |  |         # | 
					
						
							|  |  |  |         # Rationalise the filename | 
					
						
							|  |  |  |         # | 
					
						
							|  |  |  |         $asset = canonpath($asset); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         # | 
					
						
							|  |  |  |         # Get the MIME type of the file (stringifying the result of mimeTypeOf | 
					
						
							|  |  |  |         # otherwise it's a MIME::Type object) | 
					
						
							|  |  |  |         # | 
					
						
							|  |  |  |         $mt               = MIME::Types->new; | 
					
						
							|  |  |  |         $typename         = "" . $mt->mimeTypeOf("$showdir/$asset"); | 
					
						
							|  |  |  |         $assets->{$asset} = { | 
					
						
							|  |  |  |             type => $typename, | 
					
						
							|  |  |  |             size => ( stat("$showdir/$asset") )[7], | 
					
						
							|  |  |  |         }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         $filecount++; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     # _debug(1,'%$assets after additions: ' . Dumper($assets)); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return $filecount; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #===  FUNCTION  ================================================================ | 
					
						
							|  |  |  | #         NAME: array_difference | 
					
						
							|  |  |  | #      PURPOSE: Determine the members of the left array not in the right array | 
					
						
							|  |  |  | #   PARAMETERS: $left           arrayref to the left-hand array | 
					
						
							|  |  |  | #               $right          arrayref to the right-hand array | 
					
						
							|  |  |  | #      RETURNS: The difference between the two arrays (@$left - @$right in | 
					
						
							|  |  |  | #               concept). All the elements that are in @$left but not in | 
					
						
							|  |  |  | #               @$right are returned. | 
					
						
							|  |  |  | #  DESCRIPTION: Loads a hash with keys from the $left array. This will remove | 
					
						
							|  |  |  | #               duplicates as a by-product. Using the $right array to provide | 
					
						
							|  |  |  | #               keys we delete all hash elements that match. This leaves the | 
					
						
							|  |  |  | #               hash containing the difference between the two arrays by | 
					
						
							|  |  |  | #               "subtracting" the right from the left. Another consequence of | 
					
						
							|  |  |  | #               the method used is that the resulting list is not in the same | 
					
						
							|  |  |  | #               order as the $left array. | 
					
						
							|  |  |  | #       THROWS: No exceptions | 
					
						
							|  |  |  | #     COMMENTS: None | 
					
						
							|  |  |  | #     SEE ALSO: N/A | 
					
						
							|  |  |  | #=============================================================================== | 
					
						
							|  |  |  | sub array_difference { | 
					
						
							|  |  |  |     my ( $left, $right ) = @_; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     my %files; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     unless (ref($left) eq 'ARRAY' && ref($right) eq 'ARRAY') { | 
					
						
							|  |  |  |         warn "Arguments to array_difference must both be arrayrefs\n"; | 
					
						
							|  |  |  |         return; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     @files{@$left} = ();          # All files are the keys. | 
					
						
							|  |  |  |     delete( @files{@$right} );    # Remove the links. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return keys(%files); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #===  FUNCTION  ================================================================ | 
					
						
							|  |  |  | #         NAME: prepare_assets | 
					
						
							|  |  |  | #      PURPOSE: Tidy assets, removing directories if necessary, and place the | 
					
						
							|  |  |  | #               files in the upload directory | 
					
						
							|  |  |  | #   PARAMETERS: $assets_h       hashref containing asset details | 
					
						
							|  |  |  | #               $assets_a       arrayref containing asset names | 
					
						
							|  |  |  | #               $showdir        string containing the show directory | 
					
						
							|  |  |  | #               $assetdir       string containing the asset upload directory | 
					
						
							|  |  |  | #                               path | 
					
						
							|  |  |  | #      RETURNS: Nothing | 
					
						
							|  |  |  | #  DESCRIPTION:  | 
					
						
							|  |  |  | #       THROWS: No exceptions | 
					
						
							|  |  |  | #     COMMENTS: None | 
					
						
							|  |  |  | #     SEE ALSO: N/A | 
					
						
							|  |  |  | #=============================================================================== | 
					
						
							|  |  |  | sub prepare_assets { | 
					
						
							|  |  |  |     my ( $assets_h, $assets_a, $showdir, $assetdir ) = @_; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     my ( $file, $dir, $base, @dels, %dirs, %moves ); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     # Create the asset directory | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     unless ( -e $assetdir ) { | 
					
						
							|  |  |  |         mkdir($assetdir) or die "Failed to create $assetdir\n"; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     # Remove directory stuff from @assets elements and %assets keys saving | 
					
						
							|  |  |  |     # directories, and moving files as we go. | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     # foreach my $file (@$assets_a) { | 
					
						
							|  |  |  |     for ( my $i = 0; $i <= $#{$assets_a}; $i++ ) { | 
					
						
							|  |  |  |         # | 
					
						
							|  |  |  |         # Get and parse an asset filename (using Path::Class). Force the | 
					
						
							|  |  |  |         # directory and base part to become strings ("stringify") | 
					
						
							|  |  |  |         # | 
					
						
							|  |  |  |         $file = file( $assets_a->[$i] ); | 
					
						
							|  |  |  |         $dir  = "" . $file->dir; | 
					
						
							|  |  |  |         $base = "" . $file->basename; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         # | 
					
						
							|  |  |  |         # Does the asset have a directory part and a basename part? If so we | 
					
						
							|  |  |  |         # want to move the file to the asset directory and eventually delete | 
					
						
							|  |  |  |         # the directory the asset came from. | 
					
						
							|  |  |  |         # | 
					
						
							|  |  |  |         # if ( ( $dir, $base ) = ( $file =~ qr{^(.*/)(.+)$} ) ) { | 
					
						
							|  |  |  |         if ( $dir !~ /^\.?$/ && $base ne '' ) { | 
					
						
							|  |  |  |             # | 
					
						
							|  |  |  |             # The asset filename starts with a directory. | 
					
						
							|  |  |  |             # Save it. | 
					
						
							|  |  |  |             # | 
					
						
							|  |  |  |             $dirs{$dir} = 1 unless exists( $dirs{$dir} ); | 
					
						
							|  |  |  |             # push( @dirs, $dir ); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             # | 
					
						
							|  |  |  |             # Update the hash key | 
					
						
							|  |  |  |             # | 
					
						
							|  |  |  |             $assets_h->{$base} = delete( $assets_h->{$assets_a->[$i]} ); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             # | 
					
						
							|  |  |  |             # Save the move | 
					
						
							|  |  |  |             # | 
					
						
							|  |  |  |             $moves{"$showdir/$file"} = "$assetdir/$base"; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             # | 
					
						
							|  |  |  |             # Alter the asset name in @assets | 
					
						
							|  |  |  |             # | 
					
						
							|  |  |  |             # $file = $base; | 
					
						
							|  |  |  |             $assets_a->[$i] = $base; | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |         elsif ( $base eq '' ) { | 
					
						
							|  |  |  |             # | 
					
						
							|  |  |  |             # We just have a directory, no basename, so queue to delete it | 
					
						
							|  |  |  |             # | 
					
						
							|  |  |  |             push(@dels,$i); | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |         else { | 
					
						
							|  |  |  |             # | 
					
						
							|  |  |  |             # No directory (or just '.'), just a basename, so queue the move | 
					
						
							|  |  |  |             # | 
					
						
							|  |  |  |             $moves{"$showdir/$base"} = "$assetdir/$base"; | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     # Perform any moves (using File::Copy) | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     foreach my $move (keys(%moves)) { | 
					
						
							|  |  |  |         # TODO: Is the 'unless' needed? | 
					
						
							|  |  |  |         move ($move,$moves{$move}) unless ($move =~ qr{/$}); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     # Clean out unwanted directories from @$assets_a | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     foreach my $del (@dels) { | 
					
						
							|  |  |  |         splice(@$assets_a,$del,1); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     # Remove any real directories created by unpacking archives | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     foreach my $d (keys(%dirs)) { | 
					
						
							|  |  |  |         rmdir("$showdir/$d") or | 
					
						
							|  |  |  |             warn "Problem deleting unwanted directory: $d\n"; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #===  FUNCTION  ================================================================ | 
					
						
							|  |  |  | #         NAME: sanitise_filenames | 
					
						
							|  |  |  | #      PURPOSE: Deal with filenames with spaces to make life easier | 
					
						
							|  |  |  | #   PARAMETERS: $assets_h       hashref containing asset details | 
					
						
							|  |  |  | #               $assets_a       arrayref containing asset names | 
					
						
							|  |  |  | #               $assetdir       string containing the asset upload directory | 
					
						
							|  |  |  | #                               path | 
					
						
							|  |  |  | #      RETURNS: Number of fixes | 
					
						
							|  |  |  | #  DESCRIPTION:  | 
					
						
							|  |  |  | #       THROWS: No exceptions | 
					
						
							|  |  |  | #     COMMENTS: None | 
					
						
							|  |  |  | #     SEE ALSO: N/A | 
					
						
							|  |  |  | #=============================================================================== | 
					
						
							|  |  |  | sub sanitise_filenames { | 
					
						
							|  |  |  |     my ( $assets_h, $assets_a, $assetdir ) = @_; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     my ( $file, $newfile, %moves, $changes ); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     # Look at all asset filenames | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     for ( my $i = 0; $i <= $#{$assets_a}; $i++ ) { | 
					
						
							|  |  |  |         $file = $assets_a->[$i]; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         # | 
					
						
							|  |  |  |         # Does the name have whitespace? | 
					
						
							|  |  |  |         # | 
					
						
							|  |  |  |         if ($file =~ /\s/) { | 
					
						
							|  |  |  |             # | 
					
						
							|  |  |  |             # Sanitise and save the change | 
					
						
							|  |  |  |             # | 
					
						
							|  |  |  |             ($newfile = $file) =~ s/ /_/g; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             # | 
					
						
							|  |  |  |             # Adjust asset array and hash | 
					
						
							|  |  |  |             # | 
					
						
							|  |  |  |             $assets_a->[$i] = $newfile; | 
					
						
							|  |  |  |             $assets_h->{$newfile} = delete( $assets_h->{$file} ); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             # | 
					
						
							|  |  |  |             # Save the file move | 
					
						
							|  |  |  |             # | 
					
						
							|  |  |  |             $moves{"$assetdir/$file"} = "$assetdir/$newfile"; | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if (%moves) { | 
					
						
							|  |  |  |         # | 
					
						
							|  |  |  |         # Perform any moves (using File::Copy) | 
					
						
							|  |  |  |         # | 
					
						
							|  |  |  |         $changes = 0; | 
					
						
							|  |  |  |         foreach my $move (keys(%moves)) { | 
					
						
							|  |  |  |             move ($move,$moves{$move}); | 
					
						
							|  |  |  |             $changes++; | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         return $changes; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     else { | 
					
						
							|  |  |  |         return 0; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #===  FUNCTION  ================================================================ | 
					
						
							|  |  |  | #         NAME: choose_markup | 
					
						
							|  |  |  | #      PURPOSE: Given a list of possible markup types, choose which one to | 
					
						
							|  |  |  | #               recommend | 
					
						
							|  |  |  | #   PARAMETERS: $markup_found   Arrayref containing markup names | 
					
						
							|  |  |  | #      RETURNS: The markup name to use | 
					
						
							|  |  |  | #  DESCRIPTION: The array referenced by $markup_found should have one or more | 
					
						
							|  |  |  | #               instances of a markup type. We want to decide which one to | 
					
						
							|  |  |  | #               recommend for processing. | 
					
						
							|  |  |  | #       THROWS: No exceptions | 
					
						
							|  |  |  | #     COMMENTS: None | 
					
						
							|  |  |  | #     SEE ALSO: N/A | 
					
						
							|  |  |  | #=============================================================================== | 
					
						
							|  |  |  | sub choose_markup { | 
					
						
							|  |  |  |     my ($markup_found) = @_; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     my %freq; | 
					
						
							|  |  |  |     my @res; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return 'none' unless $markup_found && scalar(@$markup_found) > 0; | 
					
						
							|  |  |  |     return 'none' if $markup_found->[0] eq 'none'; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     # Collect frequencies of each markup type | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     foreach my $mu (@$markup_found) { | 
					
						
							|  |  |  |         if ( exists( $freq{$mu} ) ) { | 
					
						
							|  |  |  |             $freq{$mu}++; | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |         else { | 
					
						
							|  |  |  |             $freq{$mu} = 1; | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     # If there's only one key then that's the recommendation | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     return ( keys(%freq) )[0] if scalar( keys(%freq) ) == 1; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     # Find the highest frequency | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     my $max = 0; | 
					
						
							|  |  |  |     foreach my $key ( keys(%freq) ) { | 
					
						
							|  |  |  |         if ( $freq{$key} > $max ) { | 
					
						
							|  |  |  |             $max = $freq{$key}; | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     # Collect all the keys with the maximum frequency | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     foreach my $key ( keys(%freq) ) { | 
					
						
							|  |  |  |         if ( $freq{$key} == $max ) { | 
					
						
							|  |  |  |             push( @res, $key ); | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     # Return the matching markup types | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     return join( ' or ', @res ); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #===  FUNCTION  ================================================================ | 
					
						
							|  |  |  | #         NAME: summarise_markup | 
					
						
							|  |  |  | #      PURPOSE: Summarise the list of matches to markup types | 
					
						
							|  |  |  | #   PARAMETERS: $markup_found   Arrayref containing markup names | 
					
						
							|  |  |  | #      RETURNS: A string containing markup types and their frequencies | 
					
						
							|  |  |  | #  DESCRIPTION: | 
					
						
							|  |  |  | #       THROWS: No exceptions | 
					
						
							|  |  |  | #     COMMENTS: None | 
					
						
							|  |  |  | #     SEE ALSO: N/A | 
					
						
							|  |  |  | #=============================================================================== | 
					
						
							|  |  |  | sub summarise_markup { | 
					
						
							|  |  |  |     my ($markup_found) = @_; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     my %freq; | 
					
						
							|  |  |  |     my @res; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return 'none' unless $markup_found && scalar(@$markup_found) > 0; | 
					
						
							|  |  |  |     return 'none' if $markup_found->[0] eq 'none'; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     # Collect frequencies of each markup type | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     foreach my $mu (@$markup_found) { | 
					
						
							|  |  |  |         if ( exists( $freq{$mu} ) ) { | 
					
						
							|  |  |  |             $freq{$mu}++; | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |         else { | 
					
						
							|  |  |  |             $freq{$mu} = 1; | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     foreach my $key ( sort( keys(%freq) ) ) { | 
					
						
							|  |  |  |         push( @res, "$key($freq{$key})" ); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return join( ", ", @res ); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #===  FUNCTION  ================================================================ | 
					
						
							|  |  |  | #         NAME: detect_markup | 
					
						
							|  |  |  | #      PURPOSE: Tries to detect whether there is any markup in the string and | 
					
						
							|  |  |  | #               if so what it is | 
					
						
							|  |  |  | #   PARAMETERS: $string         String to examine | 
					
						
							|  |  |  | #               $markup_types   Arrayref pointing to regular expression | 
					
						
							|  |  |  | #                               structures to be used in the detection process | 
					
						
							|  |  |  | #               $ofmt           Output format for 'printf' | 
					
						
							|  |  |  | #      RETURNS: A string containing the markup type(s) or 'none' | 
					
						
							|  |  |  | #  DESCRIPTION: Applies regular expressions to the string (notes) looking for | 
					
						
							|  |  |  | #               markup signatures. Array @markup_types contains one hash per | 
					
						
							|  |  |  | #               regex, along with an array of markdown types the regex relates | 
					
						
							|  |  |  | #               to and a comment explaining what's being detected. The regex | 
					
						
							|  |  |  | #               is matched against the string and a count of matches returned. | 
					
						
							|  |  |  | #               If there are matches then an analysis is printed on STDERR. | 
					
						
							|  |  |  | #               The variable MARKUP_DETECTED is a global variable accessed | 
					
						
							|  |  |  | #               through 'our' which controls whether the analyses have | 
					
						
							|  |  |  | #               a single header. | 
					
						
							|  |  |  | #       THROWS: No exceptions | 
					
						
							|  |  |  | #     COMMENTS: None | 
					
						
							|  |  |  | #     SEE ALSO: N/A | 
					
						
							|  |  |  | #=============================================================================== | 
					
						
							|  |  |  | sub detect_markup { | 
					
						
							|  |  |  |     my ( $string, $markup_types, $ofmt ) = @_; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     my ( @type, $count ); | 
					
						
							|  |  |  |     our $MARKUP_DETECTED; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return ['none'] unless $string; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     foreach my $test (@$markup_types) { | 
					
						
							|  |  |  |         if ( $count = () = ( $string =~ /$test->{regex}/g ) ) { | 
					
						
							|  |  |  |             print STDERR "Markup check:\n" if ( $MARKUP_DETECTED++ == 0 ); | 
					
						
							|  |  |  |             printf STDERR $ofmt, "- Matched:", | 
					
						
							|  |  |  |                 $test->{regex} . " -> " . join( ",", @{ $test->{type} } ); | 
					
						
							|  |  |  |             printf STDERR $ofmt, "  Comment:", $test->{comment}; | 
					
						
							|  |  |  |             printf STDERR $ofmt, "  Matches:", $count; | 
					
						
							|  |  |  |             push( @type, @{ $test->{type} } ) for 1 .. $count; | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return ( @type ? \@type : ['none'] ); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #===  FUNCTION  ================================================================ | 
					
						
							|  |  |  | #         NAME: count_tags | 
					
						
							|  |  |  | #      PURPOSE: Counts the number of start tags in what might be HTML | 
					
						
							|  |  |  | #   PARAMETERS: $string         String to examine | 
					
						
							|  |  |  | #      RETURNS: An integer count of the number of start tags found | 
					
						
							|  |  |  | #  DESCRIPTION: Uses HTML::Parser to parse the input string. A handler is | 
					
						
							|  |  |  | #               defined which accumulates start and end tags in an array. The | 
					
						
							|  |  |  | #               number of start tags found is returned to the caller. | 
					
						
							|  |  |  | #       THROWS: No exceptions | 
					
						
							|  |  |  | #     COMMENTS: None | 
					
						
							|  |  |  | #     SEE ALSO: N/A | 
					
						
							|  |  |  | #=============================================================================== | 
					
						
							|  |  |  | sub count_tags { | 
					
						
							|  |  |  |     my ($string) = @_; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     my @accum; | 
					
						
							|  |  |  |     chomp($string); | 
					
						
							|  |  |  |     return 0 unless $string; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     my $p = HTML::Parser->new( | 
					
						
							|  |  |  |         api_version => 3, | 
					
						
							|  |  |  |         handlers    => { | 
					
						
							|  |  |  |             start => [ \@accum, "event,text" ], | 
					
						
							|  |  |  |             end   => [ \@accum, "event,text" ] | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |     ); | 
					
						
							|  |  |  |     $p->parse($string); | 
					
						
							|  |  |  |     $p->eof; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return scalar(@accum); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #===  FUNCTION  ================================================================ | 
					
						
							|  |  |  | #         NAME: html_level | 
					
						
							|  |  |  | #      PURPOSE: Returns the proportion of HTML in a string | 
					
						
							|  |  |  | #   PARAMETERS: $string         String to examine | 
					
						
							|  |  |  | #               $tag_count      Number of start & end tags | 
					
						
							|  |  |  | #      RETURNS: A floating point number representing the (very) rough | 
					
						
							|  |  |  | #               percentage of HTML tags in the string | 
					
						
							|  |  |  | #  DESCRIPTION: | 
					
						
							|  |  |  | #       THROWS: No exceptions | 
					
						
							|  |  |  | #     COMMENTS: None | 
					
						
							|  |  |  | #     SEE ALSO: N/A | 
					
						
							|  |  |  | #=============================================================================== | 
					
						
							|  |  |  | sub html_level { | 
					
						
							|  |  |  |     my ( $string, $tag_count ) = @_; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return '0.0' unless $string; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     chomp($string); | 
					
						
							|  |  |  |     my @words = split( /\s+/, $string ); | 
					
						
							|  |  |  |     my $wc    = scalar(@words); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return sprintf( "%.2f", ( $tag_count / $wc ) * 100 ); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #===  FUNCTION  ================================================================ | 
					
						
							|  |  |  | #         NAME: check_html | 
					
						
							|  |  |  | #      PURPOSE: Checks an HTML string to ensure HPR links are valid | 
					
						
							|  |  |  | #   PARAMETERS: $html           HTML string | 
					
						
							|  |  |  | #      RETURNS: True if valid, otherwise false | 
					
						
							|  |  |  | #  DESCRIPTION: | 
					
						
							|  |  |  | #       THROWS: No exceptions | 
					
						
							|  |  |  | #     COMMENTS: None | 
					
						
							|  |  |  | #     SEE ALSO: N/A | 
					
						
							|  |  |  | #=============================================================================== | 
					
						
							|  |  |  | sub check_html { | 
					
						
							|  |  |  |     my ( $html, $base_url ) = @_; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     my ( $parser, @links, $result, $etype, $aname, $avalue ); | 
					
						
							|  |  |  |     my ( $base_re, $show_re, $host_re, $anchor_re, $asset_re ); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     chomp($html); | 
					
						
							|  |  |  |     return 1 unless $html; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     $parser = HTML::LinkExtor->new( undef, $base_url ); | 
					
						
							|  |  |  |     $parser->parse($html); | 
					
						
							|  |  |  |     @links = $parser->links; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     #    print Dumper( \@links ), "\n"; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     # Make some regular expressions. Note that the $base_url is expected to | 
					
						
							|  |  |  |     # end with a '/' | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     $base_re   = qr{^${base_url}}; | 
					
						
							|  |  |  |     $host_re   = qr{${base_re}correspondents.php\?hostid=\d{1,4}$}; | 
					
						
							|  |  |  |     $show_re   = qr{${base_re}eps.php\?id=\d{1,4}$}; | 
					
						
							|  |  |  |     $anchor_re = qr{${base_re}\#.+$}; | 
					
						
							|  |  |  |     $asset_re  = qr{${base_re}eps/hpr\d{1,4}/.+$}; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     # Walk the collected links | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     $result = 0; | 
					
						
							|  |  |  |     foreach my $element (@links) { | 
					
						
							|  |  |  |         # | 
					
						
							|  |  |  |         # Collect the parsed links | 
					
						
							|  |  |  |         # | 
					
						
							|  |  |  |         ( $etype, $aname, $avalue ) = @$element; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         # | 
					
						
							|  |  |  |         # Only check HPR links | 
					
						
							|  |  |  |         # | 
					
						
							|  |  |  |         if ( $avalue =~ /$base_re/ ) { | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             # | 
					
						
							|  |  |  |             # Not interested in correctly formed asset or show links | 
					
						
							|  |  |  |             # | 
					
						
							|  |  |  |             return 1 | 
					
						
							|  |  |  |                 if ( $avalue =~ /$show_re|$host_re|$anchor_re|$asset_re/ ); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             # | 
					
						
							|  |  |  |             # Something seems to be wrong with a link | 
					
						
							|  |  |  |             # | 
					
						
							|  |  |  |             if ( $etype eq 'a' ) { | 
					
						
							|  |  |  |                 $result++; | 
					
						
							|  |  |  |                 print "<A> tag $avalue\n" unless $silent; | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  |             elsif ( $etype eq 'img' ) { | 
					
						
							|  |  |  |                 $result++; | 
					
						
							|  |  |  |                 print "<A> tag $avalue\n" unless $silent; | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return $result == 0; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #===  FUNCTION  ================================================================ | 
					
						
							|  |  |  | #         NAME: find_Unicode | 
					
						
							|  |  |  | #      PURPOSE: Scans a text string for Unicode in case we want to remove it | 
					
						
							|  |  |  | #   PARAMETERS: $string         String to examine | 
					
						
							|  |  |  | #      RETURNS: True (1) if Unicode is found, otherwise False (0) | 
					
						
							|  |  |  | #  DESCRIPTION: | 
					
						
							|  |  |  | #       THROWS: No exceptions | 
					
						
							|  |  |  | #     COMMENTS: None | 
					
						
							|  |  |  | #     SEE ALSO: N/A | 
					
						
							|  |  |  | #=============================================================================== | 
					
						
							|  |  |  | sub find_Unicode { | 
					
						
							|  |  |  |     my ($string) = @_; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return ( $string =~ /[^\x{00}-\x{7F}]/ ); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-12-01 20:45:20 +00:00
										 |  |  | #===  FUNCTION  ================================================================ | 
					
						
							|  |  |  | #         NAME: trim | 
					
						
							|  |  |  | #      PURPOSE: Trims leading and trailing spaces from a string | 
					
						
							|  |  |  | #   PARAMETERS: string          string to trim | 
					
						
							|  |  |  | #      RETURNS: Trimmed string | 
					
						
							|  |  |  | #  DESCRIPTION:  | 
					
						
							|  |  |  | #       THROWS: No exceptions | 
					
						
							|  |  |  | #     COMMENTS: None | 
					
						
							|  |  |  | #     SEE ALSO: N/A | 
					
						
							|  |  |  | #=============================================================================== | 
					
						
							|  |  |  | sub trim { | 
					
						
							|  |  |  |     my ($str) = @_; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     $str =~ s/^\s+|\s+$//g; | 
					
						
							|  |  |  |     return $str; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-06-04 16:35:44 +01:00
										 |  |  | #===  FUNCTION  ================================================================ | 
					
						
							|  |  |  | #         NAME: output_file_name | 
					
						
							|  |  |  | #      PURPOSE: Generate an output file name with three choices | 
					
						
							|  |  |  | #   PARAMETERS: $optarg         the argument to the option choosing the filename | 
					
						
							|  |  |  | #               $showno         the show number to add to certain name types | 
					
						
							|  |  |  | #               $template       a default 'sprintf' template for the name | 
					
						
							|  |  |  | #      RETURNS: The name of the output file | 
					
						
							|  |  |  | #  DESCRIPTION: If there's a defined output filename then there are three | 
					
						
							|  |  |  | #               options: a null string, a plain filename and a substitution | 
					
						
							|  |  |  | #               string with '%d' sequences. The null string means the user used | 
					
						
							|  |  |  | #               '-option' without a value, so we want to generate a substitution | 
					
						
							|  |  |  | #               string. A string with '%d' requires a check to ensure there's | 
					
						
							|  |  |  | #               the right number, just one. The plain filename needs no more | 
					
						
							|  |  |  | #               work. | 
					
						
							|  |  |  | #       THROWS: No exceptions | 
					
						
							|  |  |  | #     COMMENTS: None | 
					
						
							|  |  |  | #     SEE ALSO: N/A | 
					
						
							|  |  |  | #=============================================================================== | 
					
						
							|  |  |  | sub output_file_name { | 
					
						
							|  |  |  |     my ( $optarg, $showno, $template ) = @_; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     my ( $filename, $count ); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     # We shouldn't be called with a null option argument | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     return unless defined($optarg); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     # Does the option have an argument? | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     if ( $optarg =~ /^$/ ) { | 
					
						
							|  |  |  |         # | 
					
						
							|  |  |  |         # No argument; use the show number from the -episode=N option | 
					
						
							|  |  |  |         # | 
					
						
							|  |  |  |         $filename = sprintf( $template, $showno ); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     elsif ( $optarg =~ /%d/ ) { | 
					
						
							|  |  |  |         # | 
					
						
							|  |  |  |         # There's an argument, does it have a '%d' in it? | 
					
						
							|  |  |  |         # | 
					
						
							|  |  |  |         $count = () = $optarg =~ /%d/g; | 
					
						
							|  |  |  |         die "Invalid - too many '%d' sequences in '$optarg'\n" | 
					
						
							|  |  |  |             if ( $count > 1 ); | 
					
						
							|  |  |  |         $filename = sprintf( $optarg, $showno ); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     else { | 
					
						
							|  |  |  |         # | 
					
						
							|  |  |  |         # It's a plain filename, just return it | 
					
						
							|  |  |  |         # | 
					
						
							|  |  |  |         $filename = $optarg; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return $filename; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #===  FUNCTION  ================================================================ | 
					
						
							|  |  |  | #         NAME: update_status | 
					
						
							|  |  |  | #      PURPOSE: Updates the status file | 
					
						
							|  |  |  | #   PARAMETERS: $sfile          Name and path of the status file | 
					
						
							|  |  |  | #               $state          State to set, such as 'parsed' | 
					
						
							|  |  |  | #      RETURNS: Nothing | 
					
						
							|  |  |  | #  DESCRIPTION: Appends the state string to the named file. | 
					
						
							|  |  |  | #       THROWS: No exceptions | 
					
						
							|  |  |  | #     COMMENTS: None | 
					
						
							|  |  |  | #     SEE ALSO: N/A | 
					
						
							|  |  |  | #=============================================================================== | 
					
						
							|  |  |  | sub update_status { | 
					
						
							|  |  |  |     my ($sfile, $state) = @_; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     open( my $sfh, '>>:encoding(UTF-8)', $sfile ) | 
					
						
							|  |  |  |         or die "Unable to open output file $sfile $!\n"; | 
					
						
							|  |  |  |     print $sfh "$state\n"; | 
					
						
							|  |  |  |     close($sfh); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #===  FUNCTION  ================================================================ | 
					
						
							|  |  |  | #         NAME: spellcheck | 
					
						
							|  |  |  | #      PURPOSE: Perform simple spell checks on strings | 
					
						
							|  |  |  | #   PARAMETERS: $string         Input string to check | 
					
						
							|  |  |  | #      RETURNS: List containing any problem words or undef if none | 
					
						
							|  |  |  | #  DESCRIPTION:  | 
					
						
							|  |  |  | #       THROWS: No exceptions | 
					
						
							|  |  |  | #     COMMENTS: None | 
					
						
							|  |  |  | #     SEE ALSO: N/A | 
					
						
							|  |  |  | #=============================================================================== | 
					
						
							|  |  |  | sub spellcheck { | 
					
						
							|  |  |  |     my ($string) = @_; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     my @errors; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     my $checker = Text::SpellChecker->new( | 
					
						
							|  |  |  |         text => $string, | 
					
						
							|  |  |  |         lang => 'en_GB.UTF-8' | 
					
						
							|  |  |  |     ); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     while ( my $word = $checker->next_word ) { | 
					
						
							|  |  |  |         push( @errors, $word ); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return @errors; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #===  FUNCTION  ================================================================ | 
					
						
							|  |  |  | #         NAME: textFormat | 
					
						
							|  |  |  | #      PURPOSE: Formats a block of text in an indented, wrapped style with | 
					
						
							|  |  |  | #               a label in the left column | 
					
						
							|  |  |  | #   PARAMETERS: $text           The text to be formatted, as a scalar string | 
					
						
							|  |  |  | #               $tag            The label to be added to the left of the top | 
					
						
							|  |  |  | #                               line | 
					
						
							|  |  |  | #               $align          Tag alignment, 'L' for left, otherwise right | 
					
						
							|  |  |  | #               $lmargin        Width of the left margin (assumed to be big | 
					
						
							|  |  |  | #                               enough for the tag) | 
					
						
							|  |  |  | #               $textwidth      The width of all of the text plus left margin | 
					
						
							|  |  |  | #                               (i.e.  the right margin) | 
					
						
							|  |  |  | #      RETURNS: The formatted result as a string | 
					
						
							|  |  |  | #  DESCRIPTION: Chops the incoming text into words (thereby removing any | 
					
						
							|  |  |  | #               formatting). Removes any leading spaces. Loops through the | 
					
						
							|  |  |  | #               wordlist building them into lines of the right length to fit | 
					
						
							|  |  |  | #               between the left and right margins. Saves the lines in an | 
					
						
							|  |  |  | #               array. Adds the tag to the first line with the alignment | 
					
						
							|  |  |  | #               requested then returns the array joined into a string. | 
					
						
							|  |  |  | #       THROWS: No exceptions | 
					
						
							|  |  |  | #     COMMENTS: Inspired by Text::Format but *much* simpler. In fact T::F is | 
					
						
							|  |  |  | #               a nasty thing to have to use; I couldn't get it to do what | 
					
						
							|  |  |  | #               this routine does. | 
					
						
							|  |  |  | #               TODO Make the routine more resilient to silly input values. | 
					
						
							|  |  |  | #     SEE ALSO: | 
					
						
							|  |  |  | #=============================================================================== | 
					
						
							|  |  |  | sub textFormat { | 
					
						
							|  |  |  |     my ( $text, $tag, $align, $lmargin, $textwidth ) = @_; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     my ( $width, $word ); | 
					
						
							|  |  |  |     my ( @words, @buff, @wrap ); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     # Build the tag early. If there's no text we'll just return the tag. | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     $tag = sprintf( "%*s", | 
					
						
							|  |  |  |         ( $align =~ /L/i ? ( $lmargin - 1 ) * -1 : $lmargin - 1 ), $tag ); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return $tag unless $text; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     $text =~ s/(^\s+|\s+$)//g; | 
					
						
							|  |  |  |     return $tag unless $text; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     # Chop up the incoming text removing leading spaces | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     @words = split( /\s+/, $text ); | 
					
						
							|  |  |  |     shift(@words) if ( @words && $words[0] eq '' ); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     # Compute the width of the active text | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     $width = $textwidth - $lmargin; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     # Format the words into lines with a blank left margin | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     while ( defined( $word = shift(@words) ) ) { | 
					
						
							|  |  |  |         if ( length( join( ' ', @buff, $word ) ) < $width ) { | 
					
						
							|  |  |  |             push( @buff, $word ); | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |         else { | 
					
						
							|  |  |  |             push( @wrap, ' ' x $lmargin . join( ' ', @buff ) ); | 
					
						
							|  |  |  |             @buff = ($word); | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     # Append any remainder | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     push( @wrap, ' ' x $lmargin . join( ' ', @buff ) ) if @buff; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     # Insert the tag into the first line | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     substr( $wrap[0], 0, $lmargin - 1 ) = $tag; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     # Return the formatted array as a string | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     return join( "\n", @wrap ); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #===  FUNCTION  ================================================================ | 
					
						
							|  |  |  | #         NAME: colour_if | 
					
						
							|  |  |  | #      PURPOSE: Colours a string if a condition is true | 
					
						
							|  |  |  | #   PARAMETERS: $test           criterion to determine colouring | 
					
						
							|  |  |  | #               $colour         colour code to use | 
					
						
							|  |  |  | #               $string         string to colour | 
					
						
							|  |  |  | #      RETURNS: The (non-)coloured string | 
					
						
							|  |  |  | #  DESCRIPTION: | 
					
						
							|  |  |  | #       THROWS: No exceptions | 
					
						
							|  |  |  | #     COMMENTS: None | 
					
						
							|  |  |  | #     SEE ALSO: N/A | 
					
						
							|  |  |  | #=============================================================================== | 
					
						
							|  |  |  | sub colour_if { | 
					
						
							|  |  |  |     my ( $test, $colour, $string ) = @_; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if ($test) { | 
					
						
							|  |  |  |         return colored( $string, $colour ); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     else { | 
					
						
							|  |  |  |         return $string; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #===  FUNCTION  ================================================================ | 
					
						
							|  |  |  | #         NAME: colour_switch | 
					
						
							|  |  |  | #      PURPOSE: Chooses colours depending on multiple tests | 
					
						
							|  |  |  | #   PARAMETERS: $tests          arrayref containing a list of tests | 
					
						
							|  |  |  | #               $colours        arrayref containing a list of colour codes | 
					
						
							|  |  |  | #               $string         string to colour | 
					
						
							|  |  |  | #      RETURNS: The string with the desired colour (or none) | 
					
						
							|  |  |  | #  DESCRIPTION: The first test in @$tests which returns True causes the | 
					
						
							|  |  |  | #               corresponding colour to be chosen (unless it's missing) | 
					
						
							|  |  |  | #               otherwise, if no tests are True or the colour is missing, | 
					
						
							|  |  |  | #               there's no colour applied. | 
					
						
							|  |  |  | #       THROWS: No exceptions | 
					
						
							|  |  |  | #     COMMENTS: None | 
					
						
							|  |  |  | #     SEE ALSO: N/A | 
					
						
							|  |  |  | #=============================================================================== | 
					
						
							|  |  |  | sub colour_switch { | 
					
						
							|  |  |  |     my ( $tests, $colours, $string ) = @_; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     for ( my $i = 0; $i <= $#{$tests}; $i++ ) { | 
					
						
							|  |  |  |         if ( $tests->[$i] ) { | 
					
						
							|  |  |  |             if ( exists( $colours->[$i] ) ) { | 
					
						
							|  |  |  |                 return colored( $string, $colours->[$i] ); | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  |             else { | 
					
						
							|  |  |  |                 return $string; | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return $string; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #===  FUNCTION  ================================================================ | 
					
						
							|  |  |  | #         NAME: alert | 
					
						
							|  |  |  | #      PURPOSE: Print an alert message using a format and colours | 
					
						
							|  |  |  | #   PARAMETERS: $fmt            the format to use. The first field is where | 
					
						
							|  |  |  | #                               the string 'ALERT' goes | 
					
						
							|  |  |  | #               $message        the alert message | 
					
						
							|  |  |  | #      RETURNS: Nothing | 
					
						
							|  |  |  | #  DESCRIPTION: | 
					
						
							|  |  |  | #       THROWS: No exceptions | 
					
						
							|  |  |  | #     COMMENTS: None | 
					
						
							|  |  |  | #     SEE ALSO: N/A | 
					
						
							|  |  |  | #=============================================================================== | 
					
						
							|  |  |  | sub alert { | 
					
						
							|  |  |  |     my ( $fmt, $message ) = @_; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     $fmt = "%-16s %s\n" unless $fmt; | 
					
						
							|  |  |  |     print STDERR colored( sprintf( $fmt, "** ALERT **:", $message ), | 
					
						
							|  |  |  |         'bold red' ); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #===  FUNCTION  ================================================================ | 
					
						
							|  |  |  | #         NAME: coalesce | 
					
						
							|  |  |  | #      PURPOSE: To find the first defined argument and return it | 
					
						
							|  |  |  | #   PARAMETERS: Arbitrary number of arguments | 
					
						
							|  |  |  | #      RETURNS: The first defined argument or undef if there are none | 
					
						
							|  |  |  | #  DESCRIPTION: | 
					
						
							|  |  |  | #       THROWS: No exceptions | 
					
						
							|  |  |  | #     COMMENTS: None | 
					
						
							|  |  |  | #     SEE ALSO: N/A | 
					
						
							|  |  |  | #=============================================================================== | 
					
						
							|  |  |  | sub coalesce { | 
					
						
							|  |  |  |     foreach (@_) { | 
					
						
							|  |  |  |         return $_ if defined($_); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     return undef;    ## no critic | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #===  FUNCTION  ================================================================ | 
					
						
							|  |  |  | #         NAME: quote_crlf | 
					
						
							|  |  |  | #      PURPOSE: Display TAB (\t) and CRLF (\r\n) sequences for debugging | 
					
						
							|  |  |  | #   PARAMETERS: $string         String to process | 
					
						
							|  |  |  | #      RETURNS: The string with TAB, CR and LF characters made visible for | 
					
						
							|  |  |  | #               printing | 
					
						
							|  |  |  | #  DESCRIPTION: | 
					
						
							|  |  |  | #       THROWS: No exceptions | 
					
						
							|  |  |  | #     COMMENTS: None | 
					
						
							|  |  |  | #     SEE ALSO: N/A | 
					
						
							|  |  |  | #=============================================================================== | 
					
						
							|  |  |  | sub quote_crlf { | 
					
						
							|  |  |  |     my ($string) = @_; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     my $quoted = $string; | 
					
						
							|  |  |  |     $quoted =~ s/\t/\\t/g; | 
					
						
							|  |  |  |     $quoted =~ s/\n/\\n/g; | 
					
						
							|  |  |  |     $quoted =~ s/\r/\\r/g; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return "$quoted\n"; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #===  FUNCTION  ================================================================ | 
					
						
							|  |  |  | #         NAME: _debug | 
					
						
							|  |  |  | #      PURPOSE: Prints debug reports | 
					
						
							|  |  |  | #   PARAMETERS: $active         Boolean: 1 for print, 0 for no print | 
					
						
							|  |  |  | #               $message        Message to print | 
					
						
							|  |  |  | #      RETURNS: Nothing | 
					
						
							|  |  |  | #  DESCRIPTION: Outputs a message if $active is true. It removes any trailing | 
					
						
							|  |  |  | #               newline and then adds one in the 'print' to the caller doesn't | 
					
						
							|  |  |  | #               have to bother. Prepends the message with 'D> ' to show it's | 
					
						
							|  |  |  | #               a debug message. | 
					
						
							|  |  |  | #       THROWS: No exceptions | 
					
						
							|  |  |  | #     COMMENTS: None | 
					
						
							|  |  |  | #     SEE ALSO: N/A | 
					
						
							|  |  |  | #=============================================================================== | 
					
						
							|  |  |  | sub _debug { | 
					
						
							|  |  |  |     my ( $active, $message ) = @_; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     chomp($message); | 
					
						
							|  |  |  |     print STDERR "D> $message\n" if $active; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #===  FUNCTION  ================================================================ | 
					
						
							|  |  |  | #         NAME: Options | 
					
						
							|  |  |  | #      PURPOSE: Processes command-line options | 
					
						
							|  |  |  | #   PARAMETERS: $optref     Hash reference to hold the options | 
					
						
							|  |  |  | #      RETURNS: Undef | 
					
						
							|  |  |  | #  DESCRIPTION: | 
					
						
							|  |  |  | #       THROWS: no exceptions | 
					
						
							|  |  |  | #     COMMENTS: none | 
					
						
							|  |  |  | #     SEE ALSO: n/a | 
					
						
							|  |  |  | #=============================================================================== | 
					
						
							|  |  |  | sub Options { | 
					
						
							|  |  |  |     my ($optref) = @_; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     my @options = ( | 
					
						
							|  |  |  |         "help",        "documentation|manpage", | 
					
						
							|  |  |  |         "debug=i",     "silent!", | 
					
						
							|  |  |  |         "episode=i",   "infile=s", | 
					
						
							|  |  |  |         "shownotes:s", "format=s", | 
					
						
							|  |  |  |         "release=s",   "assets=s", | 
					
						
							|  |  |  |         "pictures=s",  "zip=s", | 
					
						
							|  |  |  |         "test!", | 
					
						
							|  |  |  |     ); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if ( !GetOptions( $optref, @options ) ) { | 
					
						
							|  |  |  |         pod2usage( | 
					
						
							|  |  |  |             -msg     => "$PROG version $VERSION\n", | 
					
						
							|  |  |  |             -verbose => 0, | 
					
						
							|  |  |  |             -exitval => 1 | 
					
						
							|  |  |  |         ); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | __END__ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% | 
					
						
							|  |  |  | #  Application Documentation | 
					
						
							|  |  |  | #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% | 
					
						
							|  |  |  | #{{{ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | =head1 NAME | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | parse_JSON - parse the JSON output file from the HPR show submission form | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | =head1 VERSION | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-12-01 20:45:20 +00:00
										 |  |  | This documentation refers to parse_JSON version 0.0.17 | 
					
						
							| 
									
										
										
										
											2024-06-04 16:35:44 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | =head1 USAGE | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     ./parse_JSON [-help] [-documentation|manpage] -episode=N -infile=FILE | 
					
						
							|  |  |  |          [-shownotes[=FILE]] [-format=FILE] [-release=FILE] [-assets=FILE] | 
					
						
							|  |  |  |          [-pictures=FILE] [-zip=FILE] [-debug=N] [-[no]silent] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     ./parse_JSON -episode=2384 -infile=shownotes/hpr2384/shownotes.json | 
					
						
							|  |  |  |     ./parse_JSON -episode=2379 -infile=shownotes/hpr2379/shownotes.json -shownotes | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | =head1 OPTIONS | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | =over 8 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | =item B<-help> | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | Prints a brief help message describing the usage of the program, and then exits. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | =item B<-manpage> | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | Prints the entire documentation for the script. Note that this mode should use | 
					
						
							|  |  |  | I<perldoc> but there seems to be a bug in this tool at the moment; it does not | 
					
						
							|  |  |  | render properly. Currently we are using a simpler type of formatter. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | =item B<-episode=N> | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | This mandatory option specifies the number of the HPR episode that is being | 
					
						
							|  |  |  | parsed. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | =item B<-infile=FILE> | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | This mandatory option provides the file containing the JSON-format output from | 
					
						
							|  |  |  | the submission form on the HPR website. It is expected that the file has been | 
					
						
							|  |  |  | collected by external scripts and saved in a local file. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | =item B<-shownotes[=FILE]> | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | This option, which may be omitted, defines the location where the parsed show | 
					
						
							|  |  |  | notes are to be written. If omitted no show notes are written, the script | 
					
						
							|  |  |  | simply parses the input file and reports on what it finds. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | If the option is given as B<-shownotes=FILE> the notes are written to the | 
					
						
							|  |  |  | nominated file. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | The B<FILE> portion may be given as a template string containing a I<%d> | 
					
						
							|  |  |  | sequence. This sequence is replaced by the show number provided by the | 
					
						
							|  |  |  | B<-episode=N> option. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | If the B<=FILE> portion is omitted a default template of 'hpr%d.out' is used | 
					
						
							|  |  |  | and again the I<%d> sequence is replaced by the show number provided by the | 
					
						
							|  |  |  | B<-episode=N> option. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | If the notes are declared as HTML5 then a symbolic link is made from the | 
					
						
							|  |  |  | parsed shownote file to a file where the extension is 'html'. This is to allow | 
					
						
							|  |  |  | the 'do_vim' script to edit the output file as in other cases but to ensure | 
					
						
							|  |  |  | that the file to be uploaded to the server is also edited. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | =item B<-format=FILE> | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | This option, which may be omitted, causes the declared format to be written to | 
					
						
							|  |  |  | a nominated file. This is to allow other scripts to take action depending on | 
					
						
							|  |  |  | the format of the notes without having to determine this information | 
					
						
							|  |  |  | themselves. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | If the option is omitted no format information is written. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | =item B<-release=FILE> | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | This option, which may be omitted, causes the show's release date to be | 
					
						
							|  |  |  | written to a nominated file. This is to allow other scripts to take action | 
					
						
							|  |  |  | depending on the release date of the show without having to determine this | 
					
						
							|  |  |  | information themselves. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | If the option is omitted no release date is written. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | =item B<-assets=FILE> | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | This option, which may be omitted, causes a list of the show's assets to be | 
					
						
							|  |  |  | written to a nominated file. This is to allow other scripts to take action | 
					
						
							|  |  |  | depending on the assets of the show without having to determine this | 
					
						
							|  |  |  | information themselves. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | Note that the assets do not include the main audio file. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | If no assets exist or the option is omitted no asset list is written. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | =item B<-pictures=FILE> | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | This option, which may be omitted, causes a list of any picture assets to be | 
					
						
							|  |  |  | written to a nominated file. This is to allow other scripts to take action | 
					
						
							|  |  |  | depending on pictures sent with the show without having to determine this | 
					
						
							|  |  |  | information themselves. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | If no pictures exist or the option is omitted no picture list is written. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | =item B<-zip=FILE> | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | This option, which may be omitted, causes assets to written to a Zip file | 
					
						
							|  |  |  | with the name given. This only happens if the assets were sent separately, and | 
					
						
							|  |  |  | not in an archive file of some kind. This is because the processing of some | 
					
						
							|  |  |  | assets (only pictures at the moment) can cause changes to be made to the | 
					
						
							|  |  |  | originals. This step can be taken to preserve the original files. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | If no assets exist or an archive file already exists, or the option is omitted | 
					
						
							|  |  |  | no zip file is written. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | =item B<-debug=N> | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | Causes certain debugging information to be displayed. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     0   (the default) no debug output | 
					
						
							|  |  |  |     1   N/A | 
					
						
							|  |  |  |     2   reports lines being parsed from the input file | 
					
						
							|  |  |  |     3   dumps two data structures: | 
					
						
							|  |  |  |             - @content: an array containing lines from the input file | 
					
						
							|  |  |  |             - %showdetails: a hash containing the parsed data | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | =item B<-[no]silent> | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | Controls the amount of information generated. If B<-silent> is selected the | 
					
						
							|  |  |  | output is reduced, otherwise B<-nosilent> is assumed or may be provided when | 
					
						
							|  |  |  | information about the generation of output files is reported. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | =item B<-[no]test> | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | Used when running tests on the script. Controls the log output, adding 'T ' at | 
					
						
							|  |  |  | the start of the log record. If B<-test> is selected the | 
					
						
							|  |  |  | prefix is added, otherwise B<-notest> is assumed and no prefix is added. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | =back | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | =head1 DESCRIPTION | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | This script is meant to be a component in a workflow dealing with the notes | 
					
						
							|  |  |  | submitted by HPR hosts with the shows they are uploading. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | The script reads a JSON file generated by the show upload form on the HPR | 
					
						
							|  |  |  | website.  It decodes this into a hash structure containing all of the elements | 
					
						
							|  |  |  | of the file. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | Regardless of settings a check is made to ensure that the episode number | 
					
						
							|  |  |  | provided to the script matches that found in the input file. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | An analysis of the show notes is carried out to determine how much HTML they | 
					
						
							|  |  |  | contain. The HTML level is determined as a percentage. If it is above 5% then | 
					
						
							|  |  |  | the notes are deemed to be HTML. If below 5% then an attempt is made to | 
					
						
							|  |  |  | determine what type of markup has been used. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | An analysis of the host details is also made to determine the composition of | 
					
						
							|  |  |  | the text therein. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | Reports are generated for all of these components. A recommendation of the | 
					
						
							|  |  |  | type of processing the notes will require is also made. This was intended to | 
					
						
							|  |  |  | assist with automatic processing, but has not proved to be reliable enough. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | The author of the notes declares what format they are. This is not currently | 
					
						
							|  |  |  | compared with what the analysis reports, but this may be added in a later | 
					
						
							|  |  |  | version. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | The notes are written to the nominated file (if there is one) for further | 
					
						
							|  |  |  | processing, database insertion, etc. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | If the notes are declared to be HTML then a copy is written to a file with the | 
					
						
							|  |  |  | same name as the output file but with an 'html' suffix. This can be rendered | 
					
						
							|  |  |  | to see if the notes really are HTML and look good enough for uploading. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | =head1 DIAGNOSTICS | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | Error and warning messages generated by the script. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | =over 4 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | =item B<Unable to access input file ...: ...> | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | Type: fatal | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | The script attempted to open the input file but failed. The message will | 
					
						
							|  |  |  | contain details of the failure. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | =item B<Input file ... is empty> | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | Type: fatal | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | The input file exists but is empty | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | =item B<Invalid - too many '%d' sequences in '...'> | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | Type: fatal | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | The template provided to the B<-shownotes=FILE> option contained too many '%d' | 
					
						
							|  |  |  | sequences. Ensure that there is only one of these. See above for details. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | =item B<The JSON doesn't have the right structure> | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | Type: fatal | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | After a few basic checks on the components of the JSON file it does not seem | 
					
						
							|  |  |  | to be valid. The script expects there at least to be top-level sections with | 
					
						
							|  |  |  | the keys: I<episode>, I<metadata> and I<host>. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | =item B<The episode specified (...) doesn't match the one in the JSON (...)> | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | Type: fatal | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | The episode number in the B<-episode=N> option does not match the episode | 
					
						
							|  |  |  | number found in the JSON. Both numbers are reported in the error message. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | =item B<Unable to open output file ...: ...> | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | Type: fatal | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | The script was unable to open one or both of the output files or the file of | 
					
						
							|  |  |  | format information. These will be named according to the B<-shownotes=FILE> | 
					
						
							|  |  |  | option with a suffix of '.out' or '.html' or specified by the B<-format=FILE> | 
					
						
							|  |  |  | option. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | =back | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | =head1 DEPENDENCIES | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     Data::Dumper | 
					
						
							|  |  |  |     Getopt::Long | 
					
						
							|  |  |  |     HTML::Parser | 
					
						
							|  |  |  |     JSON | 
					
						
							|  |  |  |     Link::Extor | 
					
						
							|  |  |  |     Log::Handler | 
					
						
							|  |  |  |     Pod::Usage | 
					
						
							|  |  |  |     Term::ANSIColor | 
					
						
							|  |  |  |     Text::CSV | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | =head1 BUGS AND LIMITATIONS | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | There are no known bugs in this module. | 
					
						
							|  |  |  | Please report problems to Dave Morriss  (Dave.Morriss@gmail.com) | 
					
						
							|  |  |  | Patches are welcome. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | =head1 AUTHOR | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | Dave Morriss (Dave.Morriss@gmail.com) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | =head1 LICENCE AND COPYRIGHT | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | Copyright (c) 2021 Dave Morriss (Dave.Morriss@gmail.com). All rights reserved. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | This module is free software; you can redistribute it and/or | 
					
						
							|  |  |  | modify it under the same terms as Perl itself. See perldoc perlartistic. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | This program is distributed in the hope that it will be useful, | 
					
						
							|  |  |  | but WITHOUT ANY WARRANTY; without even the implied warranty of | 
					
						
							|  |  |  | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | =cut | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #}}} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # [zo to open fold, zc to close] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # vim: syntax=perl:ts=8:sw=4:et:ai:tw=78:fo=tcrqn21:fdm=marker | 
					
						
							|  |  |  | 
 |