forked from HPR/hpr-tools
		
	
		
			
	
	
		
			2349 lines
		
	
	
		
			74 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
		
		
			
		
	
	
			2349 lines
		
	
	
		
			74 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
|   | #!/usr/bin/env perl | ||
|  | #=============================================================================== | ||
|  | # | ||
|  | #         FILE: parse_JSON | ||
|  | # | ||
|  | #        USAGE: ./parse_JSON [-help] [-manpage] -episode=N -infile=FILE | ||
|  | #                       [-shownotes[=FILE]] [-format=FILE] [-release=FILE] | ||
|  | #                       [-assets=FILE] [-debug=N] [-[no]silent] [-[no]test] | ||
|  | # | ||
|  | #  DESCRIPTION: Script to parse the JSON now being generated for HPR | ||
|  | #               shows. Replaces the original 'parse_shownotes' and is a fair | ||
|  | #               bit simpler because the parsing of the original PHP dump isn't | ||
|  | #               needed. However, it has grown in complexity as features for | ||
|  | #               handling assets have been added. Downstream scripts now need | ||
|  | #               information about pictures and assets, and all archive files | ||
|  | #               need to be unpacked and placed in a directory for uploading to | ||
|  | #               the HPR server. | ||
|  | # | ||
|  | #      OPTIONS: --- | ||
|  | # REQUIREMENTS: --- | ||
|  | #         BUGS: --- | ||
|  | #        NOTES: --- | ||
|  | #       AUTHOR: Dave Morriss (djm), Dave.Morriss@gmail.com | ||
|  | #      VERSION: 0.0.14 | ||
|  | #      CREATED: 2020-11-28 10:52:02 | ||
|  | #     REVISION: 2024-03-09 20:34:54 | ||
|  | # | ||
|  | #=============================================================================== | ||
|  | 
 | ||
|  | use 5.30.0; | ||
|  | use utf8; | ||
|  | 
 | ||
|  | use Getopt::Long; | ||
|  | use Pod::Usage; | ||
|  | 
 | ||
|  | use Term::ANSIColor; | ||
|  | 
 | ||
|  | use HTML::Parser (); | ||
|  | use HTML::LinkExtor; | ||
|  | use Text::CSV; | ||
|  | use Text::SpellChecker; | ||
|  | 
 | ||
|  | use JSON; | ||
|  | 
 | ||
|  | use List::MoreUtils qw{any}; | ||
|  | use Archive::Any; | ||
|  | use MIME::Types; | ||
|  | use IO::Compress::Zip qw(:all); | ||
|  | use File::Copy; | ||
|  | use File::Spec::Functions; | ||
|  | use Path::Class; | ||
|  | 
 | ||
|  | use Log::Handler; | ||
|  | 
 | ||
|  | use Data::Dumper; | ||
|  | 
 | ||
|  | # | ||
|  | # Version number (manually incremented) | ||
|  | # | ||
|  | our $VERSION = '0.0.14'; | ||
|  | 
 | ||
|  | # | ||
|  | # Script and directory names | ||
|  | # | ||
|  | ( my $PROG = $0 ) =~ s|.*/||mx; | ||
|  | ( my $DIR  = $0 ) =~ s|/?[^/]*$||mx; | ||
|  | $DIR = '.' unless $DIR; | ||
|  | 
 | ||
|  | #------------------------------------------------------------------------------- | ||
|  | # Declarations | ||
|  | #------------------------------------------------------------------------------- | ||
|  | # | ||
|  | # Constants and other declarations | ||
|  | # | ||
|  | my $basedir = "$ENV{HOME}/HPR/Show_Submission"; | ||
|  | my $logdir  = "$basedir/logs"; | ||
|  | my $logfile = "$logdir/${PROG}.log"; | ||
|  | 
 | ||
|  | my ( $snlevel, $showno, $summarylength, $notelength ); | ||
|  | 
 | ||
|  | our $MARKUP_DETECTED = 0; | ||
|  | my $MAXNOTELEN = 4000; | ||
|  | 
 | ||
|  | # | ||
|  | # Printing: general output format | ||
|  | # | ||
|  | my $ofmt = "%-17s %s\n"; | ||
|  | 
 | ||
|  | # | ||
|  | # URL base for relative URLs pointing at assets | ||
|  | # | ||
|  | my $base_url = 'https://hackerpublicradio.org/'; | ||
|  | 
 | ||
|  | # | ||
|  | # Text colours | ||
|  | # | ||
|  | my $red     = color('red'); | ||
|  | my $green   = color('green'); | ||
|  | my $yellow  = color('yellow'); | ||
|  | my $magenta = color('magenta'); | ||
|  | my $bold    = color('bold'); | ||
|  | my $reset   = color('reset'); | ||
|  | 
 | ||
|  | my %snatts = ( | ||
|  |     html_percent => 0, | ||
|  |     html_tags    => 0, | ||
|  |     is_markup    => 0, | ||
|  | ); | ||
|  | 
 | ||
|  | # | ||
|  | # A list of signature expressions used to try and guess what markup format was | ||
|  | # used. | ||
|  | # | ||
|  | # {{{ | ||
|  | # | ||
|  | # Note that the regexes are applied to a string which is a concatenation of | ||
|  | # the notes, so care needs to be taken with anchors. After processing the JSON | ||
|  | # the lines of the notes will be separated by newline characters. Use an 'm' | ||
|  | # qualifier to make a '^' anchor treat it as multi-line. | ||
|  | # | ||
|  | my @markup_types = ( | ||
|  |     # Has [...](...) and  entities | ||
|  |     {   regex   => qr{!?\[[^\]]+\]\(https?:[^)]+\)}, | ||
|  |         type    => ['markdown'], | ||
|  |         comment => "Markdown link or image" | ||
|  |     }, | ||
|  | 
 | ||
|  |     # Has [...][nnn] entities | ||
|  |     {   regex   => qr{\[[^\]]+\]\[[^\]]+\]}, | ||
|  |         type    => ['markdown'], | ||
|  |         comment => "Markdown reference" | ||
|  |     }, | ||
|  | 
 | ||
|  |     # Has lines beginning with 1-6 hashes (Atx-style header) | ||
|  |     {   regex   => qr{^#{1,6} \w}m, | ||
|  |         type    => ['markdown'], | ||
|  |         comment => "Markdown Atx-style header" | ||
|  |     }, | ||
|  | 
 | ||
|  |     # A line of stuff followed by a line of = or - (Setex-style header) | ||
|  |     {   regex   => qr{^(\s|\w)+.^[=-]+$}sm, | ||
|  |         type    => ['markdown'], | ||
|  |         comment => "Markdown Setex-style header" | ||
|  |     }, | ||
|  | 
 | ||
|  |     # Has lines starting with 0+ spaces hyphen 1+ spaces | ||
|  |     {   regex   => qr{^\s*[-*]\s+\w}m, | ||
|  |         type    => [ 'markdown', 'reStructuredText', 'org-mode' ], | ||
|  |         comment => "Markdown or reStructuredText list element" | ||
|  |     }, | ||
|  | 
 | ||
|  |     # Has lines beginning with '\d.\s' (number(s) dot space(s)) | ||
|  |     {   regex   => qr{^\s*\d+\.\s+\w}m, | ||
|  |         type    => [ 'markdown', 'reStructuredText' ], | ||
|  |         comment => | ||
|  |             "Markdown, reStructuredText or org-mode ordered list element" | ||
|  |     }, | ||
|  | 
 | ||
|  |     # Has lines which begin with 3+ ` or ~ | ||
|  |     {   regex   => qr{^[`~]{3,}$}m, | ||
|  |         type    => ['markdown'], | ||
|  |         comment => "Markdown fenced block delimiter" | ||
|  |     }, | ||
|  | 
 | ||
|  |     # Has words enclosed in 1 or 2 * | ||
|  |     {   regex   => qr{\*{1,2}\w+\*{1,2}}, | ||
|  |         type    => [ 'markdown', 'reStructuredText' ], | ||
|  |         comment => "Markdown or reStructuredText emphasis or bold" | ||
|  |     }, | ||
|  | 
 | ||
|  |     # Has words enclosed in ` | ||
|  |     {   regex   => qr{`\w+`}, | ||
|  |         type    => ['markdown'], | ||
|  |         comment => "Markdown verbatim text" | ||
|  |     }, | ||
|  | 
 | ||
|  |     # Has a line of 3 or more *, - or _ possibly with intervening spaces | ||
|  |     {   regex   => qr{^[*_-]\s?[*_-]\s?[*_-][*_ -]*$}m, | ||
|  |         type    => ['markdown'], | ||
|  |         comment => "Markdown horizontal rule" | ||
|  |     }, | ||
|  | 
 | ||
|  |     # Has bare http/https URLs | ||
|  |     {   regex   => qr{[^(]\bhttps?:}, | ||
|  |         type    => ['txt2tags'], | ||
|  |         comment => "Bare http(s) url" | ||
|  |     }, | ||
|  | 
 | ||
|  |     # Has [...]_ entities (footnote, citation reference) | ||
|  |     {   regex   => qr{\[(\w+|\d+|#|\*)\]_}, | ||
|  |         type    => ['reStructuredText'], | ||
|  |         comment => "reStructuredText footnote, citation or reference" | ||
|  |     }, | ||
|  | 
 | ||
|  |     # Has `Text <URL>`_ entities (hyperlinks) | ||
|  |     {   regex   => qr{`\w+<https?:[^>]+>`_}, | ||
|  |         type    => ['reStructuredText'], | ||
|  |         comment => "reStructuredText hyperlink" | ||
|  |     }, | ||
|  | 
 | ||
|  |     # Has superscript or subscript markup | ||
|  |     {   regex   => qr{(\^\w+\^|\~\w+\~)}, | ||
|  |         type    => ['markdown'], | ||
|  |         comment => "Markdown superscript or subscript" | ||
|  |     }, | ||
|  | 
 | ||
|  |     # STARS KEYWORD PRIORITY TITLE TAGS is a header definition in Org mode | ||
|  |     {   regex => | ||
|  |             qr{^\*+\s(TODO|FEEDBACK|VERIFY|\||DONE|DELEGATED)?\s?(\[[#]\w\])?\s?\w+}m, | ||
|  |         type    => ['org-mode'], | ||
|  |         comment => "Org mode header" | ||
|  |     }, | ||
|  | 
 | ||
|  |     #    {   regex   => qr{}, | ||
|  |     #        type    => ['markdown'], | ||
|  |     #        comment => "" | ||
|  |     #    }, | ||
|  | 
 | ||
|  | ); | ||
|  | 
 | ||
|  | # }}} | ||
|  | 
 | ||
|  | my @markup_found; | ||
|  | my $markup_choice; | ||
|  | 
 | ||
|  | my ( $showdir,     $assetdir , $statusfile ); | ||
|  | my ( %media_files, $media_files ); | ||
|  | my ( %assets,      @assets,    @audio, $sanitised ); | ||
|  | my ( $has_audio,   $has_extra, $has_archives ); | ||
|  | my ( @archives,    @extracted ); | ||
|  | my ( $astate,      $has_pictures, @pictures, $pictures ); | ||
|  | my ( %spellchecks ); | ||
|  | my @pstates = ( | ||
|  |     'No pictures found',                # 0 | ||
|  |     'Pictures that need management',    # 1 | ||
|  |     'Managed pictures',                 # 2 | ||
|  | ); | ||
|  | 
 | ||
|  | # | ||
|  | # Enable Unicode mode | ||
|  | # | ||
|  | binmode STDOUT, ":encoding(UTF-8)"; | ||
|  | binmode STDERR, ":encoding(UTF-8)"; | ||
|  | 
 | ||
|  | #------------------------------------------------------------------------------- | ||
|  | # Options and arguments | ||
|  | #------------------------------------------------------------------------------- | ||
|  | my $DEF_DEBUG = 0; | ||
|  | 
 | ||
|  | # | ||
|  | # Process options | ||
|  | # | ||
|  | my %options; | ||
|  | Options( \%options ); | ||
|  | 
 | ||
|  | # | ||
|  | # Default help | ||
|  | # | ||
|  | pod2usage( | ||
|  |     -msg     => "$PROG version $VERSION\n", | ||
|  |     -verbose => 0, | ||
|  |     -exitval => 1 | ||
|  | ) if ( $options{'help'} ); | ||
|  | 
 | ||
|  | # | ||
|  | # Detailed help | ||
|  | # | ||
|  | pod2usage( | ||
|  |     -msg       => "$PROG version $VERSION\n", | ||
|  |     -verbose   => 2, | ||
|  |     -exitval   => 1, | ||
|  |     -noperldoc => 0 | ||
|  | ) if ( $options{'documentation'} ); | ||
|  | 
 | ||
|  | # | ||
|  | # Collect options | ||
|  | # | ||
|  | my $DEBUG  = ( defined( $options{debug} )  ? $options{debug}  : $DEF_DEBUG ); | ||
|  | my $silent = ( defined( $options{silent} ) ? $options{silent} : 0 ); | ||
|  | my $test   = ( defined( $options{test} )   ? $options{test}   : 0 ); | ||
|  | 
 | ||
|  | $showno = $options{episode}; | ||
|  | pod2usage( | ||
|  |     -msg     => "$PROG version $VERSION\nMissing mandatory option -episode\n", | ||
|  |     -verbose => 0, | ||
|  |     -exitval => 1 | ||
|  | ) unless $showno; | ||
|  | 
 | ||
|  | my $infile = $options{infile}; | ||
|  | pod2usage( | ||
|  |     -msg     => "$PROG version $VERSION\nMissing mandatory option -infile\n", | ||
|  |     -verbose => 0, | ||
|  |     -exitval => 1 | ||
|  | ) unless $infile; | ||
|  | die "Unable to access input file $infile: $!\n" unless -r $infile; | ||
|  | die "Input file $infile is empty\n" if -z $infile; | ||
|  | 
 | ||
|  | # | ||
|  | # Where to put various files. The second group of options must all have | ||
|  | # filenames to be created, but the first can define its own name if not given. | ||
|  | # | ||
|  | my $shownotes = $options{shownotes}; | ||
|  | 
 | ||
|  | my $formatfile  = $options{format}; | ||
|  | my $releasefile = $options{release}; | ||
|  | my $assetfile   = $options{assets}; | ||
|  | my $picturefile = $options{pictures}; | ||
|  | my $zipfile     = $options{zip}; | ||
|  | 
 | ||
|  | # | ||
|  | # Work on the show notes output file, allowing defaults and substitution points | ||
|  | # for convenience. | ||
|  | # | ||
|  | if ( defined($shownotes) ) { | ||
|  |     $shownotes = output_file_name( $shownotes, $showno, 'hpr%d.out' ); | ||
|  | } | ||
|  | 
 | ||
|  | # | ||
|  | # Directories and file specific to this show | ||
|  | # | ||
|  | $showdir  = "$basedir/shownotes/hpr$showno"; | ||
|  | $assetdir = "$showdir/uploads"; | ||
|  | $statusfile = "$showdir/.status"; | ||
|  | 
 | ||
|  | #------------------------------------------------------------------------------- | ||
|  | # Set up logging keeping the default log layout except for the date. The format | ||
|  | # is "%T [%L] %m" where '%T' is the timestamp, '%L' is the log level and '%m is | ||
|  | # the message. | ||
|  | #------------------------------------------------------------------------------- | ||
|  | my $log = Log::Handler->new(); | ||
|  | 
 | ||
|  | $log->add( | ||
|  |     file => { | ||
|  |         timeformat     => "%Y/%m/%d %H:%M:%S", | ||
|  |         message_layout => ($test ? 'T ' : '') . "%T [%L] %m", | ||
|  |         filename       => $logfile, | ||
|  |         minlevel       => 0,                     # emergency, emerg | ||
|  |         maxlevel       => 7,                     # debug | ||
|  |         } | ||
|  | ); | ||
|  | 
 | ||
|  | #------------------------------------------------------------------------------- | ||
|  | #  Read the input file and parse it into $content (hashref) | ||
|  | #------------------------------------------------------------------------------- | ||
|  | my $json = JSON->new->utf8; | ||
|  | open( my $fh, '<:encoding(UTF-8)', $infile ); | ||
|  | my $json_text = <$fh>; | ||
|  | close($fh); | ||
|  | 
 | ||
|  | # TODO: bad JSON can crash the script here! | ||
|  | my $content = decode_json($json_text); | ||
|  | 
 | ||
|  | $log->info( $showno, "[$VERSION] Processing $infile" ); | ||
|  | 
 | ||
|  | _debug( $DEBUG > 2, Dumper($content) ); | ||
|  | 
 | ||
|  | #------------------------------------------------------------------------------- | ||
|  | # Validate the JSON structure in case it's changed for some reason | ||
|  | #------------------------------------------------------------------------------- | ||
|  | die "The JSON doesn't have the right structure\n" | ||
|  |     unless ( exists( $content->{episode} ) | ||
|  |     && exists( $content->{metadata} ) | ||
|  |     && exists( $content->{host} ) ); | ||
|  | 
 | ||
|  | print STDERR '-' x 80, "\n"; | ||
|  | 
 | ||
|  | #------------------------------------------------------------------------------- | ||
|  | # Report details from the parsed data. The percentages of HTML in the Host | ||
|  | # Profile and the notes themselves are computed using an HTML parser that counts | ||
|  | # the tags. | ||
|  | #------------------------------------------------------------------------------- | ||
|  | printf STDERR $ofmt, "Show:", $content->{metadata}{Episode_Number}; | ||
|  | printf STDERR $ofmt, "Date:", $content->{metadata}{Episode_Date}; | ||
|  | 
 | ||
|  | # | ||
|  | # Detect Unicode in the Title, Summary or Tags and flag their presence. | ||
|  | # | ||
|  | for my $key ( 'Title', 'Summary', 'Tags' ) { | ||
|  |     my $uflag = ( $content->{episode}{$key} =~ /[^\x{00}-\x{7F}]/ ); | ||
|  |     printf STDERR $ofmt, "$key:", | ||
|  |         colour_if( | ||
|  |             $uflag, | ||
|  | #            'bold yellow on_magenta', | ||
|  |             'black on_bright_yellow', | ||
|  |             $content->{episode}{$key} | ||
|  |         ); | ||
|  | } | ||
|  | 
 | ||
|  | # | ||
|  | # Check summary length. The field might be filled and something might have | ||
|  | # been lost. | ||
|  | # TODO: Check on other lengths? | ||
|  | # | ||
|  | $summarylength = length($content->{episode}{Summary}); | ||
|  | if ($summarylength eq 100) { | ||
|  |     printf STDERR $ofmt, "- Summary: check length"; | ||
|  | } | ||
|  | 
 | ||
|  | # | ||
|  | # Perform a spelling check on these fields. Collect any that are detected | ||
|  | # then, if any were found, report them. | ||
|  | # | ||
|  | for my $key ( 'Title', 'Summary', 'Tags' ) { | ||
|  |     my @errors = spellcheck( $content->{episode}{$key} ); | ||
|  |     $spellchecks{$key} = \@errors if @errors; | ||
|  | } | ||
|  | _debug( $DEBUG > 2, '%spellchecks: ' . Dumper( \%spellchecks ) ); | ||
|  | if (%spellchecks) { | ||
|  |     print STDERR colored( "Spelling checks:\n", 'bold yellow' ); | ||
|  |     for my $key ( 'Title', 'Summary', 'Tags' ) { | ||
|  |         printf STDERR $ofmt, "- $key:", join( ',', @{ $spellchecks{$key} } ) | ||
|  |             if ( exists( $spellchecks{$key} ) ); | ||
|  |     } | ||
|  | } | ||
|  | 
 | ||
|  | # | ||
|  | # Report on HTML in profile and notes | ||
|  | # | ||
|  | printf STDERR $ofmt, "Profile:", | ||
|  |     count_tags( $content->{host}{Host_Profile} ) | ||
|  |     . " HTML start/end tags found"; | ||
|  | $snlevel = count_tags( $content->{episode}{Show_Notes} ); | ||
|  | printf STDERR $ofmt, "Notes:", $snlevel . " HTML start/end tags found"; | ||
|  | 
 | ||
|  | # | ||
|  | # Check the note length because if they are too long we might want to split | ||
|  | # them | ||
|  | # | ||
|  | $notelength = length( $content->{episode}{Show_Notes} ); | ||
|  | if ( $notelength > $MAXNOTELEN ) { | ||
|  |     printf STDERR $ofmt, "Notes:", | ||
|  |         colored( "Notes are longer than $MAXNOTELEN ($notelength)", | ||
|  |         'bold yellow on_magenta' ); | ||
|  | } | ||
|  | 
 | ||
|  | # | ||
|  | # Look for Unicode in the notes and flag it if found | ||
|  | # | ||
|  | if ( find_Unicode( $content->{episode}{Show_Notes} ) ) { | ||
|  |     printf STDERR $ofmt, "Notes:", | ||
|  |         colored( "Unicode characters found", 'bold yellow on_magenta' ); | ||
|  | } | ||
|  | 
 | ||
|  | #------------------------------------------------------------------------------- | ||
|  | # Always check that the given show number matches what we parsed | ||
|  | #------------------------------------------------------------------------------- | ||
|  | die "The episode specified ($showno) doesn't match the one in the JSON " | ||
|  |     . "($content->{metadata}{Episode_Number})\n" | ||
|  |     unless ( $content->{metadata}{Episode_Number} == $showno ); | ||
|  | 
 | ||
|  | # | ||
|  | # A zero host id means this is a new host. HOWEVER, it can also mean an | ||
|  | # existing host gave a mail address we don't have in the database. to take | ||
|  | # care! | ||
|  | # | ||
|  | if ( $content->{host}{Host_ID} eq 0 ) { | ||
|  |     print STDERR colored( | ||
|  |         sprintf( $ofmt, "New host:", $content->{host}{Host_Name} ), | ||
|  |         'bold yellow' ); | ||
|  | } | ||
|  | else { | ||
|  |     printf STDERR $ofmt, "Host name:", $content->{host}{Host_Name}; | ||
|  | } | ||
|  | 
 | ||
|  | # | ||
|  | # Save HTML tag count | ||
|  | # | ||
|  | $snatts{html_tags} = $snlevel; | ||
|  | 
 | ||
|  | #------------------------------------------------------------------------------- | ||
|  | # Have a go at the HTML tag percentage | ||
|  | #------------------------------------------------------------------------------- | ||
|  | $snatts{html_percent} | ||
|  |     = html_level( $content->{episode}{Show_Notes}, $snlevel ); | ||
|  | 
 | ||
|  | if ( $snlevel > 0 ) { | ||
|  |     $log->info( $showno, "Notes contain HTML" ); | ||
|  |     push( @markup_found, 'HTML' ); | ||
|  |     if ( $snatts{html_percent} > 5 ) { | ||
|  |         $log->info( $showno, "Notes probably are HTML" ); | ||
|  |         push( @markup_found, 'HTML' ); | ||
|  |     } | ||
|  |     else { | ||
|  |         push( | ||
|  |             @markup_found, | ||
|  |             @{  detect_markup( | ||
|  |                     $content->{episode}{Show_Notes}, \@markup_types, | ||
|  |                     $ofmt | ||
|  |                 ) | ||
|  |             } | ||
|  |         ); | ||
|  |     } | ||
|  | } | ||
|  | else { | ||
|  |     $log->info( $showno, "Notes contain no HTML" ); | ||
|  |     push( | ||
|  |         @markup_found, | ||
|  |         @{  detect_markup( $content->{episode}{Show_Notes}, | ||
|  |                 \@markup_types, $ofmt ) | ||
|  |         } | ||
|  |     ); | ||
|  | } | ||
|  | 
 | ||
|  | #------------------------------------------------------------------------------- | ||
|  | # Report what markup was used (not really necessary any more but left in | ||
|  | # because it's interesting). | ||
|  | #------------------------------------------------------------------------------- | ||
|  | # 2022-02-18 Now we have added 'org-mode' which doesn't match any formats sent | ||
|  | # from the web form, so we highlight this to help with the editing process. | ||
|  | # It's possible to use Pandoc to generate Markdown for editing or just convert | ||
|  | # straight to HTML if the result looks good. | ||
|  | # | ||
|  | $markup_choice = choose_markup( \@markup_found ); | ||
|  | 
 | ||
|  | printf STDERR $ofmt, "Declared format:", | ||
|  |     colour_if( | ||
|  |         $content->{metadata}{Shownotes_Format} ne 'plain_text', | ||
|  |         'bold yellow on_magenta', | ||
|  |         $content->{metadata}{Shownotes_Format} | ||
|  |     ); | ||
|  | printf STDERR $ofmt, "Found:", summarise_markup( \@markup_found ); | ||
|  | #printf STDERR $ofmt, "Recommendation:", | ||
|  | #   colour_if( | ||
|  | #       $markup_choice eq 'org-mode', | ||
|  | #       'bold yellow on_magenta', | ||
|  | #       $markup_choice | ||
|  | #   ); | ||
|  | 
 | ||
|  | printf STDERR $ofmt, "Recommendation:", | ||
|  |     colour_switch( | ||
|  |         [ | ||
|  |             $markup_choice eq 'org-mode', | ||
|  |             $markup_choice eq 'markdown', | ||
|  |             $markup_choice eq 'txt2tags', | ||
|  |             $markup_choice eq 'none', | ||
|  |             $markup_choice =~ / or /, | ||
|  |         ], | ||
|  |         [ | ||
|  |             'bold yellow on_magenta', | ||
|  |             'yellow', | ||
|  |             'yellow', | ||
|  |             'red', | ||
|  |             'yellow', | ||
|  |         ], | ||
|  |         $markup_choice | ||
|  |     ); | ||
|  | 
 | ||
|  | #------------------------------------------------------------------------------- | ||
|  | # What about the media? | ||
|  | #------------------------------------------------------------------------------- | ||
|  | # Examine the delivered media details. Start by finding the maximum index | ||
|  | # (media_files is a JSON hash containing keys like 'name' where the values | ||
|  | # are arrays). The elements are linked by their positions. | ||
|  | # | ||
|  | my $maxind = $#{ $content->{metadata}{FILES}->{media_files}->{name} }; | ||
|  | 
 | ||
|  | # | ||
|  | # Step through the media file names and their sizes building a Perl hash keyed by the | ||
|  | # name containing the file size of each | ||
|  | # | ||
|  | for ( my $i = 0; $i <= $maxind; $i++ ) { | ||
|  |     $media_files{ $content->{metadata}{FILES}->{media_files}->{name}->[$i] } | ||
|  |         = $content->{metadata}{FILES}->{media_files}->{size}->[$i]; | ||
|  | } | ||
|  | 
 | ||
|  | # | ||
|  | # If a media file is zero size, remove it from the list | ||
|  | # | ||
|  | for my $key ( keys(%media_files) ) { | ||
|  |     if ( $media_files{$key} eq 0 ) { | ||
|  |         delete( $media_files{$key} ); | ||
|  |         $maxind--; | ||
|  |     } | ||
|  | } | ||
|  | 
 | ||
|  | # | ||
|  | # Generate a string containing the names of the (non-empty) media files | ||
|  | # | ||
|  | if (%media_files) { | ||
|  |     $media_files = join( ", ", keys(%media_files) ); | ||
|  | } | ||
|  | 
 | ||
|  | #------------------------------------------------------------------------------- | ||
|  | # Determine the 'asset' state of the show | ||
|  | #------------------------------------------------------------------------------- | ||
|  | # | ||
|  | # (The information is in %media_files although strictly there could be other | ||
|  | # files like scripts, and pictures.) The audio is in %media_files unless the | ||
|  | # host has sent in an URL for downloading. | ||
|  | # | ||
|  | if (%media_files) { | ||
|  |     # | ||
|  |     # Build a hash (%assets) keyed by filename with the value being a hashref | ||
|  |     # with keys 'type' and 'size'. | ||
|  |     # | ||
|  |     for ( my $i = 0; $i <= $maxind; $i++ ) { | ||
|  |         my $name = $content->{metadata}{FILES}->{media_files}->{name}->[$i]; | ||
|  |         $assets{$name} = { | ||
|  |             type => $content->{metadata}{FILES}->{media_files}->{type}->[$i], | ||
|  |             size => $content->{metadata}{FILES}->{media_files}->{size}->[$i], | ||
|  |         }; | ||
|  |     } | ||
|  | 
 | ||
|  |     # | ||
|  |     # Process the assets | ||
|  |     # | ||
|  |     if (%assets) { | ||
|  |         # | ||
|  |         # Save just the file names in an array (@assets) for convenience. | ||
|  |         # | ||
|  |         @assets = keys(%assets); | ||
|  | 
 | ||
|  |         #------------------------------------------------------------------------------- | ||
|  |         # Check that there's audio in the media. | ||
|  |         #------------------------------------------------------------------------------- | ||
|  |         # | ||
|  |         # First look for any assets marked as audio or video (this one because | ||
|  |         # some audio has been seen to be marked this way). We haven't expanded | ||
|  |         # archive files yet because we don't want to eliminate those files | ||
|  |         # from @assets. | ||
|  |         # | ||
|  |         $has_audio = @audio | ||
|  |             = grep { $assets{$_}->{type} =~ /^(audio|video)/i } keys(%assets); | ||
|  | 
 | ||
|  |         # | ||
|  |         # Now, if no audio has yet been found, look for audio file extensions | ||
|  |         # | ||
|  |         unless ($has_audio) { | ||
|  |             $has_audio = @audio | ||
|  |                 = grep { $_ =~ /\.(aac|flac|mp3|ogg|spx|wav)$/i } | ||
|  |                 keys(%assets); | ||
|  |         } | ||
|  | 
 | ||
|  |         # | ||
|  |         # Check there is not more than one audio file | ||
|  |         # | ||
|  |         if ($#audio > 1) { | ||
|  |             print colored( | ||
|  |                 "**Warning** More than one audio file uploaded", 'red'), "\n"; | ||
|  |             $log->warning( $showno, "More than one audio file uploaded" ); | ||
|  |         } | ||
|  | 
 | ||
|  |         # | ||
|  |         # We have audio. Use the @audio array to remove the matches from | ||
|  |         # @assets. | ||
|  |         # | ||
|  |         if ($has_audio) { | ||
|  |             @assets = array_difference( \@assets, \@audio ); | ||
|  |         } | ||
|  |         if (@assets) { | ||
|  |             _debug( $DEBUG > 2, '@assets: ' . Dumper( \@assets ) ); | ||
|  |         } | ||
|  | 
 | ||
|  |         #------------------------------------------------------------------------------- | ||
|  |         # Look for archive files in the assets | ||
|  |         #------------------------------------------------------------------------------- | ||
|  |         if ( find_archive( \%assets, \@archives ) > 0 ) { | ||
|  |             _debug( $DEBUG > 1, '@archives: ' . Dumper( \@archives ) ); | ||
|  | 
 | ||
|  |             # | ||
|  |             # Extract the contents of any archives into the show directory and | ||
|  |             # write details into %assets and @extracted. | ||
|  |             # | ||
|  |             $has_archives | ||
|  |                 = ( extract_archives( \%assets, \@archives, \@extracted, $showdir ) > 0 ); | ||
|  |         } | ||
|  |         else { | ||
|  |             # | ||
|  |             # No archives, but we may have assets. Keep copies of the assets | ||
|  |             # in case we need to make destructive changes. | ||
|  |             # | ||
|  |             if (@assets && $zipfile) { | ||
|  |                 my @input = map {"$showdir/$_"} @assets; | ||
|  |                 _debug( $DEBUG > 2, 'Zip creation - @input: ' . Dumper( | ||
|  |                         \@input ) ); | ||
|  | 
 | ||
|  |                 zip \@input => $zipfile | ||
|  |                     or die "Zip creation failed: $ZipError\n"; | ||
|  |             } | ||
|  |         } | ||
|  | 
 | ||
|  |         #------------------------------------------------------------------------------- | ||
|  |         # Refresh assets after expanding archives | ||
|  |         #------------------------------------------------------------------------------- | ||
|  |         # | ||
|  |         # Remove archive files and add extracted files to the assets. We may | ||
|  |         # have a different number than we started with because any archives found | ||
|  |         # will have been expanded and the archive names removed. | ||
|  |         # | ||
|  |         @assets = array_difference(\@assets,\@archives); | ||
|  |         push(@assets, @extracted); | ||
|  | 
 | ||
|  |         # | ||
|  |         # Remove directory stuff from @assets elements and %assets keys | ||
|  |         # | ||
|  |         # foreach my $file (@assets) { | ||
|  |         #     ( my $key = $file ) =~ s{^.*/}{}; | ||
|  |         #     $assets{$key} = delete( $assets{$file} ); | ||
|  |         #     $file = $key; | ||
|  |         # } | ||
|  |         prepare_assets( \%assets, \@assets, $showdir, $assetdir ); | ||
|  |         if (@assets) { | ||
|  |             _debug( $DEBUG > 1, 'After prepare_assets, %assets: ' . Dumper( \%assets ) ); | ||
|  |             _debug( $DEBUG > 1, 'After prepare_assets, @assets: ' . Dumper( \@assets ) ); | ||
|  |         } | ||
|  | 
 | ||
|  |         #------------------------------------------------------------------------------- | ||
|  |         # Sanitise any filenames that need it | ||
|  |         #------------------------------------------------------------------------------- | ||
|  |         $sanitised = sanitise_filenames( \%assets, \@assets, $assetdir ); | ||
|  | 
 | ||
|  |         #------------------------------------------------------------------------------- | ||
|  |         # Look for any image files in the assets hash | ||
|  |         #------------------------------------------------------------------------------- | ||
|  |         $has_pictures = @pictures | ||
|  |             = grep { $assets{$_}->{type} =~ /^image/ } keys(%assets); | ||
|  |         $pictures = join( ', ', @pictures ); | ||
|  | 
 | ||
|  |         # if (@archives = | ||
|  |         #     grep { $assets{$_}->{type} =~ q{^application/zip} } keys(%assets)) { | ||
|  |         #     my $archive = Archive::Any->new( "$showdir/$archives[0]" ); | ||
|  |         #     @archived = $archive->files; | ||
|  |         #     $has_pictures += scalar(@archived); | ||
|  |         # } | ||
|  | 
 | ||
|  |         # print "D> Pictures found\n" if $has_pictures; | ||
|  |         # if ($has_pictures && @archived && $DEBUG > 2) { | ||
|  |         #    if ($has_pictures && $DEBUG > 2) { | ||
|  |         #        _debug (1, '@archives: ' . Dumper(\@archives)); | ||
|  |         #        _debug (1, '@archived: ' . Dumper(\@archived)); | ||
|  |         #    } | ||
|  | 
 | ||
|  |         if ( $has_pictures && $DEBUG > 2 ) { | ||
|  |             _debug( 1, '@pictures: ' . Dumper( \@pictures ) ); | ||
|  |         } | ||
|  | 
 | ||
|  |         # | ||
|  |         # Look for files not matching audio or image | ||
|  |         # | ||
|  |         $has_extra = grep { $assets{$_}->{type} !~ /^(image|audio|video)/ } | ||
|  |             keys(%assets); | ||
|  | 
 | ||
|  |         _debug( $DEBUG > 2, '%assets: ' . Dumper( \%assets ) ) if (%assets); | ||
|  | 
 | ||
|  |         $log->info( $showno, "Media files: $media_files" ) if ($media_files); | ||
|  |         $log->info( $showno, "Pictures: " . join( ', ', @pictures ) ) | ||
|  |             if (@pictures); | ||
|  |         $log->info( $showno, "Assets: " . join( ', ', @assets ) ) | ||
|  |             if (@assets); | ||
|  | 
 | ||
|  |     } | ||
|  | } | ||
|  | else { | ||
|  |     # | ||
|  |     # No media files at all! | ||
|  |     # | ||
|  |     $has_archives = $has_pictures = $has_audio = $has_extra = 0; | ||
|  | } | ||
|  | 
 | ||
|  | #------------------------------------------------------------------------------- | ||
|  | # Sanity checks | ||
|  | #------------------------------------------------------------------------------- | ||
|  | # | ||
|  | # 1. Check for included media or a download URL. Alert if neither | ||
|  | # | ||
|  | if ($media_files) { | ||
|  |     # | ||
|  |     # Have media, but do we have audio? | ||
|  |     # | ||
|  |     # printf STDERR $ofmt, "Media files:", $media_files; | ||
|  |     printf STDERR "%s\n", | ||
|  |         textFormat( $media_files, 'Media files:', 'L', 18, 80 ); | ||
|  |     unless ($has_audio) { | ||
|  |         alert( $ofmt, "No audio!" ); | ||
|  |         $log->error( $showno, "No audio!" ); | ||
|  |     } | ||
|  | } | ||
|  | elsif ( length( $content->{metadata}{url} ) > 0 ) { | ||
|  |     # | ||
|  |     # No media, but do we have a download URL? | ||
|  |     # | ||
|  |     printf STDERR $ofmt, "Download from:", $content->{metadata}{url}; | ||
|  | } | ||
|  | else { | ||
|  |     # | ||
|  |     # No media and no download URL, so no audio | ||
|  |     # | ||
|  |     alert( $ofmt, "No audio!" ); | ||
|  |     $log->error( $showno, "No audio!" ); | ||
|  | } | ||
|  | 
 | ||
|  | # | ||
|  | # 2. Alert if no tags | ||
|  | # | ||
|  | if ( length( $content->{episode}{Tags} ) == 0 ) { | ||
|  |     alert( $ofmt, "No tags!" ); | ||
|  |     $log->error( $showno, "No tags!" ); | ||
|  | } | ||
|  | 
 | ||
|  | # | ||
|  | # 3. Declared html5 but no HTML found | ||
|  | # | ||
|  | if ( $snlevel == 0 && $content->{metadata}{Shownotes_Format} =~ /html5/i ) { | ||
|  |     alert( $ofmt, "Declared format 'html5' with no HTML in the notes!" ); | ||
|  |     $log->error( $showno, | ||
|  |         "Declared format 'html5' with no HTML in the notes!" ); | ||
|  |     #    printf STDERR $ofmt, "", | ||
|  |     #        "${yellow}Forced format to 'plain_text'.${reset}"; | ||
|  |     printf STDERR colored( | ||
|  |         sprintf( $ofmt, "", "Forced format to 'plain_text'." ), | ||
|  |         'bold yellow' ); | ||
|  |     $content->{metadata}{Shownotes_Format} = 'plain_text'; | ||
|  |     # TODO: propagate the format change into the JSON | ||
|  | } | ||
|  | 
 | ||
|  | # | ||
|  | # 4. Tags aren't CSV. Not much of a check since so many formats are acceptable | ||
|  | #    here but not in the database. | ||
|  | #    If there's only one tag write an alert reporting the length in case the | ||
|  | #    sender forgot the commas. | ||
|  | #    TODO: Reconsider after using for a while. May be no point in doing this. | ||
|  | # | ||
|  | if ( length( $content->{episode}{Tags} ) > 0 ) { | ||
|  |     my $csv = Text::CSV->new( { sep_char => ',', allow_whitespace => 1 } ) | ||
|  |         or warn "Problem with Text::CSV: $@"; | ||
|  |     my $status = $csv->parse( $content->{episode}{'Tags'} ); | ||
|  |     unless ($status) { | ||
|  |         alert( $ofmt, $csv->error_input() . "\n" . $csv->error_diag() ); | ||
|  |         $log->error( $showno, "Tag format is invalid" ); | ||
|  |     } | ||
|  |     my @cols = $csv->fields(); | ||
|  |     if ( scalar(@cols) eq 1 ) { | ||
|  |         alert( $ofmt, | ||
|  |             "Only one tag " . length( $cols[0] ) . " characters long!" ); | ||
|  |     } | ||
|  | } | ||
|  | 
 | ||
|  | # | ||
|  | # 5. Declared HTML, but does it have assets and if so are they properly | ||
|  | #    pointed to? | ||
|  | # | ||
|  | if ( $snlevel > 0 && $content->{metadata}{Shownotes_Format} =~ /html5/i ) { | ||
|  |     # | ||
|  |     # Perform a check on the notes in memory, looking for asset links and | ||
|  |     # checking they are kosher | ||
|  |     # | ||
|  |     unless ( check_html( $content->{episode}{Show_Notes}, $base_url ) ) { | ||
|  |         print colored( | ||
|  |             "Apparently incorrect URLs detected in the notes", 'red' | ||
|  |             ), "\n"; | ||
|  |     } | ||
|  | 
 | ||
|  | } | ||
|  | 
 | ||
|  | # | ||
|  | # 6. Declared plain text, but analysis recommends HTML, so highlight this to | ||
|  | #    allow changes to be made. | ||
|  | # | ||
|  | #if ( $snlevel > 5 && $content->{metadata}{Shownotes_Format} =~ /plain_text/i ) { | ||
|  | if (   $markup_choice eq 'html5' | ||
|  |     && $content->{metadata}{Shownotes_Format} =~ /plain_text/i ) | ||
|  | { | ||
|  |     alert( $ofmt, | ||
|  |         "Declared format 'plain_text' but notes seem to be HTML5!" ); | ||
|  |     $log->error( $showno, | ||
|  |         "Declared format 'plain_text' but notes seem to be HTML5!" ); | ||
|  | } | ||
|  | 
 | ||
|  | #------------------------------------------------------------------------------- | ||
|  | # Determine the picture asset state | ||
|  | #------------------------------------------------------------------------------- | ||
|  | # | ||
|  | # If there are pictures and the format is plain text then the state is 'Pictures | ||
|  | # that need management' (1), otherwise 'Managed pictures' (2). If no pictures | ||
|  | # then state is 'No pictures found' (0). | ||
|  | # | ||
|  | if ($has_pictures) { | ||
|  |     $astate | ||
|  |         = ( $content->{metadata}{Shownotes_Format} eq 'plain_text' ? 1 : 2 ); | ||
|  |     # printf STDERR $ofmt, "Pictures:", $pictures; | ||
|  |     printf STDERR "%s\n", textFormat( $pictures, 'Pictures:', 'L', 18, 80 ); | ||
|  | } | ||
|  | else { | ||
|  |     $astate = 0; | ||
|  | } | ||
|  | printf STDERR $ofmt, "Picture state:", | ||
|  |     colour_if( $astate > 0, 'bold yellow on_magenta', $pstates[$astate] ); | ||
|  | 
 | ||
|  | #------------------------------------------------------------------------------- | ||
|  | # Flag other assets so they aren't missed | ||
|  | #------------------------------------------------------------------------------- | ||
|  | if ($has_extra) { | ||
|  |     printf STDERR $ofmt, "Asset state:", | ||
|  |         colored( ['bold yellow on_magenta'], 'Other files uploaded' ); | ||
|  | } | ||
|  | 
 | ||
|  | if (@assets) { | ||
|  |     printf STDERR $ofmt, "Archives: ", join( ', ', @archives ) | ||
|  |         if $has_archives; | ||
|  |     # printf STDERR $ofmt, "All assets:", join( ', ', @assets ); | ||
|  |     printf STDERR "%s\n", | ||
|  |         textFormat( join( ', ', @assets ), 'All assets:', 'L', 18, 80 ); | ||
|  |     if ( $sanitised > 0 ) { | ||
|  |         printf STDERR $ofmt, 'Assets sanitised:', "$sanitised"; | ||
|  |     } | ||
|  | } | ||
|  | 
 | ||
|  | # | ||
|  | # End of content report | ||
|  | # | ||
|  | print STDERR '-' x 80, "\n"; | ||
|  | 
 | ||
|  | # | ||
|  | # Log choices and problems | ||
|  | # | ||
|  | $log->info( $showno, | ||
|  |     "Declared format: " . $content->{metadata}{Shownotes_Format} ); | ||
|  | $log->info( $showno, "Recommended markup processing: $markup_choice" ); | ||
|  | 
 | ||
|  | 
 | ||
|  | #------------------------------------------------------------------------------- | ||
|  | # Output to relevant files | ||
|  | #------------------------------------------------------------------------------- | ||
|  | # | ||
|  | # Write the declared format to a file if requested | ||
|  | # | ||
|  | if ( defined($formatfile) ) { | ||
|  |     open( my $fmtfile, '>:encoding(UTF-8)', $formatfile ) | ||
|  |         or die "Unable to open output file $formatfile $!\n"; | ||
|  |     print $fmtfile $content->{metadata}{Shownotes_Format}, "\n"; | ||
|  |     close($fmtfile); | ||
|  |     print colored( "Format details written to $formatfile", 'green' ), "\n" | ||
|  |         unless $silent; | ||
|  |     $log->info( $showno, "Format details written to $formatfile" ); | ||
|  | } | ||
|  | 
 | ||
|  | # | ||
|  | # Write the release date to a file if requested | ||
|  | # | ||
|  | if ( defined($releasefile) ) { | ||
|  |     open( my $relfile, '>:encoding(UTF-8)', $releasefile ) | ||
|  |         or die "Unable to open output file $releasefile $!\n"; | ||
|  |     print $relfile $content->{metadata}{Episode_Date}, "\n"; | ||
|  |     close($relfile); | ||
|  |     print colored( "Release date written to $releasefile", 'green' ), "\n" | ||
|  |         unless $silent; | ||
|  |     $log->info( $showno, "Release date written to $releasefile" ); | ||
|  | } | ||
|  | 
 | ||
|  | # | ||
|  | # Write the picture file names to a file if requested and there are some | ||
|  | # | ||
|  | if ( @pictures && defined($picturefile) ) { | ||
|  |     open( my $picfile, '>:encoding(UTF-8)', $picturefile ) | ||
|  |         or die "Unable to open output file $picturefile $!\n"; | ||
|  | 
 | ||
|  |     # | ||
|  |     # Sort the pictures alphabetically in case the host didn't order them | ||
|  |     # sensibly. | ||
|  |     # | ||
|  |     foreach my $picture (sort(@pictures)) { | ||
|  |         print $picfile "$picture\n"; | ||
|  |     } | ||
|  |     close($picfile); | ||
|  | 
 | ||
|  |     print colored( "Picture names written to $picturefile", 'green' ), "\n" | ||
|  |         unless $silent; | ||
|  |     $log->info( $showno, "Picture names written to $picturefile" ); | ||
|  | } | ||
|  | 
 | ||
|  | # | ||
|  | # Write the asset file names to a file if requested and there are some | ||
|  | # | ||
|  | if ( @assets && defined($assetfile) ) { | ||
|  |     open( my $assfile, '>:encoding(UTF-8)', $assetfile ) | ||
|  |         or die "Unable to open output file $assetfile $!\n"; | ||
|  |     foreach my $asset (@assets) { | ||
|  |         print $assfile "$asset\n"; | ||
|  |     } | ||
|  |     close($assfile); | ||
|  |     print colored( "Asset names written to $assetfile", 'green' ), "\n" | ||
|  |         unless $silent; | ||
|  |     $log->info( $showno, "Asset names written to $assetfile" ); | ||
|  | } | ||
|  | 
 | ||
|  | #------------------------------------------------------------------------------- | ||
|  | # Write the shownotes component to a file if requested | ||
|  | #------------------------------------------------------------------------------- | ||
|  | if ($shownotes) { | ||
|  |     # | ||
|  |     # Remove carriage returns | ||
|  |     # | ||
|  |     $content->{episode}{Show_Notes} =~ s/\r//g; | ||
|  |     _debug( $DEBUG > 2, quote_crlf( $content->{episode}{Show_Notes} ) ); | ||
|  | 
 | ||
|  |     # | ||
|  |     # Save the notes | ||
|  |     # | ||
|  |     open( my $out, '>:encoding(UTF-8)', $shownotes ) | ||
|  |         or die "Unable to open output file $shownotes: $!\n"; | ||
|  |     print $out $content->{episode}{Show_Notes}, "\n"; | ||
|  |     close($out); | ||
|  |     print colored( "Shownotes written to $shownotes", 'green' ), "\n" | ||
|  |         unless $silent; | ||
|  |     $log->info( $showno, "Shownotes written to $shownotes" ); | ||
|  | 
 | ||
|  |     # | ||
|  |     # We're generating shownotes and we have declared HTML (or we're pretty | ||
|  |     # sure after parsing that they are HTML), so save the notes as HTML as | ||
|  |     # well. Now achieved simply by symlinking the two files if they are | ||
|  |     # different. This allows the 'do_edit` script to edit the *.out file and | ||
|  |     # thereby affect the *.html file. | ||
|  |     # | ||
|  |     # 2018-09-13: Removed the check for $markup_choice because it's adding | ||
|  |     # links inappropriately. | ||
|  |     # | ||
|  |     # 2020-05-31: Turned the symlink to a hard link | ||
|  |     # | ||
|  |     if ( $content->{metadata}{Shownotes_Format} =~ /html5/i ) { | ||
|  |         ( my $htmlfile = $shownotes ) =~ s/\.[^.]+$/.html/; | ||
|  | 
 | ||
|  |         unless ( $htmlfile eq $shownotes ) { | ||
|  |             if ( link( $shownotes, $htmlfile ) ) { | ||
|  |                 print colored( | ||
|  |                     "HTML notes - linked $htmlfile to $shownotes", 'green' | ||
|  |                     ), | ||
|  |                     "\n"; | ||
|  |                 $log->info( $showno, | ||
|  |                     "HTML notes - linked $htmlfile to $shownotes" ); | ||
|  |             } | ||
|  |             else { | ||
|  |                 print colored( | ||
|  |                     "**Warning** Failed to link $htmlfile to $shownotes", | ||
|  |                     'red' | ||
|  |                     ), | ||
|  |                     "\n"; | ||
|  |                 print colored( "Equivalent CLI command:", 'yellow' ), | ||
|  |                     "\n", | ||
|  |                     colored( "ln $shownotes $htmlfile", 'yellow' ), "\n"; | ||
|  |                 $log->warning( $showno, | ||
|  |                     "Failed to link $htmlfile to $shownotes" ); | ||
|  |             } | ||
|  |         } | ||
|  |     } | ||
|  | } | ||
|  | else { | ||
|  |     print colored( "No shownotes written", 'green' ), "\n" unless $silent; | ||
|  |     $log->info( $showno, "No shownotes written" ); | ||
|  | } | ||
|  | 
 | ||
|  | # | ||
|  | # Save the status | ||
|  | # | ||
|  | update_status($statusfile,'parsed'); | ||
|  | 
 | ||
|  | exit; | ||
|  | 
 | ||
|  | #===  FUNCTION  ================================================================ | ||
|  | #         NAME: find_archive | ||
|  | #      PURPOSE: Look though the assets for files that are some kind of archive | ||
|  | #   PARAMETERS: $assets         hashref pointing to the asset filename list | ||
|  | #                               (contains size and type information) | ||
|  | #               $archfiles      arrayref to hold the archive files found | ||
|  | #      RETURNS: Number of archive files found | ||
|  | #  DESCRIPTION: | ||
|  | #       THROWS: No exceptions | ||
|  | #     COMMENTS: Uses 'any' from 'List::MoreUtils' | ||
|  | #     SEE ALSO: N/A | ||
|  | #=============================================================================== | ||
|  | sub find_archive { | ||
|  |     my ( $assets, $archfiles ) = @_; | ||
|  | 
 | ||
|  |     my ( $count, $type ); | ||
|  | 
 | ||
|  |     # | ||
|  |     # Return if no assets given | ||
|  |     # | ||
|  |     return 0 unless %$assets; | ||
|  | 
 | ||
|  |     # | ||
|  |     # Known archive types (from what's been seen so far). Declared as | ||
|  |     # a 'state' variable so it's only initialised once. | ||
|  |     # | ||
|  |     state @mime_types = ( | ||
|  |         #{{{ | ||
|  |         'application/x-tar', | ||
|  |         'application/x-bzip-compressed-tar', | ||
|  |         'application/x-compressed-tar', | ||
|  |         'application/x-gtar', | ||
|  |         'application/zip', | ||
|  |         'application/x-zip-compressed', | ||
|  |         'application/x-bzip2', | ||
|  |         'application/gzip', | ||
|  |         #}}} | ||
|  |     ); | ||
|  | 
 | ||
|  |     $count = 0; | ||
|  |     foreach my $file ( keys(%$assets) ) { | ||
|  |         $type = $assets->{$file}->{type}; | ||
|  |         if ( any { $_ eq $type } @mime_types ) { | ||
|  |             push( @$archfiles, $file ); | ||
|  |             $count++; | ||
|  |         } | ||
|  |     } | ||
|  | 
 | ||
|  |     return $count; | ||
|  | } | ||
|  | 
 | ||
|  | #===  FUNCTION  ================================================================ | ||
|  | #         NAME: extract_archives | ||
|  | #      PURPOSE: Given a list of archive files, extract their contents and save | ||
|  | #               the file names | ||
|  | #   PARAMETERS: $assets         hashref containing the filename list and some | ||
|  | #                               attributes. Read and written. | ||
|  | #               $archfiles      arrayref holding the archive files found (by | ||
|  | #                               find_archive). Read only. | ||
|  | #               $extracted      arrayref to hold the files extracted from the | ||
|  | #                               archives. | ||
|  | #               $showdir        path to find the archive file(s) | ||
|  | #      RETURNS: Number of files extracted. Alters %$assets to reflect the new | ||
|  | #               files and remove the extracted archive files. | ||
|  | #  DESCRIPTION: Just return with zero if there are no archives. | ||
|  | #               Process the archives one by one. Open the current one with | ||
|  | #               Archive::Any and save the file contents to return to the | ||
|  | #               caller. Extract the archive contents to the 'uploads' | ||
|  | #               directory in the directory for the show. | ||
|  | #               Remove all the archive file references from %$assets. Add in | ||
|  | #               the extracted files using MIME::Types to get the mime details | ||
|  | #               and 'stat' to get the sizes. Keep a cound of the extracted | ||
|  | #               files to return to the caller. | ||
|  | #       THROWS: No exceptions | ||
|  | #     COMMENTS: None | ||
|  | #     SEE ALSO: N/A | ||
|  | #=============================================================================== | ||
|  | sub extract_archives { | ||
|  |     my ( $assets, $archfiles, $extracted, $showdir ) = @_; | ||
|  | 
 | ||
|  |     my ( $aa, $mt, $typename, $filecount ); | ||
|  | 
 | ||
|  |     # | ||
|  |     # Return if no archives given | ||
|  |     # | ||
|  |     return 0 unless @$archfiles; | ||
|  | 
 | ||
|  |     # | ||
|  |     # Loop through the archives we were given. There could be more than one, | ||
|  |     # though it's likely to be rare. | ||
|  |     # | ||
|  |     foreach my $arch (@$archfiles) { | ||
|  |         #print "D> Archive file: $arch\n"; | ||
|  | 
 | ||
|  |         # | ||
|  |         # Open the archive file | ||
|  |         # | ||
|  |         $aa = Archive::Any->new("$showdir/$arch"); | ||
|  | 
 | ||
|  |         # | ||
|  |         # Get and store a list of the contents of the archive | ||
|  |         # | ||
|  |         push( @$extracted, $aa->files ); | ||
|  | 
 | ||
|  |         # | ||
|  |         # Extract everything to the show directory | ||
|  |         # | ||
|  |         #print "About to extract to $assetdir\n"; | ||
|  |         $aa->extract($showdir); | ||
|  |     } | ||
|  | 
 | ||
|  |     #print "D> Extracted files: ",join("\nD> ", @$extracted),"\n"; | ||
|  | 
 | ||
|  |     # | ||
|  |     # Now update the %$assets hash, first deleting the archive files we have | ||
|  |     # extracted the contents of. | ||
|  |     # | ||
|  |     foreach my $arch (@$archfiles) { | ||
|  |         delete( $assets->{$arch} ); | ||
|  |     } | ||
|  |     # _debug(1,'%$assets after deletions: ' . Dumper($assets)); | ||
|  | 
 | ||
|  |     # | ||
|  |     # Now add in the extracted files with their details | ||
|  |     # | ||
|  |     foreach my $asset (@$extracted) { | ||
|  |         # | ||
|  |         # Rationalise the filename | ||
|  |         # | ||
|  |         $asset = canonpath($asset); | ||
|  | 
 | ||
|  |         # | ||
|  |         # Get the MIME type of the file (stringifying the result of mimeTypeOf | ||
|  |         # otherwise it's a MIME::Type object) | ||
|  |         # | ||
|  |         $mt               = MIME::Types->new; | ||
|  |         $typename         = "" . $mt->mimeTypeOf("$showdir/$asset"); | ||
|  |         $assets->{$asset} = { | ||
|  |             type => $typename, | ||
|  |             size => ( stat("$showdir/$asset") )[7], | ||
|  |         }; | ||
|  | 
 | ||
|  |         $filecount++; | ||
|  |     } | ||
|  |     # _debug(1,'%$assets after additions: ' . Dumper($assets)); | ||
|  | 
 | ||
|  |     return $filecount; | ||
|  | } | ||
|  | 
 | ||
|  | #===  FUNCTION  ================================================================ | ||
|  | #         NAME: array_difference | ||
|  | #      PURPOSE: Determine the members of the left array not in the right array | ||
|  | #   PARAMETERS: $left           arrayref to the left-hand array | ||
|  | #               $right          arrayref to the right-hand array | ||
|  | #      RETURNS: The difference between the two arrays (@$left - @$right in | ||
|  | #               concept). All the elements that are in @$left but not in | ||
|  | #               @$right are returned. | ||
|  | #  DESCRIPTION: Loads a hash with keys from the $left array. This will remove | ||
|  | #               duplicates as a by-product. Using the $right array to provide | ||
|  | #               keys we delete all hash elements that match. This leaves the | ||
|  | #               hash containing the difference between the two arrays by | ||
|  | #               "subtracting" the right from the left. Another consequence of | ||
|  | #               the method used is that the resulting list is not in the same | ||
|  | #               order as the $left array. | ||
|  | #       THROWS: No exceptions | ||
|  | #     COMMENTS: None | ||
|  | #     SEE ALSO: N/A | ||
|  | #=============================================================================== | ||
|  | sub array_difference { | ||
|  |     my ( $left, $right ) = @_; | ||
|  | 
 | ||
|  |     my %files; | ||
|  | 
 | ||
|  |     unless (ref($left) eq 'ARRAY' && ref($right) eq 'ARRAY') { | ||
|  |         warn "Arguments to array_difference must both be arrayrefs\n"; | ||
|  |         return; | ||
|  |     } | ||
|  | 
 | ||
|  |     @files{@$left} = ();          # All files are the keys. | ||
|  |     delete( @files{@$right} );    # Remove the links. | ||
|  | 
 | ||
|  |     return keys(%files); | ||
|  | } | ||
|  | 
 | ||
|  | #===  FUNCTION  ================================================================ | ||
|  | #         NAME: prepare_assets | ||
|  | #      PURPOSE: Tidy assets, removing directories if necessary, and place the | ||
|  | #               files in the upload directory | ||
|  | #   PARAMETERS: $assets_h       hashref containing asset details | ||
|  | #               $assets_a       arrayref containing asset names | ||
|  | #               $showdir        string containing the show directory | ||
|  | #               $assetdir       string containing the asset upload directory | ||
|  | #                               path | ||
|  | #      RETURNS: Nothing | ||
|  | #  DESCRIPTION:  | ||
|  | #       THROWS: No exceptions | ||
|  | #     COMMENTS: None | ||
|  | #     SEE ALSO: N/A | ||
|  | #=============================================================================== | ||
|  | sub prepare_assets { | ||
|  |     my ( $assets_h, $assets_a, $showdir, $assetdir ) = @_; | ||
|  | 
 | ||
|  |     my ( $file, $dir, $base, @dels, %dirs, %moves ); | ||
|  | 
 | ||
|  |     # | ||
|  |     # Create the asset directory | ||
|  |     # | ||
|  |     unless ( -e $assetdir ) { | ||
|  |         mkdir($assetdir) or die "Failed to create $assetdir\n"; | ||
|  |     } | ||
|  | 
 | ||
|  |     # | ||
|  |     # Remove directory stuff from @assets elements and %assets keys saving | ||
|  |     # directories, and moving files as we go. | ||
|  |     # | ||
|  |     # foreach my $file (@$assets_a) { | ||
|  |     for ( my $i = 0; $i <= $#{$assets_a}; $i++ ) { | ||
|  |         # | ||
|  |         # Get and parse an asset filename (using Path::Class). Force the | ||
|  |         # directory and base part to become strings ("stringify") | ||
|  |         # | ||
|  |         $file = file( $assets_a->[$i] ); | ||
|  |         $dir  = "" . $file->dir; | ||
|  |         $base = "" . $file->basename; | ||
|  | 
 | ||
|  |         # | ||
|  |         # Does the asset have a directory part and a basename part? If so we | ||
|  |         # want to move the file to the asset directory and eventually delete | ||
|  |         # the directory the asset came from. | ||
|  |         # | ||
|  |         # if ( ( $dir, $base ) = ( $file =~ qr{^(.*/)(.+)$} ) ) { | ||
|  |         if ( $dir !~ /^\.?$/ && $base ne '' ) { | ||
|  |             # | ||
|  |             # The asset filename starts with a directory. | ||
|  |             # Save it. | ||
|  |             # | ||
|  |             $dirs{$dir} = 1 unless exists( $dirs{$dir} ); | ||
|  |             # push( @dirs, $dir ); | ||
|  | 
 | ||
|  |             # | ||
|  |             # Update the hash key | ||
|  |             # | ||
|  |             $assets_h->{$base} = delete( $assets_h->{$assets_a->[$i]} ); | ||
|  | 
 | ||
|  |             # | ||
|  |             # Save the move | ||
|  |             # | ||
|  |             $moves{"$showdir/$file"} = "$assetdir/$base"; | ||
|  | 
 | ||
|  |             # | ||
|  |             # Alter the asset name in @assets | ||
|  |             # | ||
|  |             # $file = $base; | ||
|  |             $assets_a->[$i] = $base; | ||
|  |         } | ||
|  |         elsif ( $base eq '' ) { | ||
|  |             # | ||
|  |             # We just have a directory, no basename, so queue to delete it | ||
|  |             # | ||
|  |             push(@dels,$i); | ||
|  |         } | ||
|  |         else { | ||
|  |             # | ||
|  |             # No directory (or just '.'), just a basename, so queue the move | ||
|  |             # | ||
|  |             $moves{"$showdir/$base"} = "$assetdir/$base"; | ||
|  |         } | ||
|  | 
 | ||
|  |     } | ||
|  | 
 | ||
|  |     # | ||
|  |     # Perform any moves (using File::Copy) | ||
|  |     # | ||
|  |     foreach my $move (keys(%moves)) { | ||
|  |         # TODO: Is the 'unless' needed? | ||
|  |         move ($move,$moves{$move}) unless ($move =~ qr{/$}); | ||
|  |     } | ||
|  | 
 | ||
|  |     # | ||
|  |     # Clean out unwanted directories from @$assets_a | ||
|  |     # | ||
|  |     foreach my $del (@dels) { | ||
|  |         splice(@$assets_a,$del,1); | ||
|  |     } | ||
|  | 
 | ||
|  |     # | ||
|  |     # | ||
|  |     # Remove any real directories created by unpacking archives | ||
|  |     # | ||
|  |     foreach my $d (keys(%dirs)) { | ||
|  |         rmdir("$showdir/$d") or | ||
|  |             warn "Problem deleting unwanted directory: $d\n"; | ||
|  |     } | ||
|  | 
 | ||
|  | } | ||
|  | 
 | ||
|  | #===  FUNCTION  ================================================================ | ||
|  | #         NAME: sanitise_filenames | ||
|  | #      PURPOSE: Deal with filenames with spaces to make life easier | ||
|  | #   PARAMETERS: $assets_h       hashref containing asset details | ||
|  | #               $assets_a       arrayref containing asset names | ||
|  | #               $assetdir       string containing the asset upload directory | ||
|  | #                               path | ||
|  | #      RETURNS: Number of fixes | ||
|  | #  DESCRIPTION:  | ||
|  | #       THROWS: No exceptions | ||
|  | #     COMMENTS: None | ||
|  | #     SEE ALSO: N/A | ||
|  | #=============================================================================== | ||
|  | sub sanitise_filenames { | ||
|  |     my ( $assets_h, $assets_a, $assetdir ) = @_; | ||
|  | 
 | ||
|  |     my ( $file, $newfile, %moves, $changes ); | ||
|  | 
 | ||
|  |     # | ||
|  |     # Look at all asset filenames | ||
|  |     # | ||
|  |     for ( my $i = 0; $i <= $#{$assets_a}; $i++ ) { | ||
|  |         $file = $assets_a->[$i]; | ||
|  | 
 | ||
|  |         # | ||
|  |         # Does the name have whitespace? | ||
|  |         # | ||
|  |         if ($file =~ /\s/) { | ||
|  |             # | ||
|  |             # Sanitise and save the change | ||
|  |             # | ||
|  |             ($newfile = $file) =~ s/ /_/g; | ||
|  | 
 | ||
|  |             # | ||
|  |             # Adjust asset array and hash | ||
|  |             # | ||
|  |             $assets_a->[$i] = $newfile; | ||
|  |             $assets_h->{$newfile} = delete( $assets_h->{$file} ); | ||
|  | 
 | ||
|  |             # | ||
|  |             # Save the file move | ||
|  |             # | ||
|  |             $moves{"$assetdir/$file"} = "$assetdir/$newfile"; | ||
|  |         } | ||
|  |     } | ||
|  | 
 | ||
|  |     if (%moves) { | ||
|  |         # | ||
|  |         # Perform any moves (using File::Copy) | ||
|  |         # | ||
|  |         $changes = 0; | ||
|  |         foreach my $move (keys(%moves)) { | ||
|  |             move ($move,$moves{$move}); | ||
|  |             $changes++; | ||
|  |         } | ||
|  | 
 | ||
|  |         return $changes; | ||
|  |     } | ||
|  |     else { | ||
|  |         return 0; | ||
|  |     } | ||
|  | 
 | ||
|  | } | ||
|  | 
 | ||
|  | #===  FUNCTION  ================================================================ | ||
|  | #         NAME: choose_markup | ||
|  | #      PURPOSE: Given a list of possible markup types, choose which one to | ||
|  | #               recommend | ||
|  | #   PARAMETERS: $markup_found   Arrayref containing markup names | ||
|  | #      RETURNS: The markup name to use | ||
|  | #  DESCRIPTION: The array referenced by $markup_found should have one or more | ||
|  | #               instances of a markup type. We want to decide which one to | ||
|  | #               recommend for processing. | ||
|  | #       THROWS: No exceptions | ||
|  | #     COMMENTS: None | ||
|  | #     SEE ALSO: N/A | ||
|  | #=============================================================================== | ||
|  | sub choose_markup { | ||
|  |     my ($markup_found) = @_; | ||
|  | 
 | ||
|  |     my %freq; | ||
|  |     my @res; | ||
|  | 
 | ||
|  |     return 'none' unless $markup_found && scalar(@$markup_found) > 0; | ||
|  |     return 'none' if $markup_found->[0] eq 'none'; | ||
|  | 
 | ||
|  |     # | ||
|  |     # Collect frequencies of each markup type | ||
|  |     # | ||
|  |     foreach my $mu (@$markup_found) { | ||
|  |         if ( exists( $freq{$mu} ) ) { | ||
|  |             $freq{$mu}++; | ||
|  |         } | ||
|  |         else { | ||
|  |             $freq{$mu} = 1; | ||
|  |         } | ||
|  |     } | ||
|  | 
 | ||
|  |     # | ||
|  |     # If there's only one key then that's the recommendation | ||
|  |     # | ||
|  |     return ( keys(%freq) )[0] if scalar( keys(%freq) ) == 1; | ||
|  | 
 | ||
|  |     # | ||
|  |     # Find the highest frequency | ||
|  |     # | ||
|  |     my $max = 0; | ||
|  |     foreach my $key ( keys(%freq) ) { | ||
|  |         if ( $freq{$key} > $max ) { | ||
|  |             $max = $freq{$key}; | ||
|  |         } | ||
|  |     } | ||
|  | 
 | ||
|  |     # | ||
|  |     # Collect all the keys with the maximum frequency | ||
|  |     # | ||
|  |     foreach my $key ( keys(%freq) ) { | ||
|  |         if ( $freq{$key} == $max ) { | ||
|  |             push( @res, $key ); | ||
|  |         } | ||
|  |     } | ||
|  | 
 | ||
|  |     # | ||
|  |     # Return the matching markup types | ||
|  |     # | ||
|  |     return join( ' or ', @res ); | ||
|  | 
 | ||
|  | } | ||
|  | 
 | ||
|  | #===  FUNCTION  ================================================================ | ||
|  | #         NAME: summarise_markup | ||
|  | #      PURPOSE: Summarise the list of matches to markup types | ||
|  | #   PARAMETERS: $markup_found   Arrayref containing markup names | ||
|  | #      RETURNS: A string containing markup types and their frequencies | ||
|  | #  DESCRIPTION: | ||
|  | #       THROWS: No exceptions | ||
|  | #     COMMENTS: None | ||
|  | #     SEE ALSO: N/A | ||
|  | #=============================================================================== | ||
|  | sub summarise_markup { | ||
|  |     my ($markup_found) = @_; | ||
|  | 
 | ||
|  |     my %freq; | ||
|  |     my @res; | ||
|  | 
 | ||
|  |     return 'none' unless $markup_found && scalar(@$markup_found) > 0; | ||
|  |     return 'none' if $markup_found->[0] eq 'none'; | ||
|  | 
 | ||
|  |     # | ||
|  |     # Collect frequencies of each markup type | ||
|  |     # | ||
|  |     foreach my $mu (@$markup_found) { | ||
|  |         if ( exists( $freq{$mu} ) ) { | ||
|  |             $freq{$mu}++; | ||
|  |         } | ||
|  |         else { | ||
|  |             $freq{$mu} = 1; | ||
|  |         } | ||
|  |     } | ||
|  | 
 | ||
|  |     foreach my $key ( sort( keys(%freq) ) ) { | ||
|  |         push( @res, "$key($freq{$key})" ); | ||
|  |     } | ||
|  | 
 | ||
|  |     return join( ", ", @res ); | ||
|  | 
 | ||
|  | } | ||
|  | 
 | ||
|  | #===  FUNCTION  ================================================================ | ||
|  | #         NAME: detect_markup | ||
|  | #      PURPOSE: Tries to detect whether there is any markup in the string and | ||
|  | #               if so what it is | ||
|  | #   PARAMETERS: $string         String to examine | ||
|  | #               $markup_types   Arrayref pointing to regular expression | ||
|  | #                               structures to be used in the detection process | ||
|  | #               $ofmt           Output format for 'printf' | ||
|  | #      RETURNS: A string containing the markup type(s) or 'none' | ||
|  | #  DESCRIPTION: Applies regular expressions to the string (notes) looking for | ||
|  | #               markup signatures. Array @markup_types contains one hash per | ||
|  | #               regex, along with an array of markdown types the regex relates | ||
|  | #               to and a comment explaining what's being detected. The regex | ||
|  | #               is matched against the string and a count of matches returned. | ||
|  | #               If there are matches then an analysis is printed on STDERR. | ||
|  | #               The variable MARKUP_DETECTED is a global variable accessed | ||
|  | #               through 'our' which controls whether the analyses have | ||
|  | #               a single header. | ||
|  | #       THROWS: No exceptions | ||
|  | #     COMMENTS: None | ||
|  | #     SEE ALSO: N/A | ||
|  | #=============================================================================== | ||
|  | sub detect_markup { | ||
|  |     my ( $string, $markup_types, $ofmt ) = @_; | ||
|  | 
 | ||
|  |     my ( @type, $count ); | ||
|  |     our $MARKUP_DETECTED; | ||
|  | 
 | ||
|  |     return ['none'] unless $string; | ||
|  | 
 | ||
|  |     foreach my $test (@$markup_types) { | ||
|  |         if ( $count = () = ( $string =~ /$test->{regex}/g ) ) { | ||
|  |             print STDERR "Markup check:\n" if ( $MARKUP_DETECTED++ == 0 ); | ||
|  |             printf STDERR $ofmt, "- Matched:", | ||
|  |                 $test->{regex} . " -> " . join( ",", @{ $test->{type} } ); | ||
|  |             printf STDERR $ofmt, "  Comment:", $test->{comment}; | ||
|  |             printf STDERR $ofmt, "  Matches:", $count; | ||
|  |             push( @type, @{ $test->{type} } ) for 1 .. $count; | ||
|  |         } | ||
|  |     } | ||
|  | 
 | ||
|  |     return ( @type ? \@type : ['none'] ); | ||
|  | } | ||
|  | 
 | ||
|  | #===  FUNCTION  ================================================================ | ||
|  | #         NAME: count_tags | ||
|  | #      PURPOSE: Counts the number of start tags in what might be HTML | ||
|  | #   PARAMETERS: $string         String to examine | ||
|  | #      RETURNS: An integer count of the number of start tags found | ||
|  | #  DESCRIPTION: Uses HTML::Parser to parse the input string. A handler is | ||
|  | #               defined which accumulates start and end tags in an array. The | ||
|  | #               number of start tags found is returned to the caller. | ||
|  | #       THROWS: No exceptions | ||
|  | #     COMMENTS: None | ||
|  | #     SEE ALSO: N/A | ||
|  | #=============================================================================== | ||
|  | sub count_tags { | ||
|  |     my ($string) = @_; | ||
|  | 
 | ||
|  |     my @accum; | ||
|  |     chomp($string); | ||
|  |     return 0 unless $string; | ||
|  | 
 | ||
|  |     my $p = HTML::Parser->new( | ||
|  |         api_version => 3, | ||
|  |         handlers    => { | ||
|  |             start => [ \@accum, "event,text" ], | ||
|  |             end   => [ \@accum, "event,text" ] | ||
|  |         } | ||
|  |     ); | ||
|  |     $p->parse($string); | ||
|  |     $p->eof; | ||
|  | 
 | ||
|  |     return scalar(@accum); | ||
|  | } | ||
|  | 
 | ||
|  | #===  FUNCTION  ================================================================ | ||
|  | #         NAME: html_level | ||
|  | #      PURPOSE: Returns the proportion of HTML in a string | ||
|  | #   PARAMETERS: $string         String to examine | ||
|  | #               $tag_count      Number of start & end tags | ||
|  | #      RETURNS: A floating point number representing the (very) rough | ||
|  | #               percentage of HTML tags in the string | ||
|  | #  DESCRIPTION: | ||
|  | #       THROWS: No exceptions | ||
|  | #     COMMENTS: None | ||
|  | #     SEE ALSO: N/A | ||
|  | #=============================================================================== | ||
|  | sub html_level { | ||
|  |     my ( $string, $tag_count ) = @_; | ||
|  | 
 | ||
|  |     return '0.0' unless $string; | ||
|  | 
 | ||
|  |     chomp($string); | ||
|  |     my @words = split( /\s+/, $string ); | ||
|  |     my $wc    = scalar(@words); | ||
|  | 
 | ||
|  |     return sprintf( "%.2f", ( $tag_count / $wc ) * 100 ); | ||
|  | } | ||
|  | 
 | ||
|  | #===  FUNCTION  ================================================================ | ||
|  | #         NAME: check_html | ||
|  | #      PURPOSE: Checks an HTML string to ensure HPR links are valid | ||
|  | #   PARAMETERS: $html           HTML string | ||
|  | #      RETURNS: True if valid, otherwise false | ||
|  | #  DESCRIPTION: | ||
|  | #       THROWS: No exceptions | ||
|  | #     COMMENTS: None | ||
|  | #     SEE ALSO: N/A | ||
|  | #=============================================================================== | ||
|  | sub check_html { | ||
|  |     my ( $html, $base_url ) = @_; | ||
|  | 
 | ||
|  |     my ( $parser, @links, $result, $etype, $aname, $avalue ); | ||
|  |     my ( $base_re, $show_re, $host_re, $anchor_re, $asset_re ); | ||
|  | 
 | ||
|  |     chomp($html); | ||
|  |     return 1 unless $html; | ||
|  | 
 | ||
|  |     $parser = HTML::LinkExtor->new( undef, $base_url ); | ||
|  |     $parser->parse($html); | ||
|  |     @links = $parser->links; | ||
|  | 
 | ||
|  |     #    print Dumper( \@links ), "\n"; | ||
|  | 
 | ||
|  |     # | ||
|  |     # Make some regular expressions. Note that the $base_url is expected to | ||
|  |     # end with a '/' | ||
|  |     # | ||
|  |     $base_re   = qr{^${base_url}}; | ||
|  |     $host_re   = qr{${base_re}correspondents.php\?hostid=\d{1,4}$}; | ||
|  |     $show_re   = qr{${base_re}eps.php\?id=\d{1,4}$}; | ||
|  |     $anchor_re = qr{${base_re}\#.+$}; | ||
|  |     $asset_re  = qr{${base_re}eps/hpr\d{1,4}/.+$}; | ||
|  | 
 | ||
|  |     # | ||
|  |     # Walk the collected links | ||
|  |     # | ||
|  |     $result = 0; | ||
|  |     foreach my $element (@links) { | ||
|  |         # | ||
|  |         # Collect the parsed links | ||
|  |         # | ||
|  |         ( $etype, $aname, $avalue ) = @$element; | ||
|  | 
 | ||
|  |         # | ||
|  |         # Only check HPR links | ||
|  |         # | ||
|  |         if ( $avalue =~ /$base_re/ ) { | ||
|  | 
 | ||
|  |             # | ||
|  |             # Not interested in correctly formed asset or show links | ||
|  |             # | ||
|  |             return 1 | ||
|  |                 if ( $avalue =~ /$show_re|$host_re|$anchor_re|$asset_re/ ); | ||
|  | 
 | ||
|  |             # | ||
|  |             # Something seems to be wrong with a link | ||
|  |             # | ||
|  |             if ( $etype eq 'a' ) { | ||
|  |                 $result++; | ||
|  |                 print "<A> tag $avalue\n" unless $silent; | ||
|  |             } | ||
|  |             elsif ( $etype eq 'img' ) { | ||
|  |                 $result++; | ||
|  |                 print "<A> tag $avalue\n" unless $silent; | ||
|  |             } | ||
|  | 
 | ||
|  |         } | ||
|  |     } | ||
|  | 
 | ||
|  |     return $result == 0; | ||
|  | } | ||
|  | 
 | ||
|  | #===  FUNCTION  ================================================================ | ||
|  | #         NAME: find_Unicode | ||
|  | #      PURPOSE: Scans a text string for Unicode in case we want to remove it | ||
|  | #   PARAMETERS: $string         String to examine | ||
|  | #      RETURNS: True (1) if Unicode is found, otherwise False (0) | ||
|  | #  DESCRIPTION: | ||
|  | #       THROWS: No exceptions | ||
|  | #     COMMENTS: None | ||
|  | #     SEE ALSO: N/A | ||
|  | #=============================================================================== | ||
|  | sub find_Unicode { | ||
|  |     my ($string) = @_; | ||
|  | 
 | ||
|  |     return ( $string =~ /[^\x{00}-\x{7F}]/ ); | ||
|  | } | ||
|  | 
 | ||
|  | #===  FUNCTION  ================================================================ | ||
|  | #         NAME: output_file_name | ||
|  | #      PURPOSE: Generate an output file name with three choices | ||
|  | #   PARAMETERS: $optarg         the argument to the option choosing the filename | ||
|  | #               $showno         the show number to add to certain name types | ||
|  | #               $template       a default 'sprintf' template for the name | ||
|  | #      RETURNS: The name of the output file | ||
|  | #  DESCRIPTION: If there's a defined output filename then there are three | ||
|  | #               options: a null string, a plain filename and a substitution | ||
|  | #               string with '%d' sequences. The null string means the user used | ||
|  | #               '-option' without a value, so we want to generate a substitution | ||
|  | #               string. A string with '%d' requires a check to ensure there's | ||
|  | #               the right number, just one. The plain filename needs no more | ||
|  | #               work. | ||
|  | #       THROWS: No exceptions | ||
|  | #     COMMENTS: None | ||
|  | #     SEE ALSO: N/A | ||
|  | #=============================================================================== | ||
|  | sub output_file_name { | ||
|  |     my ( $optarg, $showno, $template ) = @_; | ||
|  | 
 | ||
|  |     my ( $filename, $count ); | ||
|  | 
 | ||
|  |     # | ||
|  |     # We shouldn't be called with a null option argument | ||
|  |     # | ||
|  |     return unless defined($optarg); | ||
|  | 
 | ||
|  |     # | ||
|  |     # Does the option have an argument? | ||
|  |     # | ||
|  |     if ( $optarg =~ /^$/ ) { | ||
|  |         # | ||
|  |         # No argument; use the show number from the -episode=N option | ||
|  |         # | ||
|  |         $filename = sprintf( $template, $showno ); | ||
|  |     } | ||
|  |     elsif ( $optarg =~ /%d/ ) { | ||
|  |         # | ||
|  |         # There's an argument, does it have a '%d' in it? | ||
|  |         # | ||
|  |         $count = () = $optarg =~ /%d/g; | ||
|  |         die "Invalid - too many '%d' sequences in '$optarg'\n" | ||
|  |             if ( $count > 1 ); | ||
|  |         $filename = sprintf( $optarg, $showno ); | ||
|  |     } | ||
|  |     else { | ||
|  |         # | ||
|  |         # It's a plain filename, just return it | ||
|  |         # | ||
|  |         $filename = $optarg; | ||
|  |     } | ||
|  | 
 | ||
|  |     return $filename; | ||
|  | } | ||
|  | 
 | ||
|  | #===  FUNCTION  ================================================================ | ||
|  | #         NAME: update_status | ||
|  | #      PURPOSE: Updates the status file | ||
|  | #   PARAMETERS: $sfile          Name and path of the status file | ||
|  | #               $state          State to set, such as 'parsed' | ||
|  | #      RETURNS: Nothing | ||
|  | #  DESCRIPTION: Appends the state string to the named file. | ||
|  | #       THROWS: No exceptions | ||
|  | #     COMMENTS: None | ||
|  | #     SEE ALSO: N/A | ||
|  | #=============================================================================== | ||
|  | sub update_status { | ||
|  |     my ($sfile, $state) = @_; | ||
|  | 
 | ||
|  |     open( my $sfh, '>>:encoding(UTF-8)', $sfile ) | ||
|  |         or die "Unable to open output file $sfile $!\n"; | ||
|  |     print $sfh "$state\n"; | ||
|  |     close($sfh); | ||
|  | 
 | ||
|  | } | ||
|  | 
 | ||
|  | #===  FUNCTION  ================================================================ | ||
|  | #         NAME: spellcheck | ||
|  | #      PURPOSE: Perform simple spell checks on strings | ||
|  | #   PARAMETERS: $string         Input string to check | ||
|  | #      RETURNS: List containing any problem words or undef if none | ||
|  | #  DESCRIPTION:  | ||
|  | #       THROWS: No exceptions | ||
|  | #     COMMENTS: None | ||
|  | #     SEE ALSO: N/A | ||
|  | #=============================================================================== | ||
|  | sub spellcheck { | ||
|  |     my ($string) = @_; | ||
|  | 
 | ||
|  |     my @errors; | ||
|  | 
 | ||
|  |     my $checker = Text::SpellChecker->new( | ||
|  |         text => $string, | ||
|  |         lang => 'en_GB.UTF-8' | ||
|  |     ); | ||
|  | 
 | ||
|  |     while ( my $word = $checker->next_word ) { | ||
|  |         push( @errors, $word ); | ||
|  |     } | ||
|  | 
 | ||
|  |     return @errors; | ||
|  | 
 | ||
|  | } | ||
|  | 
 | ||
|  | #===  FUNCTION  ================================================================ | ||
|  | #         NAME: textFormat | ||
|  | #      PURPOSE: Formats a block of text in an indented, wrapped style with | ||
|  | #               a label in the left column | ||
|  | #   PARAMETERS: $text           The text to be formatted, as a scalar string | ||
|  | #               $tag            The label to be added to the left of the top | ||
|  | #                               line | ||
|  | #               $align          Tag alignment, 'L' for left, otherwise right | ||
|  | #               $lmargin        Width of the left margin (assumed to be big | ||
|  | #                               enough for the tag) | ||
|  | #               $textwidth      The width of all of the text plus left margin | ||
|  | #                               (i.e.  the right margin) | ||
|  | #      RETURNS: The formatted result as a string | ||
|  | #  DESCRIPTION: Chops the incoming text into words (thereby removing any | ||
|  | #               formatting). Removes any leading spaces. Loops through the | ||
|  | #               wordlist building them into lines of the right length to fit | ||
|  | #               between the left and right margins. Saves the lines in an | ||
|  | #               array. Adds the tag to the first line with the alignment | ||
|  | #               requested then returns the array joined into a string. | ||
|  | #       THROWS: No exceptions | ||
|  | #     COMMENTS: Inspired by Text::Format but *much* simpler. In fact T::F is | ||
|  | #               a nasty thing to have to use; I couldn't get it to do what | ||
|  | #               this routine does. | ||
|  | #               TODO Make the routine more resilient to silly input values. | ||
|  | #     SEE ALSO: | ||
|  | #=============================================================================== | ||
|  | sub textFormat { | ||
|  |     my ( $text, $tag, $align, $lmargin, $textwidth ) = @_; | ||
|  | 
 | ||
|  |     my ( $width, $word ); | ||
|  |     my ( @words, @buff, @wrap ); | ||
|  | 
 | ||
|  |     # | ||
|  |     # Build the tag early. If there's no text we'll just return the tag. | ||
|  |     # | ||
|  |     $tag = sprintf( "%*s", | ||
|  |         ( $align =~ /L/i ? ( $lmargin - 1 ) * -1 : $lmargin - 1 ), $tag ); | ||
|  | 
 | ||
|  |     return $tag unless $text; | ||
|  | 
 | ||
|  |     $text =~ s/(^\s+|\s+$)//g; | ||
|  |     return $tag unless $text; | ||
|  | 
 | ||
|  |     # | ||
|  |     # Chop up the incoming text removing leading spaces | ||
|  |     # | ||
|  |     @words = split( /\s+/, $text ); | ||
|  |     shift(@words) if ( @words && $words[0] eq '' ); | ||
|  | 
 | ||
|  |     # | ||
|  |     # Compute the width of the active text | ||
|  |     # | ||
|  |     $width = $textwidth - $lmargin; | ||
|  | 
 | ||
|  |     # | ||
|  |     # Format the words into lines with a blank left margin | ||
|  |     # | ||
|  |     while ( defined( $word = shift(@words) ) ) { | ||
|  |         if ( length( join( ' ', @buff, $word ) ) < $width ) { | ||
|  |             push( @buff, $word ); | ||
|  |         } | ||
|  |         else { | ||
|  |             push( @wrap, ' ' x $lmargin . join( ' ', @buff ) ); | ||
|  |             @buff = ($word); | ||
|  |         } | ||
|  |     } | ||
|  | 
 | ||
|  |     # | ||
|  |     # Append any remainder | ||
|  |     # | ||
|  |     push( @wrap, ' ' x $lmargin . join( ' ', @buff ) ) if @buff; | ||
|  | 
 | ||
|  |     # | ||
|  |     # Insert the tag into the first line | ||
|  |     # | ||
|  |     substr( $wrap[0], 0, $lmargin - 1 ) = $tag; | ||
|  | 
 | ||
|  |     # | ||
|  |     # Return the formatted array as a string | ||
|  |     # | ||
|  |     return join( "\n", @wrap ); | ||
|  | 
 | ||
|  | } | ||
|  | 
 | ||
|  | #===  FUNCTION  ================================================================ | ||
|  | #         NAME: colour_if | ||
|  | #      PURPOSE: Colours a string if a condition is true | ||
|  | #   PARAMETERS: $test           criterion to determine colouring | ||
|  | #               $colour         colour code to use | ||
|  | #               $string         string to colour | ||
|  | #      RETURNS: The (non-)coloured string | ||
|  | #  DESCRIPTION: | ||
|  | #       THROWS: No exceptions | ||
|  | #     COMMENTS: None | ||
|  | #     SEE ALSO: N/A | ||
|  | #=============================================================================== | ||
|  | sub colour_if { | ||
|  |     my ( $test, $colour, $string ) = @_; | ||
|  | 
 | ||
|  |     if ($test) { | ||
|  |         return colored( $string, $colour ); | ||
|  |     } | ||
|  |     else { | ||
|  |         return $string; | ||
|  |     } | ||
|  | } | ||
|  | 
 | ||
|  | #===  FUNCTION  ================================================================ | ||
|  | #         NAME: colour_switch | ||
|  | #      PURPOSE: Chooses colours depending on multiple tests | ||
|  | #   PARAMETERS: $tests          arrayref containing a list of tests | ||
|  | #               $colours        arrayref containing a list of colour codes | ||
|  | #               $string         string to colour | ||
|  | #      RETURNS: The string with the desired colour (or none) | ||
|  | #  DESCRIPTION: The first test in @$tests which returns True causes the | ||
|  | #               corresponding colour to be chosen (unless it's missing) | ||
|  | #               otherwise, if no tests are True or the colour is missing, | ||
|  | #               there's no colour applied. | ||
|  | #       THROWS: No exceptions | ||
|  | #     COMMENTS: None | ||
|  | #     SEE ALSO: N/A | ||
|  | #=============================================================================== | ||
|  | sub colour_switch { | ||
|  |     my ( $tests, $colours, $string ) = @_; | ||
|  | 
 | ||
|  |     for ( my $i = 0; $i <= $#{$tests}; $i++ ) { | ||
|  |         if ( $tests->[$i] ) { | ||
|  |             if ( exists( $colours->[$i] ) ) { | ||
|  |                 return colored( $string, $colours->[$i] ); | ||
|  |             } | ||
|  |             else { | ||
|  |                 return $string; | ||
|  |             } | ||
|  |         } | ||
|  |     } | ||
|  | 
 | ||
|  |     return $string; | ||
|  | } | ||
|  | 
 | ||
|  | #===  FUNCTION  ================================================================ | ||
|  | #         NAME: alert | ||
|  | #      PURPOSE: Print an alert message using a format and colours | ||
|  | #   PARAMETERS: $fmt            the format to use. The first field is where | ||
|  | #                               the string 'ALERT' goes | ||
|  | #               $message        the alert message | ||
|  | #      RETURNS: Nothing | ||
|  | #  DESCRIPTION: | ||
|  | #       THROWS: No exceptions | ||
|  | #     COMMENTS: None | ||
|  | #     SEE ALSO: N/A | ||
|  | #=============================================================================== | ||
|  | sub alert { | ||
|  |     my ( $fmt, $message ) = @_; | ||
|  | 
 | ||
|  |     $fmt = "%-16s %s\n" unless $fmt; | ||
|  | #    print STDERR "$bold$red", sprintf( $fmt, "** ALERT **:", $message ), "$reset"; | ||
|  |     print STDERR colored( sprintf( $fmt, "** ALERT **:", $message ), | ||
|  |         'bold red' ); | ||
|  | } | ||
|  | 
 | ||
|  | #===  FUNCTION  ================================================================ | ||
|  | #         NAME: coalesce | ||
|  | #      PURPOSE: To find the first defined argument and return it | ||
|  | #   PARAMETERS: Arbitrary number of arguments | ||
|  | #      RETURNS: The first defined argument or undef if there are none | ||
|  | #  DESCRIPTION: | ||
|  | #       THROWS: No exceptions | ||
|  | #     COMMENTS: None | ||
|  | #     SEE ALSO: N/A | ||
|  | #=============================================================================== | ||
|  | sub coalesce { | ||
|  |     foreach (@_) { | ||
|  |         return $_ if defined($_); | ||
|  |     } | ||
|  |     return undef;    ## no critic | ||
|  | } | ||
|  | 
 | ||
|  | #===  FUNCTION  ================================================================ | ||
|  | #         NAME: quote_crlf | ||
|  | #      PURPOSE: Display TAB (\t) and CRLF (\r\n) sequences for debugging | ||
|  | #   PARAMETERS: $string         String to process | ||
|  | #      RETURNS: The string with TAB, CR and LF characters made visible for | ||
|  | #               printing | ||
|  | #  DESCRIPTION: | ||
|  | #       THROWS: No exceptions | ||
|  | #     COMMENTS: None | ||
|  | #     SEE ALSO: N/A | ||
|  | #=============================================================================== | ||
|  | sub quote_crlf { | ||
|  |     my ($string) = @_; | ||
|  | 
 | ||
|  |     my $quoted = $string; | ||
|  |     $quoted =~ s/\t/\\t/g; | ||
|  |     $quoted =~ s/\n/\\n/g; | ||
|  |     $quoted =~ s/\r/\\r/g; | ||
|  | 
 | ||
|  |     return "$quoted\n"; | ||
|  | } | ||
|  | 
 | ||
|  | #===  FUNCTION  ================================================================ | ||
|  | #         NAME: _debug | ||
|  | #      PURPOSE: Prints debug reports | ||
|  | #   PARAMETERS: $active         Boolean: 1 for print, 0 for no print | ||
|  | #               $message        Message to print | ||
|  | #      RETURNS: Nothing | ||
|  | #  DESCRIPTION: Outputs a message if $active is true. It removes any trailing | ||
|  | #               newline and then adds one in the 'print' to the caller doesn't | ||
|  | #               have to bother. Prepends the message with 'D> ' to show it's | ||
|  | #               a debug message. | ||
|  | #       THROWS: No exceptions | ||
|  | #     COMMENTS: None | ||
|  | #     SEE ALSO: N/A | ||
|  | #=============================================================================== | ||
|  | sub _debug { | ||
|  |     my ( $active, $message ) = @_; | ||
|  | 
 | ||
|  |     chomp($message); | ||
|  |     print STDERR "D> $message\n" if $active; | ||
|  | } | ||
|  | 
 | ||
|  | #===  FUNCTION  ================================================================ | ||
|  | #         NAME: Options | ||
|  | #      PURPOSE: Processes command-line options | ||
|  | #   PARAMETERS: $optref     Hash reference to hold the options | ||
|  | #      RETURNS: Undef | ||
|  | #  DESCRIPTION: | ||
|  | #       THROWS: no exceptions | ||
|  | #     COMMENTS: none | ||
|  | #     SEE ALSO: n/a | ||
|  | #=============================================================================== | ||
|  | sub Options { | ||
|  |     my ($optref) = @_; | ||
|  | 
 | ||
|  |     my @options = ( | ||
|  |         "help",        "documentation|manpage", | ||
|  |         "debug=i",     "silent!", | ||
|  |         "episode=i",   "infile=s", | ||
|  |         "shownotes:s", "format=s", | ||
|  |         "release=s",   "assets=s", | ||
|  |         "pictures=s",  "zip=s", | ||
|  |         "test!", | ||
|  |     ); | ||
|  | 
 | ||
|  |     if ( !GetOptions( $optref, @options ) ) { | ||
|  |         pod2usage( | ||
|  |             -msg     => "$PROG version $VERSION\n", | ||
|  |             -verbose => 0, | ||
|  |             -exitval => 1 | ||
|  |         ); | ||
|  |     } | ||
|  | 
 | ||
|  |     return; | ||
|  | } | ||
|  | 
 | ||
|  | __END__ | ||
|  | 
 | ||
|  | 
 | ||
|  | #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% | ||
|  | #  Application Documentation | ||
|  | #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% | ||
|  | #{{{ | ||
|  | 
 | ||
|  | =head1 NAME | ||
|  | 
 | ||
|  | parse_JSON - parse the JSON output file from the HPR show submission form | ||
|  | 
 | ||
|  | =head1 VERSION | ||
|  | 
 | ||
|  | This documentation refers to parse_JSON version 0.0.14 | ||
|  | 
 | ||
|  | 
 | ||
|  | =head1 USAGE | ||
|  | 
 | ||
|  |     ./parse_JSON [-help] [-documentation|manpage] -episode=N -infile=FILE | ||
|  |          [-shownotes[=FILE]] [-format=FILE] [-release=FILE] [-assets=FILE] | ||
|  |          [-pictures=FILE] [-zip=FILE] [-debug=N] [-[no]silent] | ||
|  | 
 | ||
|  | 
 | ||
|  |     ./parse_JSON -episode=2384 -infile=shownotes/hpr2384/shownotes.json | ||
|  |     ./parse_JSON -episode=2379 -infile=shownotes/hpr2379/shownotes.json -shownotes | ||
|  | 
 | ||
|  | 
 | ||
|  | =head1 OPTIONS | ||
|  | 
 | ||
|  | =over 8 | ||
|  | 
 | ||
|  | =item B<-help> | ||
|  | 
 | ||
|  | Prints a brief help message describing the usage of the program, and then exits. | ||
|  | 
 | ||
|  | =item B<-manpage> | ||
|  | 
 | ||
|  | Prints the entire documentation for the script. Note that this mode should use | ||
|  | I<perldoc> but there seems to be a bug in this tool at the moment; it does not | ||
|  | render properly. Currently we are using a simpler type of formatter. | ||
|  | 
 | ||
|  | =item B<-episode=N> | ||
|  | 
 | ||
|  | This mandatory option specifies the number of the HPR episode that is being | ||
|  | parsed. | ||
|  | 
 | ||
|  | =item B<-infile=FILE> | ||
|  | 
 | ||
|  | This mandatory option provides the file containing the JSON-format output from | ||
|  | the submission form on the HPR website. It is expected that the file has been | ||
|  | collected by external scripts and saved in a local file. | ||
|  | 
 | ||
|  | =item B<-shownotes[=FILE]> | ||
|  | 
 | ||
|  | This option, which may be omitted, defines the location where the parsed show | ||
|  | notes are to be written. If omitted no show notes are written, the script | ||
|  | simply parses the input file and reports on what it finds. | ||
|  | 
 | ||
|  | If the option is given as B<-shownotes=FILE> the notes are written to the | ||
|  | nominated file. | ||
|  | 
 | ||
|  | The B<FILE> portion may be given as a template string containing a I<%d> | ||
|  | sequence. This sequence is replaced by the show number provided by the | ||
|  | B<-episode=N> option. | ||
|  | 
 | ||
|  | If the B<=FILE> portion is omitted a default template of 'hpr%d.out' is used | ||
|  | and again the I<%d> sequence is replaced by the show number provided by the | ||
|  | B<-episode=N> option. | ||
|  | 
 | ||
|  | If the notes are declared as HTML5 then a symbolic link is made from the | ||
|  | parsed shownote file to a file where the extension is 'html'. This is to allow | ||
|  | the 'do_vim' script to edit the output file as in other cases but to ensure | ||
|  | that the file to be uploaded to the server is also edited. | ||
|  | 
 | ||
|  | =item B<-format=FILE> | ||
|  | 
 | ||
|  | This option, which may be omitted, causes the declared format to be written to | ||
|  | a nominated file. This is to allow other scripts to take action depending on | ||
|  | the format of the notes without having to determine this information | ||
|  | themselves. | ||
|  | 
 | ||
|  | If the option is omitted no format information is written. | ||
|  | 
 | ||
|  | =item B<-release=FILE> | ||
|  | 
 | ||
|  | This option, which may be omitted, causes the show's release date to be | ||
|  | written to a nominated file. This is to allow other scripts to take action | ||
|  | depending on the release date of the show without having to determine this | ||
|  | information themselves. | ||
|  | 
 | ||
|  | If the option is omitted no release date is written. | ||
|  | 
 | ||
|  | =item B<-assets=FILE> | ||
|  | 
 | ||
|  | This option, which may be omitted, causes a list of the show's assets to be | ||
|  | written to a nominated file. This is to allow other scripts to take action | ||
|  | depending on the assets of the show without having to determine this | ||
|  | information themselves. | ||
|  | 
 | ||
|  | Note that the assets do not include the main audio file. | ||
|  | 
 | ||
|  | If no assets exist or the option is omitted no asset list is written. | ||
|  | 
 | ||
|  | =item B<-pictures=FILE> | ||
|  | 
 | ||
|  | This option, which may be omitted, causes a list of any picture assets to be | ||
|  | written to a nominated file. This is to allow other scripts to take action | ||
|  | depending on pictures sent with the show without having to determine this | ||
|  | information themselves. | ||
|  | 
 | ||
|  | If no pictures exist or the option is omitted no picture list is written. | ||
|  | 
 | ||
|  | =item B<-zip=FILE> | ||
|  | 
 | ||
|  | This option, which may be omitted, causes assets to written to a Zip file | ||
|  | with the name given. This only happens if the assets were sent separately, and | ||
|  | not in an archive file of some kind. This is because the processing of some | ||
|  | assets (only pictures at the moment) can cause changes to be made to the | ||
|  | originals. This step can be taken to preserve the original files. | ||
|  | 
 | ||
|  | If no assets exist or an archive file already exists, or the option is omitted | ||
|  | no zip file is written. | ||
|  | 
 | ||
|  | =item B<-debug=N> | ||
|  | 
 | ||
|  | Causes certain debugging information to be displayed. | ||
|  | 
 | ||
|  |     0   (the default) no debug output | ||
|  |     1   N/A | ||
|  |     2   reports lines being parsed from the input file | ||
|  |     3   dumps two data structures: | ||
|  |             - @content: an array containing lines from the input file | ||
|  |             - %showdetails: a hash containing the parsed data | ||
|  | 
 | ||
|  | =item B<-[no]silent> | ||
|  | 
 | ||
|  | Controls the amount of information generated. If B<-silent> is selected the | ||
|  | output is reduced, otherwise B<-nosilent> is assumed or may be provided when | ||
|  | information about the generation of output files is reported. | ||
|  | 
 | ||
|  | =item B<-[no]test> | ||
|  | 
 | ||
|  | Used when running tests on the script. Controls the log output, adding 'T ' at | ||
|  | the start of the log record. If B<-test> is selected the | ||
|  | prefix is added, otherwise B<-notest> is assumed and no prefix is added. | ||
|  | 
 | ||
|  | =back | ||
|  | 
 | ||
|  | =head1 DESCRIPTION | ||
|  | 
 | ||
|  | This script is meant to be a component in a workflow dealing with the notes | ||
|  | submitted by HPR hosts with the shows they are uploading. | ||
|  | 
 | ||
|  | The script reads a JSON file generated by the show upload form on the HPR | ||
|  | website.  It decodes this into a hash structure containing all of the elements | ||
|  | of the file. | ||
|  | 
 | ||
|  | Regardless of settings a check is made to ensure that the episode number | ||
|  | provided to the script matches that found in the input file. | ||
|  | 
 | ||
|  | An analysis of the show notes is carried out to determine how much HTML they | ||
|  | contain. The HTML level is determined as a percentage. If it is above 5% then | ||
|  | the notes are deemed to be HTML. If below 5% then an attempt is made to | ||
|  | determine what type of markup has been used. | ||
|  | 
 | ||
|  | An analysis of the host details is also made to determine the composition of | ||
|  | the text therein. | ||
|  | 
 | ||
|  | Reports are generated for all of these components. A recommendation of the | ||
|  | type of processing the notes will require is also made. This was intended to | ||
|  | assist with automatic processing, but has not proved to be reliable enough. | ||
|  | 
 | ||
|  | The author of the notes declares what format they are. This is not currently | ||
|  | compared with what the analysis reports, but this may be added in a later | ||
|  | version. | ||
|  | 
 | ||
|  | The notes are written to the nominated file (if there is one) for further | ||
|  | processing, database insertion, etc. | ||
|  | 
 | ||
|  | If the notes are declared to be HTML then a copy is written to a file with the | ||
|  | same name as the output file but with an 'html' suffix. This can be rendered | ||
|  | to see if the notes really are HTML and look good enough for uploading. | ||
|  | 
 | ||
|  | 
 | ||
|  | =head1 DIAGNOSTICS | ||
|  | 
 | ||
|  | Error and warning messages generated by the script. | ||
|  | 
 | ||
|  | =over 4 | ||
|  | 
 | ||
|  | =item B<Unable to access input file ...: ...> | ||
|  | 
 | ||
|  | Type: fatal | ||
|  | 
 | ||
|  | The script attempted to open the input file but failed. The message will | ||
|  | contain details of the failure. | ||
|  | 
 | ||
|  | =item B<Input file ... is empty> | ||
|  | 
 | ||
|  | Type: fatal | ||
|  | 
 | ||
|  | The input file exists but is empty | ||
|  | 
 | ||
|  | =item B<Invalid - too many '%d' sequences in '...'> | ||
|  | 
 | ||
|  | Type: fatal | ||
|  | 
 | ||
|  | The template provided to the B<-shownotes=FILE> option contained too many '%d' | ||
|  | sequences. Ensure that there is only one of these. See above for details. | ||
|  | 
 | ||
|  | =item B<The JSON doesn't have the right structure> | ||
|  | 
 | ||
|  | Type: fatal | ||
|  | 
 | ||
|  | After a few basic checks on the components of the JSON file it does not seem | ||
|  | to be valid. The script expects there at least to be top-level sections with | ||
|  | the keys: I<episode>, I<metadata> and I<host>. | ||
|  | 
 | ||
|  | =item B<The episode specified (...) doesn't match the one in the JSON (...)> | ||
|  | 
 | ||
|  | Type: fatal | ||
|  | 
 | ||
|  | The episode number in the B<-episode=N> option does not match the episode | ||
|  | number found in the JSON. Both numbers are reported in the error message. | ||
|  | 
 | ||
|  | =item B<Unable to open output file ...: ...> | ||
|  | 
 | ||
|  | Type: fatal | ||
|  | 
 | ||
|  | The script was unable to open one or both of the output files or the file of | ||
|  | format information. These will be named according to the B<-shownotes=FILE> | ||
|  | option with a suffix of '.out' or '.html' or specified by the B<-format=FILE> | ||
|  | option. | ||
|  | 
 | ||
|  | =back | ||
|  | 
 | ||
|  | =head1 DEPENDENCIES | ||
|  | 
 | ||
|  |     Data::Dumper | ||
|  |     Getopt::Long | ||
|  |     HTML::Parser | ||
|  |     JSON | ||
|  |     Link::Extor | ||
|  |     Log::Handler | ||
|  |     Pod::Usage | ||
|  |     Term::ANSIColor | ||
|  |     Text::CSV | ||
|  | 
 | ||
|  | =head1 BUGS AND LIMITATIONS | ||
|  | 
 | ||
|  | There are no known bugs in this module. | ||
|  | Please report problems to Dave Morriss  (Dave.Morriss@gmail.com) | ||
|  | Patches are welcome. | ||
|  | 
 | ||
|  | =head1 AUTHOR | ||
|  | 
 | ||
|  | Dave Morriss (Dave.Morriss@gmail.com) | ||
|  | 
 | ||
|  | =head1 LICENCE AND COPYRIGHT | ||
|  | 
 | ||
|  | Copyright (c) 2021 Dave Morriss (Dave.Morriss@gmail.com). All rights reserved. | ||
|  | 
 | ||
|  | This module is free software; you can redistribute it and/or | ||
|  | modify it under the same terms as Perl itself. See perldoc perlartistic. | ||
|  | 
 | ||
|  | This program is distributed in the hope that it will be useful, | ||
|  | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
|  | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | ||
|  | 
 | ||
|  | =cut | ||
|  | 
 | ||
|  | #}}} | ||
|  | 
 | ||
|  | # [zo to open fold, zc to close] | ||
|  | 
 | ||
|  | # vim: syntax=perl:ts=8:sw=4:et:ai:tw=78:fo=tcrqn21:fdm=marker | ||
|  | 
 |