forked from HPR/hpr-tools
		
	Show_Submission/extract_images: some tidying, addition of more POD
    documentation.
Show_Submission/query2csv, Show_Submission/query2json: these softlinks
    were turned to hard links to make them more visible in the Git repo.
    The files are in the 'Database/' directory.
		
	
		
			
				
	
	
		
			740 lines
		
	
	
		
			20 KiB
		
	
	
	
		
			Perl
		
	
	
		
			Executable File
		
	
	
	
	
			
		
		
	
	
			740 lines
		
	
	
		
			20 KiB
		
	
	
	
		
			Perl
		
	
	
		
			Executable File
		
	
	
	
	
| #!/usr/bin/env perl
 | |
| #===============================================================================
 | |
| #
 | |
| #         FILE: extract_images
 | |
| #
 | |
| #        USAGE: ./extract_images [--help] [--documentation|man]
 | |
| #               [--prefix=STRING] [--[no-]backup] [--force] [--[no]silent]
 | |
| #               HTML_file [ [HTML_file_2] [HTML_file_3] ... ]
 | |
| #
 | |
| #  DESCRIPTION: Processes HTML files which may have 'data' scheme URIs containing
 | |
| #               images, and extracts these images into files in the same
 | |
| #               directory.  The 'data' scheme links are converted to 'https'
 | |
| #               and reference the extracted files. The modified HTML is
 | |
| #               output, and the original will be saved as a backup if
 | |
| #               requested.
 | |
| #
 | |
| #               The 'data' URI scheme is specified in RFC 2397.
 | |
| #
 | |
| #      OPTIONS: ---
 | |
| # REQUIREMENTS: ---
 | |
| #         BUGS: ---
 | |
| #        NOTES: ---
 | |
| #       AUTHOR: Dave Morriss (djm), Dave.Morriss@gmail.com
 | |
| #      VERSION: 0.0.3
 | |
| #      CREATED: 2024-12-25 10:53:15
 | |
| #     REVISION: 2024-12-31 20:56:50
 | |
| #
 | |
| #===============================================================================
 | |
| 
 | |
| use v5.36;
 | |
| use strict;
 | |
| use warnings;
 | |
| use feature qw{ say state try };
 | |
| no warnings qw{ experimental::try };
 | |
| 
 | |
| use open ':std', ':encoding(UTF-8)'; # Make all IO UTF-8
 | |
| 
 | |
| use Getopt::Long;
 | |
| use Pod::Usage;
 | |
| 
 | |
| use HTML::TreeBuilder 5 -weak;
 | |
| use URI;
 | |
| use MIME::Types;
 | |
| 
 | |
| use Path::Tiny;
 | |
| use File::Copy;
 | |
| 
 | |
| use Data::Dumper;
 | |
| 
 | |
| #
 | |
| # Version number (Incremented by Vim)
 | |
| #
 | |
| our $VERSION = '0.0.3';
 | |
| 
 | |
| #
 | |
| # Script and directory names
 | |
| #
 | |
| ( my $PROG = $0 ) =~ s|.*/||mx;
 | |
| 
 | |
| #-------------------------------------------------------------------------------
 | |
| # Declarations
 | |
| #-------------------------------------------------------------------------------
 | |
| my ( $notes,   $typename, $uri );
 | |
| my ( $fcount,  $basename, $filename, $suffix, $fh );
 | |
| my ( $updates, $bsuffix,  $newURL );
 | |
| 
 | |
| my $backupcount = 5;
 | |
| 
 | |
| #-------------------------------------------------------------------------------
 | |
| # Options and arguments
 | |
| #-------------------------------------------------------------------------------
 | |
| # {{{
 | |
| #
 | |
| # Option defaults
 | |
| #
 | |
| my $DEFDEBUG = 0;
 | |
| my $DEFPREFIX = 'image';
 | |
| 
 | |
| my %options;
 | |
| Options( \%options );
 | |
| 
 | |
| #
 | |
| # Default help shows minimal information
 | |
| #
 | |
| pod2usage( -msg => "$PROG version $VERSION\n", -exitval => 1, -verbose => 0 )
 | |
|     if ( $options{'help'} );
 | |
| 
 | |
| #
 | |
| # The -documentation or -man option shows the full POD documentation through
 | |
| # a pager for convenience
 | |
| #
 | |
| pod2usage( -msg => "$PROG version $VERSION\n", -exitval => 1, -verbose => 2 )
 | |
|     if ( $options{'documentation'} );
 | |
| 
 | |
| #
 | |
| # Collect options
 | |
| #
 | |
| my $DEBUG  = ( defined( $options{debug} )  ? $options{debug}  : $DEFDEBUG );
 | |
| my $silent = ( defined( $options{silent} ) ? $options{silent} : 0 );
 | |
| my $prefix = ( defined( $options{prefix} ) ? $options{prefix} : $DEFPREFIX );
 | |
| my $backup = ( defined( $options{backup} ) ? $options{backup} : 1 );
 | |
| my $force  = ( defined( $options{force} )  ? $options{force}  : 0 );
 | |
| 
 | |
| #
 | |
| # Check we have arguments
 | |
| #
 | |
| pod2usage(
 | |
|     -msg     => "Missing arguments. One or more HTML file names are needed\n",
 | |
|     -exitval => 1,
 | |
|     -verbose => 0
 | |
|     )
 | |
| unless ( scalar(@ARGV) > 0 );
 | |
| 
 | |
| #
 | |
| # Clean up the prefix
 | |
| #
 | |
| $prefix = ($prefix =~ /(.*)_$/ ? $1 : $prefix);
 | |
| 
 | |
| #
 | |
| # Backup suffix (the dot gets added later)
 | |
| #
 | |
| $bsuffix = 'bak';
 | |
| 
 | |
| #
 | |
| # Debug the options
 | |
| #
 | |
| _debug(
 | |
|     $DEBUG > 1,
 | |
|     '$silent = ' . $silent,
 | |
|     '$prefix = ' . $prefix,
 | |
|     '$backup = ' . $backup,
 | |
|     '$force = ' . $force,
 | |
| );
 | |
| 
 | |
| # }}}
 | |
| 
 | |
| #-------------------------------------------------------------------------------
 | |
| # Prepare a MIME::Types object
 | |
| #-------------------------------------------------------------------------------
 | |
| my $mt = MIME::Types->new;
 | |
| 
 | |
| #-------------------------------------------------------------------------------
 | |
| # Loop through the arguments
 | |
| #-------------------------------------------------------------------------------
 | |
| foreach my $notesfile (@ARGV) {
 | |
| 
 | |
|     #
 | |
|     # Get the absolute path of the argument
 | |
|     #
 | |
|     my $abs_nf = path($notesfile)->absolute;
 | |
| 
 | |
|     #
 | |
|     # Get the 'dirname' from the absolute path
 | |
|     #
 | |
|     my $dirname = path($abs_nf)->parent->stringify;
 | |
| 
 | |
|     unless (-e $abs_nf) {
 | |
|         warn "Unable to find $notesfile\n";
 | |
|         next;
 | |
|     }
 | |
| 
 | |
|     #
 | |
|     # Force the MIME type of $notesfile to a string
 | |
|     #
 | |
|     $typename = "" . coalesce( $mt->mimeTypeOf("$abs_nf"), '' );
 | |
| 
 | |
|     #
 | |
|     # Check the MIME type and reject non-HTML
 | |
|     #
 | |
|     unless ($typename eq 'text/html') {
 | |
|         warn "File $abs_nf is not HTML\n";
 | |
|         next
 | |
|     }
 | |
| 
 | |
|     say "Reading from $abs_nf\n" unless $silent;
 | |
| 
 | |
|     #
 | |
|     # Get HTML file basename (no path) without the suffix for building filenames
 | |
|     #
 | |
|     $basename = path($abs_nf)->basename('.html');
 | |
| 
 | |
|     #
 | |
|     # Read the HTML
 | |
|     #
 | |
|     open (my $nfh, '<', $notesfile) or die "Unable to open $notesfile\n";
 | |
|     $notes = <$nfh>;
 | |
|     close($nfh);
 | |
| 
 | |
|     #
 | |
|     # Image files are to have an index/sequence number
 | |
|     #
 | |
|     $fcount = 0;
 | |
| 
 | |
|     #
 | |
|     # Keep a note of HTML updates
 | |
|     #
 | |
|     $updates = 0;
 | |
| 
 | |
|     #
 | |
|     # Initialise the TreeBuilder
 | |
|     #
 | |
|     my $tree = HTML::TreeBuilder->new;
 | |
|     $tree->ignore_unknown(0);
 | |
|     $tree->no_expand_entities(1);
 | |
|     $tree->p_strict(1);
 | |
|     $tree->store_comments(1);
 | |
|     $tree->warn(1);
 | |
| 
 | |
|     #
 | |
|     # Load the HTML obtained from the file into the TreeBuilder
 | |
|     #
 | |
|     $tree->parse_content($notes)
 | |
|         or die "HTML::TreeBuilder failed to parse notes: $!\n";
 | |
| 
 | |
|     #
 | |
|     # Loop through the tree looking for 'data' scheme images
 | |
|     #
 | |
|     for ( @{ $tree->extract_links('img') } ) {
 | |
|         my ( $link, $element, $attr, $tag ) = @$_;
 | |
| 
 | |
|         $uri = URI->new($link);
 | |
|         unless ($silent) {
 | |
|             say "Scheme: ",     $uri->scheme;
 | |
|             say "Media type: ", $uri->media_type;
 | |
|         }
 | |
| 
 | |
|         #
 | |
|         # We only care about 'data' scheme stuff - for now anyway, and only
 | |
|         # images within this set
 | |
|         #
 | |
|         if ( $uri->scheme eq 'data' ) {
 | |
|             #
 | |
|             # Only images
 | |
|             #
 | |
|             if ( $uri->media_type =~ /^image/ ) {
 | |
|               #
 | |
|               # Extract the file name suffix from the MIME string, and give it
 | |
|               # a leading '.'
 | |
|               #
 | |
|                 ( $suffix = $uri->media_type ) =~ s{^.*/}{.};
 | |
| 
 | |
|                 #
 | |
|                 # Construct the filename for this image making sure it's in
 | |
|                 # the directory where the HTML is located.
 | |
|                 #
 | |
|                 # ${fileprefix}_${prefix}_${increment}.${extension}
 | |
|                 #
 | |
|                 $fcount++;
 | |
|                 $filename
 | |
|                     = "$dirname/${basename}_${prefix}_${fcount}${suffix}";
 | |
|                 say "Writing to: $filename" unless $silent;
 | |
| 
 | |
|                 say '-' x 40 unless $silent;
 | |
| 
 | |
|                 #
 | |
|                 # If the file exists and --force is not active, warn and skip.
 | |
|                 # Otherwise write the file.
 | |
|                 #
 | |
|                 if ( -e $filename && !$force ) {
 | |
|                     warn "File $filename exists; not overwriting\n";
 | |
|                 }
 | |
|                 else {
 | |
|                     #
 | |
|                     # Write the data to the file in binary format. The URI
 | |
|                     # module does the conversion so it's already binary.
 | |
|                     #
 | |
|                     $fh = path($filename)->openw_raw;
 | |
|                     print $fh $uri->data;
 | |
|                     close($fh);
 | |
|                 }
 | |
| 
 | |
|                 #
 | |
|                 # Update the HTML with a link to the file we created (or that
 | |
|                 # was already there from an earlier time).
 | |
|                 #
 | |
|                 $updates++;
 | |
|                 $newURL = path($filename)->basename;
 | |
|                 $element->attr( $attr, $newURL );
 | |
|             }
 | |
|         }
 | |
| 
 | |
|     }    # extract_links loop
 | |
| 
 | |
|     #
 | |
|     # Output the changed HTML turning what became standalone back into
 | |
|     # a <body> fragment.
 | |
|     #
 | |
|     if ($updates > 0) {
 | |
|         my $body = $tree->look_down( _tag => 'body' );
 | |
|         ( my $result = $body->as_HTML( undef, ' ', {} ) )
 | |
|             =~ s{(^<body[^>]*>|</body>$)}{}gi;
 | |
| 
 | |
|         #$notesfile = path($notesfile)->basename;
 | |
|         if ($backup) {
 | |
|             if (_backup( $abs_nf, $bsuffix, $backupcount )) {
 | |
|                 say "$notesfile backed up" unless $silent;
 | |
|             }
 | |
|         }
 | |
|         else {
 | |
|             say "$notesfile not backed up" unless $silent;
 | |
|         }
 | |
| 
 | |
|         $fh = path($notesfile)->openw;
 | |
|         say $fh $result;
 | |
|         close($fh);
 | |
| 
 | |
|         say "$updates images converted, $notesfile updated" unless $silent;
 | |
|     }
 | |
|     else {
 | |
|         say "No images found, no changes made to $notesfile" unless $silent;
 | |
|     }
 | |
| 
 | |
| } # $notesfile loop
 | |
| 
 | |
| exit;
 | |
| 
 | |
| #===  FUNCTION  ================================================================
 | |
| #         NAME: _backup
 | |
| #      PURPOSE: Given a file, make a backup of it by appending $suffix, and
 | |
| #               handle previous backups
 | |
| #   PARAMETERS: $filename               path of file to backup
 | |
| #               $suffix                 suffix to use, default 'bck'
 | |
| #               $limit                  number of backups to keep
 | |
| #      RETURNS: True or false depending on success
 | |
| #  DESCRIPTION: Checks that the file exists and returns FALSE if it doesn't.
 | |
| #               Rationalises the $limit to avoid it being 0 or less.
 | |
| #       THROWS: No exceptions
 | |
| #     COMMENTS: None
 | |
| #     SEE ALSO: N/A
 | |
| #===============================================================================
 | |
| sub _backup {
 | |
|     my ( $filename, $suffix, $limit ) = @_;
 | |
| 
 | |
|     my ( $backupname, $limitname );
 | |
| 
 | |
|     #
 | |
|     # Check the target file exists
 | |
|     #
 | |
|     unless ( -e $filename ) {
 | |
|         warn "Unable to find $filename to backup\n";
 | |
|         return 0;
 | |
|     }
 | |
| 
 | |
|     #
 | |
|     # Handle invalid $limit values
 | |
|     #
 | |
|     $limit = 1 unless ( $limit >= 1 );
 | |
| 
 | |
|     #
 | |
|     # Defaults
 | |
|     #
 | |
|     $suffix     = 'bck' unless $suffix;
 | |
|     $limitname  = "$filename.$suffix.$limit";
 | |
|     $backupname = "$filename.$suffix";
 | |
| 
 | |
|     #
 | |
|     # Look for existing backups
 | |
|     #
 | |
|     if ( -e $backupname ) {
 | |
| 
 | |
|         #
 | |
|         # If maximum version exists delete it
 | |
|         #
 | |
|         if ( -e $limitname ) {
 | |
|             unlink($limitname);
 | |
|         }
 | |
| 
 | |
|         #
 | |
|         # Go backwards through the versions incrementing their version numbers
 | |
|         #
 | |
|         for ( my $vsn = $limit - 1; $vsn > 0; $vsn-- ) {
 | |
|             if ( -e "$filename.$suffix.$vsn" ) {
 | |
|                 move( "$filename.$suffix.$vsn",
 | |
|                     sprintf( '%s.%s.%s', $filename, $suffix, $vsn + 1 ) );
 | |
|             }
 | |
|         }
 | |
| 
 | |
|         #
 | |
|         # Make $filename.bck into $filename.bck.1
 | |
|         #
 | |
|         move( "$filename.$suffix", "$filename.$suffix.1" );
 | |
| 
 | |
|     }
 | |
| 
 | |
|     #
 | |
|     # Finally save the $filename as $filename.bck
 | |
|     #
 | |
|     move( $filename, "$filename.$suffix" );
 | |
| 
 | |
|     return 1;
 | |
| }
 | |
| 
 | |
| #===  FUNCTION  ================================================================
 | |
| #         NAME: coalesce
 | |
| #      PURPOSE: To find the first defined argument and return it
 | |
| #   PARAMETERS: Arbitrary number of arguments
 | |
| #      RETURNS: The first defined argument or undef if there are none
 | |
| #  DESCRIPTION: Just a simple way of ensuring an 'undef' value is never
 | |
| #               returned when doing so might be a problem.
 | |
| #       THROWS: No exceptions
 | |
| #     COMMENTS: None
 | |
| #     SEE ALSO: N/A
 | |
| #===============================================================================
 | |
| sub coalesce {
 | |
|     foreach (@_) {
 | |
|         return $_ if defined($_);
 | |
|     }
 | |
|     return undef;    ## no critic
 | |
| }
 | |
| 
 | |
| #===  FUNCTION  ================================================================
 | |
| #         NAME: _debug
 | |
| #      PURPOSE: Prints debug reports
 | |
| #   PARAMETERS: $active         Boolean: 1 for print, 0 for no print
 | |
| #               $messages...    Arbitrary list of messages to print
 | |
| #      RETURNS: Nothing
 | |
| #  DESCRIPTION: Outputs messages if $active is true. It removes any trailing
 | |
| #               newline from each one and then adds one in the 'print' to the
 | |
| #               caller doesn't have to bother. Prepends each message with 'D>'
 | |
| #               to show it's a debug message.
 | |
| #       THROWS: No exceptions
 | |
| #     COMMENTS: Differs from other functions of the same name
 | |
| #     SEE ALSO: N/A
 | |
| #===============================================================================
 | |
| sub _debug {
 | |
|     my $active = shift;
 | |
| 
 | |
|     my $message;
 | |
|     return unless $active;
 | |
| 
 | |
|     while ($message = shift) {
 | |
|         chomp($message);
 | |
|         print STDERR "D> $message\n";
 | |
|     }
 | |
| }
 | |
| 
 | |
| #===  FUNCTION  ================================================================
 | |
| #         NAME: Options
 | |
| #      PURPOSE: Processes command-line options
 | |
| #   PARAMETERS: $optref     Hash reference to hold the options
 | |
| #      RETURNS: Undef
 | |
| #  DESCRIPTION: Process the options we want to offer. See the documentation
 | |
| #               for details
 | |
| #       THROWS: no exceptions
 | |
| #     COMMENTS: none
 | |
| #     SEE ALSO: n/a
 | |
| #===============================================================================
 | |
| sub Options {
 | |
|     my ($optref) = @_;
 | |
| 
 | |
|     my @options = (
 | |
|         "help",     "documentation|man", "debug=i", "silent!",
 | |
|         "prefix=s", "backup!",           "force!",
 | |
|         # "config=s",
 | |
|     );
 | |
| 
 | |
|     if ( !GetOptions( $optref, @options ) ) {
 | |
|         pod2usage(
 | |
|             -msg     => "$PROG version $VERSION\n",
 | |
|             -exitval => 1,
 | |
|             -verbose => 0
 | |
|         );
 | |
|     }
 | |
| 
 | |
|     return;
 | |
| }
 | |
| 
 | |
| __END__
 | |
| 
 | |
| #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 | |
| #  Application Documentation
 | |
| #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 | |
| #{{{
 | |
| 
 | |
| =head1 NAME
 | |
| 
 | |
| extract_images - extract embedded images from HTML and save as files
 | |
| 
 | |
| =head1 VERSION
 | |
| 
 | |
| This documentation refers to extract_images version 0.0.3
 | |
| 
 | |
| =head1 USAGE
 | |
| 
 | |
|     extract_images
 | |
|         [--help] [--documentation|man] [-debug=N]
 | |
|         [--prefix=STRING] [--[no]backup] [--[no]force] [--[no]silent]
 | |
|         HTML_file [ [HTML_file_2] [ [HTML_file_3] ... ] ]
 | |
| 
 | |
|     Examples:
 | |
| 
 | |
|     extract_images --prefix=picture_ --backup index.html
 | |
| 
 | |
|     extract_images --silent --nobackup shownotes.html
 | |
| 
 | |
| =head1 REQUIRED ARGUMENTS
 | |
| 
 | |
| The script requires the names of one or more HTML files which will be scanned
 | |
| for embedded images.
 | |
| 
 | |
| =head1 OPTIONS
 | |
| 
 | |
| Note that all on/off options can be written as B<--silent>, B<--nosilent> or
 | |
| B<--no-silent>. A single hyphen can be used at the start or a pair.
 | |
| 
 | |
| =over 8
 | |
| 
 | |
| =item B<--help>
 | |
| 
 | |
| Reports brief information about how to use the script and exits. To see the
 | |
| full documentation use the option B<--documentation> or B<--man>. Alternatively,
 | |
| to generate a PDF version use the I<pod2pdf> tool from
 | |
| I<http://search.cpan.org/~jonallen/pod2pdf-0.42/bin/pod2pdf>. This can be
 | |
| installed with the cpan tool as App::pod2pdf.
 | |
| 
 | |
| =item B<--documentation> or B<--man>
 | |
| 
 | |
| Reports full information about how to use the script and exits. Alternatively,
 | |
| to generate a PDF version use the I<pod2pdf> tool from
 | |
| I<http://search.cpan.org/~jonallen/pod2pdf-0.42/bin/pod2pdf>. This can be
 | |
| installed with the cpan tool as App::pod2pdf.
 | |
| 
 | |
| =item B<--debug=N>
 | |
| 
 | |
| Run in debug mode at the level specified by I<N>. Possible values are:
 | |
| 
 | |
| =over 4
 | |
| 
 | |
| =item B<0>
 | |
| 
 | |
| No debugging (the default).
 | |
| 
 | |
| =item B<1>
 | |
| 
 | |
| TBA
 | |
| 
 | |
| =item B<2>
 | |
| 
 | |
| Displays the internal variables used to store the options.
 | |
| 
 | |
| =item B<3>
 | |
| 
 | |
| TBA
 | |
| 
 | |
| =back
 | |
| 
 | |
| =item B<--prefix=STRING>
 | |
| 
 | |
| Since embedded images have no names, when they are written to image files
 | |
| names are generated for them. These names are built from the following
 | |
| components:
 | |
| 
 | |
| =over 4
 | |
| 
 | |
| =item B<HTML filename>
 | |
| 
 | |
| The name of the HTML file without a suffix. So, if the name is 'index.html',
 | |
| the 'index' part will be used.
 | |
| 
 | |
| =item B<prefix>
 | |
| 
 | |
| The prefix string provided by this option, or 'image' if not specified.
 | |
| 
 | |
| =item B<counter>
 | |
| 
 | |
| The images found in the HTML are counted, starting from 1, and this number is
 | |
| used here.
 | |
| 
 | |
| =item B<extension>
 | |
| 
 | |
| The image format extension, taken from the embedded image information.
 | |
| Examples would be 'jpg', 'jpeg' and 'png'.
 | |
| 
 | |
| =back
 | |
| 
 | |
| The B<HTML filename>, B<prefix>, B<counter> and B<extension> are joined
 | |
| together to make the file name as follows:
 | |
| 
 | |
|     <HTML filename>_<prefix>_<counter>.<extension>
 | |
| 
 | |
| So the following run of the script would generate the first file shown below
 | |
| assuming the first embedded picture was in 'PNG' format:
 | |
| 
 | |
|     extract_images --prefix=picture hpr4283.html
 | |
| 
 | |
|     hpr4283_picture_1.png
 | |
| 
 | |
| =item B<--[no]backup>
 | |
| 
 | |
| The HTML is modified to leave placeholders referring to any embedded images
 | |
| found as it it processed. The new HTML is written to the original file if
 | |
| B<--nobackup> is specified, or a backup of the original file is made, before
 | |
| writing the new HTML. Making a backup is the default behaviour.
 | |
| 
 | |
| =item B<--[no]force>
 | |
| 
 | |
| Images are written to the current directory. If an image already exists, and
 | |
| B<--force> has not been specified, the script will stop processing and exit.
 | |
| The default setting of this option is B<--noforce>.
 | |
| 
 | |
| =item B<--[no]silent>
 | |
| 
 | |
| By default the script reports its progress as it processes an HTML file, but
 | |
| this can be turned off by using this option.
 | |
| 
 | |
| =back
 | |
| 
 | |
| =head1 DESCRIPTION
 | |
| 
 | |
| This Perl script B<extract_images> parses HTML looking for embedded images.
 | |
| Such images use a URI with the scheme 'data' followed by image details
 | |
| including the encoded binary contents. See the Wikipedia article
 | |
| https://en.wikipedia.org/wiki/Data_URI_scheme for details.
 | |
| 
 | |
| When such images are found they are written as files (with generated names),
 | |
| and the HTML is updated with these names replacing the original URI. Further
 | |
| work is likely to be required to build full image links, but the majority of
 | |
| work will have been done.
 | |
| 
 | |
| The script will not overwrite image files unless the B<--force> option is
 | |
| used, but will overwrite the original HTML file unless B<--backup> is
 | |
| specified.
 | |
| 
 | |
| By default details of progress are written to standard output, but this can be
 | |
| preveneted by using the B<--silent> option.
 | |
| 
 | |
| =head2 GENERATED IMAGE FILE NAMES
 | |
| 
 | |
| These names are built from various elements:
 | |
| 
 | |
|     <HTML filename>_<prefix>_<counter>.<extension>
 | |
| 
 | |
| Where <HTML filename> is the base name of the HTML input file, <prefix> is the
 | |
| string provided with the B<--prefix=STRING> option (or B<image> by default),
 | |
| <counter> is a count of image files found during the scan of the HTML, and
 | |
| <extension> is the appropriate file extension for the type of image being
 | |
| converted.
 | |
| 
 | |
| See the option B<--prefix=STRING> for details of file name generation.
 | |
| 
 | |
| =head1 DIAGNOSTICS
 | |
| 
 | |
| =over 4
 | |
| 
 | |
| =item B<Unable to find ...>
 | |
| 
 | |
| Type: warning
 | |
| 
 | |
| The script is attempting to find a file presented to it as an argument, but
 | |
| cannot. It will skip to the next argument and continue.
 | |
| 
 | |
| =item B<File ... is not HTML>
 | |
| 
 | |
| Type: warning
 | |
| 
 | |
| The script is checking files presented to it as arguments. The named file has
 | |
| been checked to see if it contains HTML, and it appears it does not.  It will
 | |
| skip to the next argument and continue.
 | |
| 
 | |
| =item B<Unable to open ...>
 | |
| 
 | |
| Type: fatal
 | |
| 
 | |
| The script is attempting to open a file presented to it as an argument. Its
 | |
| existence has already been checked but it cannot be opened, possibly due to
 | |
| a permissions issue.
 | |
| 
 | |
| =item B<HTML::TreeBuilder failed to parse notes: ...>
 | |
| 
 | |
| Type: fatal
 | |
| 
 | |
| The script has attempted to parse the HTML in a file presented to it. This has
 | |
| failed. The error message includes a report from the module used to do this.
 | |
| 
 | |
| =item B<File ... exists; not overwriting>
 | |
| 
 | |
| Type: warning
 | |
| 
 | |
| The script is attempting to write an image file copied from the HTML, but has
 | |
| found it already exists. Using the option B<--force> would cause it to be
 | |
| overwritten, but this option is not enabled.
 | |
| 
 | |
| The script will not write the file, however, it will still modify the HTML to
 | |
| reference the existing image file.
 | |
| 
 | |
| =item B<Unable to find ... to backup>
 | |
| 
 | |
| Type: warning
 | |
| 
 | |
| The script is attempting to make a backup of one of the HTML file arguments,
 | |
| but is unable to find it. Since this is a rather unusual circumstance (has the
 | |
| file been deleted?), it is assumed it can be regenerated if needed, so the
 | |
| writing of the modified HTML is continued.
 | |
| 
 | |
| =head1 CONFIGURATION AND ENVIRONMENT
 | |
| 
 | |
| TBA
 | |
| 
 | |
| =head1 DEPENDENCIES
 | |
| 
 | |
|     Data::Dumper
 | |
|     File::Copy
 | |
|     Getopt::Long
 | |
|     HTML::TreeBuilder
 | |
|     MIME::Types
 | |
|     Path::Tiny
 | |
|     Pod::Usage
 | |
|     URI
 | |
| 
 | |
| =head1 BUGS AND LIMITATIONS
 | |
| 
 | |
| There are no known bugs in this module.
 | |
| Please report problems to Dave Morriss (Dave.Morriss@gmail.com)
 | |
| Patches are welcome.
 | |
| 
 | |
| =head1 AUTHOR
 | |
| 
 | |
| Dave Morriss  (Dave.Morriss@gmail.com)
 | |
| 
 | |
| =head1 LICENCE AND COPYRIGHT
 | |
| 
 | |
| Copyright (c) 2024 Dave Morriss (Dave.Morriss@gmail.com).
 | |
| All rights reserved.
 | |
| 
 | |
| This module is free software; you can redistribute it and/or
 | |
| modify it under the same terms as Perl itself. See perldoc perlartistic.
 | |
| 
 | |
| This program is distributed in the hope that it will be useful,
 | |
| but WITHOUT ANY WARRANTY; without even the implied warranty of
 | |
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 | |
| 
 | |
| =cut
 | |
| 
 | |
| #}}}
 | |
| 
 | |
| # [zo to open fold, zc to close]
 | |
| 
 | |
| # vim: syntax=perl:ts=8:sw=4:et:ai:tw=78:fo=tcrqn21:fdm=marker
 | |
| 
 |