Correction to the component order of the generated file name

2024-12-31 17:32:27 +00:00 · 2024-12-31 17:32:27 +00:00 · 2f350dd1db
commit 2f350dd1db
parent e0c4545295
2 changed files with 6 additions and 659 deletions
--- a/654
+++ b/654
@ -1,654 +0,0 @@
-#!/usr/bin/env perl
-#===============================================================================
-#
-#         FILE: extract_images
-#
-#        USAGE: ./extract_images [--help] [--documentation|man]
-#               [--prefix=STRING] [--[no-]backup] [--force] [--[no]silent]
-#               HTML_file [ [HTML_file_2] [HTML_file_3] ... ]
-#
-#  DESCRIPTION: Processes HTML files which may have 'data' URIs containing
-#               images, and extracts these images into files in the same
-#               directory.  The 'data' scheme links are converted to 'https'
-#               and reference the extracted files. The modified HTML is
-#               output, and the original will be saved as a backup if
-#               requested.
-#
-#      OPTIONS: ---
-# REQUIREMENTS: ---
-#         BUGS: ---
-#        NOTES: ---
-#       AUTHOR: Dave Morriss (djm), Dave.Morriss@gmail.com
-#      VERSION: 0.0.1
-#      CREATED: 2024-12-25 10:53:15
-#     REVISION: 2024-12-29 15:19:35
-#
-#===============================================================================
-
-use v5.36;
-use strict;
-use warnings;
-use feature qw{ postderef say signatures state try };
-no warnings
-    qw{ experimental::postderef experimental::signatures experimental::try };
-
-use open ':std', ':encoding(UTF-8)'; # Make all IO UTF-8
-
-use Getopt::Long;
-use Pod::Usage;
-
-use HTML::TreeBuilder 5 -weak;
-use URI;
-use MIME::Types;
-
-#use File::Slurper;
-use Path::Tiny;
-use File::Copy;
-
-use Data::Dumper;
-
-#
-# Version number (Incremented by Vim)
-#
-our $VERSION = '0.0.1';
-
-#
-# Script and directory names
-#
-( my $PROG = $0 ) =~ s|.*/||mx;
-
-#-------------------------------------------------------------------------------
-# Declarations
-#-------------------------------------------------------------------------------
-my ( $notes,   $typename, $tree,         $uri );
-my ( $fcount,  $basename, $filename,     $suffix, $fh );
-my ( $updates, $bsuffix,  $new_basename, $newURL );
-
-my $backupcount = 5;
-
-#-------------------------------------------------------------------------------
-# Options and arguments
-#-------------------------------------------------------------------------------
-# {{{
-#
-# Option defaults
-#
-my $DEFDEBUG = 0;
-my $DEFPREFIX = 'image';
-
-my %options;
-Options( \%options );
-
-#
-# Default help shows minimal information
-#
-pod2usage( -msg => "$PROG version $VERSION\n", -exitval => 1, -verbose => 0 )
-    if ( $options{'help'} );
-
-#
-# The -documentation or -man option shows the full POD documentation through
-# a pager for convenience
-#
-pod2usage( -msg => "$PROG version $VERSION\n", -exitval => 1, -verbose => 2 )
-    if ( $options{'documentation'} );
-
-#
-# Collect options
-#
-my $DEBUG  = ( defined( $options{debug} )  ? $options{debug}  : $DEFDEBUG );
-my $silent = ( defined( $options{silent} ) ? $options{silent} : 0 );
-my $prefix = ( defined( $options{prefix} ) ? $options{prefix} : $DEFPREFIX );
-my $backup = ( defined( $options{backup} ) ? $options{backup} : 1 );
-my $force  = ( defined( $options{force} )  ? $options{force}  : 0 );
-
-#
-# Check we have arguments
-#
-die "Usage: $PROG [options] file1 [file2 [file3 [fileN]]]\n"
-    unless ( scalar(@ARGV) > 0 );
-
-#
-# Clean up the prefix
-#
-$prefix = ($prefix =~ /(.*)_$/ ? $1 : $prefix);
-
-#
-# Backup suffix (the dot gets added later)
-#
-$bsuffix = 'bck';
-
-#
-# Debug the options
-#
-_debug(
-    $DEBUG > 1,
-    '$silent = ' . $silent,
-    '$prefix = ' . $prefix,
-    '$backup = ' . $backup,
-    '$force = ' . $force,
-);
-
-# }}}
-
-#-------------------------------------------------------------------------------
-# Prepare items for later use: current working directory and a MIME::Types
-# object
-#-------------------------------------------------------------------------------
-my $curdir = Path::Tiny->cwd;
-
-my $mt = MIME::Types->new;
-
-#-------------------------------------------------------------------------------
-# Loop through the arguments
-#-------------------------------------------------------------------------------
-foreach my $notesfile (@ARGV) {
-    unless (-e $notesfile) {
-        warn "Unable to find $notesfile\n";
-        next;
-    }
-
-    #
-    # Force the MIME type of $notesfile to a string
-    #
-    $typename = "" . $mt->mimeTypeOf("$notesfile");
-
-    #
-    # Check the MIME type and reject non-HTML
-    #
-    unless ($typename eq 'text/html') {
-        warn "File $notesfile is not HTML\n";
-        next
-    }
-
-    say "Reading from $notesfile\n" unless $silent;
-
-    #
-    # Get HTML file basename without the suffix for building filenames
-    #
-    $basename = path($notesfile)->basename('.html');
-
-    #
-    # Read the HTML
-    #
-    open (my $nfh, '<', $notesfile) or die "Unable to open $notesfile\n";
-    $notes = <$nfh>;
-    close($nfh);
-
-    #
-    # Image files are to have an index
-    #
-    $fcount = 0;
-
-    #
-    # Keep a note of HTML updates
-    #
-    $updates = 0;
-
-    #
-    # Initialise the TreeBuilder
-    #
-    $tree = HTML::TreeBuilder->new;
-    $tree->ignore_unknown(0);
-    $tree->no_expand_entities(1);
-    $tree->p_strict(1);
-    $tree->store_comments(1);
-    $tree->warn(1);
-
-    #
-    # Load this HTML file into the TreeBuilder
-    #
-    $tree->parse_content($notes)
-        or die "HTML::TreeBuilder failed to parse notes: $!\n";
-
-    #
-    # Loop through the tree looking for 'data' scheme images
-    #
-    for ( @{ $tree->extract_links( 'img' ) } ) {
-        my ( $link, $element, $attr, $tag ) = @$_;
-
-        $uri = URI->new( $link );
-        unless ($silent) {
-            say "Scheme: ",$uri->scheme;
-            say "Media type: ",$uri->media_type;
-        }
-
-        #
-        # We only care about 'data' scheme stuff - for now anyway, and only
-        # images within this set
-        #
-        if ( $uri->scheme eq 'data' ) {
-            #
-            # Only images
-            #
-            if ( $uri->media_type =~ /^image/ ) {
-                #
-                # Extract the file name suffix from the MIME string, and give it
-                # a leading '.'
-                #
-                ( $suffix = $uri->media_type ) =~ s{^.*/}{.};
-
-                #
-                # Construct the filename for this image
-                #
-                $fcount++;
-                $filename
-                    = "$curdir/${prefix}_${basename}_${fcount}${suffix}";
-                say "Writing to: $filename" unless $silent;
-
-                say '-' x 40 unless $silent;
-
-                #
-                # Check if the file exists. Don't clobber it unless --force is active
-                #
-                if ( -e $filename ) {
-                    unless ($force) {
-                        warn "File $filename exists; not overwriting\n";
-                    }
-                }
-                else {
-                    #
-                    # Write the data to the file in binary format. The URI module
-                    # does the conversion so it's already binary.
-                    #
-                    $fh = path($filename)->openw_raw;
-                    print $fh $uri->data;
-                    close($fh);
-                }
-
-                #
-                # Update the HTML with a link to the file we created
-                #
-                $updates++;
-                $newURL       = path($filename)->basename;
-                $element->attr( $attr, $newURL );
-            }
-        }
-    } # extract_links loop
-
-    #
-    # Output the changed HTML turning what became standalone back into
-    # a <body> fragment.
-    #
-    if ($updates > 0) {
-        my $body = $tree->look_down( _tag => 'body' );
-        ( my $result = $body->as_HTML( undef, ' ', {} ) )
-            =~ s{(^<body[^>]*>|</body>$)}{}gi;
-
-        #$notesfile = path($notesfile)->basename;
-        if ($backup) {
-            _backup( $notesfile, $bsuffix, $backupcount );
-            say "$notesfile backed up" unless $silent;
-        }
-        else {
-            say "$notesfile not backed up" unless $silent;
-        }
-
-        $fh = path($notesfile)->openw;
-        say $fh $result;
-        close($fh);
-
-        say "$updates images converted, $notesfile updated" unless $silent;
-    }
-    else {
-        say "No images found, no changes made to $notesfile" unless $silent;
-    }
-
-} # $notesfile loop
-
-exit;
-
-#===  FUNCTION  ================================================================
-#         NAME: _backup
-#      PURPOSE: Given a file, make a backup of it by appending $suffix, and
-#               handle previous backups
-#   PARAMETERS: $filename               path of file to backup
-#               $suffix                 suffix to use, default 'bck'
-#               $limit                  number of backups to keep
-#      RETURNS: True or false depending on success
-#  DESCRIPTION: Checks that the file exists and returns FALSE if it doesn't.
-#               Rationalises the $limit to avoid it being 0 or less.
-#       THROWS: No exceptions
-#     COMMENTS: None
-#     SEE ALSO: N/A
-#===============================================================================
-sub _backup {
-    my ( $filename, $suffix, $limit ) = @_;
-
-    my ( $backupname, $limitname );
-
-    #
-    # Check the target file exists
-    #
-    unless ( -e $filename ) {
-        warn "Unable to find $filename to backup\n";
-        return 0;
-    }
-
-    #
-    # Handle invalid $limit values
-    #
-    $limit = 1 unless ( $limit >= 1 );
-
-    #
-    # Defaults
-    #
-    $suffix     = 'bck' unless $suffix;
-    $limitname  = "$filename.$suffix.$limit";
-    $backupname = "$filename.$suffix";
-
-    #
-    # Look for existing backups
-    #
-    if ( -e $backupname ) {
-
-        #
-        # If maximum version exists delete it
-        #
-        if ( -e $limitname ) {
-            unlink($limitname);
-        }
-
-        #
-        # Go backwards through the versions incrementing their version numbers
-        #
-        for ( my $vsn = $limit - 1; $vsn > 0; $vsn-- ) {
-            if ( -e "$filename.$suffix.$vsn" ) {
-                move( "$filename.$suffix.$vsn",
-                    sprintf( '%s.%s.%s', $filename, $suffix, $vsn + 1 ) );
-            }
-        }
-
-        #
-        # Make $filename.bck into $filename.bck.1
-        #
-        move( "$filename.$suffix", "$filename.$suffix.1" );
-
-    }
-
-    #
-    # Finally save the $filename as $filename.bck
-    #
-    move( $filename, "$filename.$suffix" );
-
-    return 1;
-}
-
-#===  FUNCTION  ================================================================
-#         NAME: _debug
-#      PURPOSE: Prints debug reports
-#   PARAMETERS: $active         Boolean: 1 for print, 0 for no print
-#               $messages...    Arbitrary list of messages to print
-#      RETURNS: Nothing
-#  DESCRIPTION: Outputs messages if $active is true. It removes any trailing
-#               newline from each one and then adds one in the 'print' to the
-#               caller doesn't have to bother. Prepends each message with 'D>'
-#               to show it's a debug message.
-#       THROWS: No exceptions
-#     COMMENTS: Differs from other functions of the same name
-#     SEE ALSO: N/A
-#===============================================================================
-sub _debug {
-    my $active = shift;
-
-    my $message;
-    return unless $active;
-
-    while ($message = shift) {
-        chomp($message);
-        print STDERR "D> $message\n";
-    }
-}
-
-#===  FUNCTION  ================================================================
-#         NAME: Options
-#      PURPOSE: Processes command-line options
-#   PARAMETERS: $optref     Hash reference to hold the options
-#      RETURNS: Undef
-#  DESCRIPTION: Process the options we want to offer. See the documentation
-#               for details
-#       THROWS: no exceptions
-#     COMMENTS: none
-#     SEE ALSO: n/a
-#===============================================================================
-sub Options {
-    my ($optref) = @_;
-
-    my @options = (
-        "help",     "documentation|man", "debug=i", "silent!",
-        "prefix=s", "backup!",           "force!",
-        # "config=s",
-    );
-
-    if ( !GetOptions( $optref, @options ) ) {
-        pod2usage(
-            -msg     => "$PROG version $VERSION\n",
-            -exitval => 1,
-            -verbose => 0
-        );
-    }
-
-    return;
-}
-
-__END__
-
-#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-#  Application Documentation
-#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-#{{{
-
-=head1 NAME
-
-extract_images - extract embedded images from HTML and save as files
-
-=head1 VERSION
-
-This documentation refers to extract_images version 0.0.1
-
-=head1 USAGE
-
-    extract_images
-        [--help] [--documentation|man] [-debug=N]
-        [--prefix=STRING] [--[no]backup] [--[no]force] [--[no]silent]
-        HTML_file [ [HTML_file_2] [ [HTML_file_3] ... ] ]
-
-    Examples:
-
-    extract_images --prefix=picture_ --backup index.html
-
-    extract_images --silent --nobackup shownotes.html
-
-=head1 REQUIRED ARGUMENTS
-
-The script requires the names of one or more HTML files which will be scanned
-for embedded images.
-
-=head1 OPTIONS
-
-Note that all on/off options can be written as B<--silent>, B<--nosilent> or
-B<--no-silent>. A single hyphen can be used at the start or a pair.
-
-=over 8
-
-=item B<--help>
-
-Reports brief information about how to use the script and exits. To see the
-full documentation use the option B<--documentation> or B<--man>. Alternatively,
-to generate a PDF version use the I<pod2pdf> tool from
-I<http://search.cpan.org/~jonallen/pod2pdf-0.42/bin/pod2pdf>. This can be
-installed with the cpan tool as App::pod2pdf.
-
-=item B<--documentation> or B<--man>
-
-Reports full information about how to use the script and exits. Alternatively,
-to generate a PDF version use the I<pod2pdf> tool from
-I<http://search.cpan.org/~jonallen/pod2pdf-0.42/bin/pod2pdf>. This can be
-installed with the cpan tool as App::pod2pdf.
-
-=item B<--debug=N>
-
-Run in debug mode at the level specified by I<N>. Possible values are:
-
-=over 4
-
-=item B<0>
-
-No debugging (the default).
-
-=item B<1>
-
-TBA
-
-=item B<2>
-
-TBA
-
-=item B<3>
-
-TBA
-
-=back
-
-=item B<--prefix=STRING>
-
-Since embedded images have no names, when they are written to image files
-names are generated for them. These names are built from the following
-components:
-
-=over 4
-
-=item B<HTML filename>
-
-The name of the HTML file without a suffix. So, if the name is 'index.html',
-the 'index' part will be used.
-
-=item B<prefix>
-
-The prefix string provided by this option, or 'image' if not specified.
-
-=item B<counter>
-
-The images found in the HTML are counted, starting from 1, and this number is
-used here.
-
-=item B<extension>
-
-The image format extension, taken from the embedded image information.
-Examples would be 'jpg', 'jpeg' and 'png'.
-
-=back
-
-The B<HTML filename>, B<prefix>, B<counter> and B<extension> are joined
-together to make the file name as follows:
-
-    <HTML filename>_<prefix>_<counter>.<extension>
-
-So the following run of the script would generate the first file shown below
-assuming the first embedded picture was in 'PNG' format:
-
-    extract_images --prefix=picture hpr4283.html
-
-    hpr4283_picture_1.png
-
-=item B<--[no]backup>
-
-The HTML is modified to leave placeholders referring to any embedded images
-found as it it processed. The new HTML is written to the original file if
-B<--nobackup> is specified, or a backup of the original file is made, before
-writing the new HTML. Making a backup is the default behaviour.
-
-=item B<--[no]force>
-
-Images are written to the current directory. If an image already exists, and
-B<--force> has not been specified, the script will stop processing and exit.
-The default setting of this option is B<--noforce>.
-
-=item B<--[no]silent>
-
-By default the script reports its progress as it processes an HTML file, but
-this can be turned off by using this option.
-
-=back
-
-=head1 DESCRIPTION
-
-This Perl script B<extract_images> parses HTML looking for embedded images.
-Such images use a URI with the scheme 'data' followed by image details
-including the encoded binary contents. See the Wikipedia article
-https://en.wikipedia.org/wiki/Data_URI_scheme for details.
-
-When such images are found they are written as files (with generated names),
-and the HTML is updated with these names replacing the original URI. Further
-work is likely to be required to build full image links, but the majority of
-work will have been done.
-
-The script will not overwrite image files unless the B<--force> option is
-used, but will overwrite the original HTML file unless B<--backup> is
-specified.
-
-By default details of progress are written to standard output, but this can be
-preveneted by using the B<--silent> option.
-
-=head2 GENERATED IMAGE FILE NAMES
-
-These names are built from various elements:
-
-    <HTML filename>_<prefix>_<counter>.<extension>
-
-Where <HTML filename> is the base name of the HTML input file, <prefix> is the
-string provided with the B<--prefix=STRING> option (or B<image> by default),
-<counter> is a count of image files found during the scan of the HTML, and
-<extension> is the appropriate file extension for the type of image being
-converted.
-
-See the option B<--prefix=STRING> for details of file name generation.
-
-=head1 DIAGNOSTICS
-
-TBA
-
-=head1 CONFIGURATION AND ENVIRONMENT
-
-TBA
-
-=head1 DEPENDENCIES
-
-    Data::Dumper
-    File::Copy
-    Getopt::Long
-    HTML::TreeBuilder
-    MIME::Types
-    Path::Tiny
-    Pod::Usage
-    URI
-
-=head1 BUGS AND LIMITATIONS
-
-There are no known bugs in this module.
-Please report problems to Dave Morriss (Dave.Morriss@gmail.com)
-Patches are welcome.
-
-=head1 AUTHOR
-
-Dave Morriss  (Dave.Morriss@gmail.com)
-
-=head1 LICENCE AND COPYRIGHT
-
-Copyright (c) 2024 Dave Morriss (Dave.Morriss@gmail.com).
-All rights reserved.
-
-This module is free software; you can redistribute it and/or
-modify it under the same terms as Perl itself. See perldoc perlartistic.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-
-=cut
-
-#}}}
-
-# [zo to open fold, zc to close]
-
-# vim: syntax=perl:ts=8:sw=4:et:ai:tw=78:fo=tcrqn21:fdm=marker
-
--- a/Show_Submission/extract_images
+++ b/Show_Submission/extract_images
@ -19,9 +19,9 @@
 #         BUGS: ---
 #        NOTES: ---
 #       AUTHOR: Dave Morriss (djm), Dave.Morriss@gmail.com
-#      VERSION: 0.0.2
+#      VERSION: 0.0.3
 #      CREATED: 2024-12-25 10:53:15
-#     REVISION: 2024-12-30 09:28:40
+#     REVISION: 2024-12-30 11:52:59
 #
 #===============================================================================

@ -48,7 +48,7 @@ use Data::Dumper;
 #
 # Version number (Incremented by Vim)
 #
-our $VERSION = '0.0.2';
+our $VERSION = '0.0.3';

 #
 # Script and directory names
@ -242,8 +242,9 @@ foreach my $notesfile (@ARGV) {
                # the directory the HTML is in.
                #
                $fcount++;
+                # ${fileprefix}_${prefix}_${increment}.${extension}
                $filename
-                    = "$dirname/${prefix}_${basename}_${fcount}${suffix}";
+                    = "$dirname/${basename}_${prefix}_${fcount}${suffix}";
                say "Writing to: $filename" unless $silent;

                say '-' x 40 unless $silent;
@ -473,7 +474,7 @@ extract_images - extract embedded images from HTML and save as files

 =head1 VERSION

-This documentation refers to extract_images version 0.0.2
+This documentation refers to extract_images version 0.0.3

 =head1 USAGE