501 lines
		
	
	
		
			15 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
		
		
			
		
	
	
			501 lines
		
	
	
		
			15 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
|   | #!/usr/bin/env perl | ||
|  | #=============================================================================== | ||
|  | # | ||
|  | #         FILE: view_derivatives | ||
|  | # | ||
|  | #        USAGE: ./view_derivatives metadata_file | ||
|  | # | ||
|  | #  DESCRIPTION: Experimental! | ||
|  | #               Reads a JSON file containing IA metadata for an item (HPR | ||
|  | #               show) and processes 'derived' files from the IA. Their | ||
|  | #               relationships are usually simple, but sometimes can be rather | ||
|  | #               weird, so building a representation of the hierarchy might be | ||
|  | #               a way to understand what's there and maybe make a list for the | ||
|  | #               'ia delete' command to work on! | ||
|  | #               Each 'original' file may be accompanied by 'derived' files. So | ||
|  | #               an image file might generate a thumbnail for example. The | ||
|  | #               image will be marked in the JSON metadata as "original" and | ||
|  | #               the derived file as a "derivative" with the file it was built | ||
|  | #               from being its "parent". There can also be "derivative" files | ||
|  | #               built from other "derivative"s in rare cases. This has been | ||
|  | #               seen when an EPUB file is used to build a PDF "derivative", | ||
|  | #               and then other versions of the PDF are generated as | ||
|  | #               "derivatives" with the PDF as the "parent". | ||
|  | #               The point is that we do not want the "derivatives". We (HPR) | ||
|  | #               generate all of the versions of a given file we require: | ||
|  | #               audio types and thumbnails, for example. The tools we use can | ||
|  | #               disable the generation of "derivatives", so it has been rare | ||
|  | #               to see them in recent times. However, with older shows we | ||
|  | #               either couldn't stop the "derive" process, or some IA servers | ||
|  | #               ignored our '--no-derive' options. Now (August 2024) we are | ||
|  | #               cleaning up the HPR collection, and thus this script has been | ||
|  | #               developed. | ||
|  | #               Special note: any cleaning up of IA items needs to be done | ||
|  | #               before files are moved around. The parental relationships are | ||
|  | #               stored in the metadata, and do not track file movements. This | ||
|  | #               script cannot analyse metadata from after such rearrangements! | ||
|  | #               [It may be possible to adjust these metadata fields, but it | ||
|  | #               does not seem to be necessary if the correct sequence of | ||
|  | #               changes is adhered to - find derivatives, delete derivatives, | ||
|  | #               move files] | ||
|  | # | ||
|  | #      OPTIONS: --- | ||
|  | # REQUIREMENTS: --- | ||
|  | #         BUGS: --- | ||
|  | #        NOTES: --- | ||
|  | #       AUTHOR: Dave Morriss (djm), Dave.Morriss@gmail.com | ||
|  | #      VERSION: 0.0.2 | ||
|  | #      CREATED: 2024-08-12 16:26:29 | ||
|  | #     REVISION: 2024-08-17 13:44:44 | ||
|  | # | ||
|  | #=============================================================================== | ||
|  | 
 | ||
|  | use v5.36; | ||
|  | use strict; | ||
|  | use warnings; | ||
|  | use feature qw{ postderef say signatures state try }; | ||
|  | no warnings | ||
|  |     qw{ experimental::postderef experimental::signatures experimental::try }; | ||
|  | 
 | ||
|  | use open ':std', ':encoding(UTF-8)'; # Make all IO UTF-8 | ||
|  | 
 | ||
|  | use Carp; | ||
|  | use Getopt::Long; | ||
|  | use Pod::Usage; | ||
|  | 
 | ||
|  | use File::Slurper qw{ read_text read_lines }; | ||
|  | use JSON; | ||
|  | 
 | ||
|  | use Data::Dumper; | ||
|  | 
 | ||
|  | # | ||
|  | # Version number (Incremented by Vim) | ||
|  | # | ||
|  | our $VERSION = '0.0.2'; | ||
|  | 
 | ||
|  | # | ||
|  | # Script and directory names | ||
|  | # | ||
|  | ( my $PROG = $0 ) =~ s|.*/||mx; | ||
|  | ( my $DIR  = $0 ) =~ s|/?[^/]*$||mx; | ||
|  | $DIR = '.' unless $DIR; | ||
|  | 
 | ||
|  | #------------------------------------------------------------------------------- | ||
|  | # Declarations | ||
|  | #------------------------------------------------------------------------------- | ||
|  | # | ||
|  | # Constants and other declarations | ||
|  | # | ||
|  | my $basedir    = "$ENV{HOME}/HPR/InternetArchive"; | ||
|  | my $outputfile = "$basedir/$PROG.svg"; | ||
|  | my $logfile    = "$basedir/$PROG.log"; | ||
|  | 
 | ||
|  | my ( $file, $filebuffer, $json, @jsonbuffer ); | ||
|  | my ( %filetree, @original, @derived ); | ||
|  | 
 | ||
|  | #------------------------------------------------------------------------------- | ||
|  | # Options | ||
|  | #------------------------------------------------------------------------------- | ||
|  | # | ||
|  | # Option defaults | ||
|  | # | ||
|  | my $DEFDEBUG = 0; | ||
|  | 
 | ||
|  | # | ||
|  | # Options and arguments | ||
|  | # | ||
|  | my %options; | ||
|  | Options( \%options ); | ||
|  | 
 | ||
|  | # | ||
|  | # Default help shows minimal information | ||
|  | # | ||
|  | pod2usage( -msg => "$PROG version $VERSION\n", -exitval => 1, -verbose => 0 ) | ||
|  |     if ( $options{'help'} ); | ||
|  | 
 | ||
|  | # | ||
|  | # The -documentation or -man option shows the full POD documentation through | ||
|  | # a pager for convenience | ||
|  | # | ||
|  | pod2usage( -msg => "$PROG version $VERSION\n", -exitval => 1, -verbose => 2 ) | ||
|  |     if ( $options{'documentation'} ); | ||
|  | 
 | ||
|  | my $DEBUG   = ( defined( $options{debug} ) ? $options{debug} : $DEFDEBUG ); | ||
|  | my $dry_run = ( defined( $options{'dry-run'} ) ? $options{'dry-run'} : 0 ); | ||
|  | my $verbose = ( defined( $options{verbose} )   ? $options{verbose}   : 0 ); | ||
|  | 
 | ||
|  | my $list_derived | ||
|  |     = ( defined( $options{list_derived} ) ? $options{list_derived} : 0 ); | ||
|  | 
 | ||
|  | # | ||
|  | # Get the filename argument (metadata in JSON format) | ||
|  | # | ||
|  | $file = shift; | ||
|  | die "File of IA metadata is required\n" unless ($file); | ||
|  | 
 | ||
|  | # | ||
|  | # Read the entire JSON file into a buffer | ||
|  | # | ||
|  | try { | ||
|  |     $filebuffer = read_text($file); | ||
|  | } | ||
|  | 
 | ||
|  | catch ($e) { | ||
|  |     die "Failed to read JSON file $file\n"; | ||
|  | } | ||
|  | 
 | ||
|  | # | ||
|  | # Parse the JSON from the buffer | ||
|  | # | ||
|  | try { | ||
|  |     $json = JSON->new; | ||
|  |     @jsonbuffer = $json->incr_parse($filebuffer); | ||
|  | } | ||
|  | 
 | ||
|  | catch ($e) { | ||
|  |     die "Failed to parse JSON\n"; | ||
|  | } | ||
|  | 
 | ||
|  | _debug($DEBUG > 3, '@jsonbuffer: ' . Dumper(\@jsonbuffer)); | ||
|  | 
 | ||
|  | # | ||
|  | # Check that this is the type of JSON we need | ||
|  | # | ||
|  | die "Empty JSON?\n" unless (@jsonbuffer); | ||
|  | 
 | ||
|  | # | ||
|  | # We got an array of one element from 'ia metadata <item>'. Use $md to | ||
|  | # reference that one element | ||
|  | # | ||
|  | my $md = $jsonbuffer[0]; | ||
|  | 
 | ||
|  | # | ||
|  | # Collect the identifier from the parsed JSON and define the one derived file | ||
|  | # we don't want to delete. | ||
|  | # | ||
|  | my $identifier = $md->{metadata}->{identifier}; | ||
|  | my $item_png = "${identifier}.png"; | ||
|  | 
 | ||
|  | # | ||
|  | # Build a hash from the original and derived files referenced in the metadata. | ||
|  | # The hash will contain all but a number of files which are not of interest | ||
|  | # when sorting out which file came from which. At the end of the loop the | ||
|  | # structure will contain back links with child files pointing to parents. The | ||
|  | # forward child links get added later. | ||
|  | # | ||
|  | # The original names of files are also kept in an array to help with | ||
|  | # traversing the main hash. | ||
|  | # | ||
|  | foreach my $file ( @{ $md->{files} } ) { | ||
|  |     my $fname = $file->{name}; | ||
|  | 
 | ||
|  |     printf "%-40s %-10s %s\n", $fname, $file->{source}, | ||
|  |         coalesce( $file->{original}, 'null' ) | ||
|  |         if ($verbose > 1); | ||
|  | 
 | ||
|  |     unless ( $file->{source} =~ /[Mm]etadata/ | ||
|  |         || $file->{format} | ||
|  |         =~ /[Mm]etadata|Item Tile|Columbia Peaks|Spectrogram/ ) | ||
|  |     { | ||
|  |         $filetree{$fname} = { | ||
|  |             source   => $file->{source}, | ||
|  |             parent   => $file->{original}, | ||
|  |             format   => $file->{format}, | ||
|  |             children => [], | ||
|  |         }; | ||
|  | 
 | ||
|  |         if ( $file->{source} eq 'original' ) { | ||
|  |             push( @original, $fname ); | ||
|  |         } | ||
|  |         elsif ( $file->{source} eq 'derivative' ) { | ||
|  |             push( @derived, $fname ); | ||
|  |         } | ||
|  |     } | ||
|  | 
 | ||
|  | } | ||
|  | 
 | ||
|  | # | ||
|  | # Visit all the nodes for derived files. They point to their parent node, so | ||
|  | # we can visit this node and add the children to an internal array. | ||
|  | # | ||
|  | foreach my $file (@derived) { | ||
|  |     if ( defined( $filetree{$file}->{parent} ) ) { | ||
|  |         push( | ||
|  |             @{ $filetree{ $filetree{$file}->{parent} }->{children} }, | ||
|  |             $file | ||
|  |         ); | ||
|  |     } | ||
|  | } | ||
|  | 
 | ||
|  | # | ||
|  | # The trees are built, so we can dump the final result | ||
|  | # | ||
|  | _debug( | ||
|  |    $DEBUG > 2, | ||
|  |    '%filetree' | ||
|  |        . sprintf( '[%d]: ', scalar( keys(%filetree) ) ) | ||
|  |        . Dumper( \%filetree ) | ||
|  | ); | ||
|  | 
 | ||
|  | _debug($DEBUG > 2, '@original: ' . Dumper(\@original)); | ||
|  | _debug($DEBUG > 2, '@derived: ' . Dumper(\@derived)); | ||
|  | 
 | ||
|  | # | ||
|  | # If the verbosity level is greater than 0 scan all the 'original' files in | ||
|  | # the '$filetree' hash. For each one walk its 'children' array if there's | ||
|  | # anything in it, and recurse into each node it points to. It's possible to | ||
|  | # have further levels than the one below the parent, but very little seems to | ||
|  | # be set up this way so far. | ||
|  | # | ||
|  | if ($verbose > 0) { | ||
|  |     foreach my $file ( sort(@original) ) { | ||
|  |         # | ||
|  |         # Print the top level node and recurse into any children and print them | ||
|  |         # | ||
|  |         display_nodes( \%filetree, 0, $file ); | ||
|  |     } | ||
|  |     say '-' x 10; | ||
|  | } | ||
|  | 
 | ||
|  | if ($list_derived) { | ||
|  |     foreach my $file ( sort(@derived) ) { | ||
|  |         say "$file" unless ($file eq $item_png); | ||
|  |     } | ||
|  | } | ||
|  | 
 | ||
|  | exit; | ||
|  | 
 | ||
|  | #===  FUNCTION  ================================================================ | ||
|  | #         NAME: display_nodes | ||
|  | #      PURPOSE: Walks a tree of original and derived files in the IA metadata | ||
|  | #   PARAMETERS: $tree           Hashref pointing to the file tree | ||
|  | #               $level          The current level within the tree | ||
|  | #               $key            Hash key for the current node | ||
|  | #      RETURNS: Nothing | ||
|  | #  DESCRIPTION: Recurses through a tree | ||
|  | #       THROWS: No exceptions | ||
|  | #     COMMENTS: None | ||
|  | #     SEE ALSO: N/A | ||
|  | #=============================================================================== | ||
|  | sub display_nodes { | ||
|  |     my ($tree, $level, $key) = @_; | ||
|  | 
 | ||
|  |     # | ||
|  |     # Report this node | ||
|  |     # | ||
|  |     printf "%s%d: %s\n", "\t" x $level, $level, $key; | ||
|  | 
 | ||
|  |     # | ||
|  |     # If there are children we use each to recurse to a lower level | ||
|  |     # | ||
|  |     if (scalar(@{$tree->{$key}->{children}}) > 0) { | ||
|  |         $level++; | ||
|  | 
 | ||
|  |         foreach my $child (@{$tree->{$key}->{children}}) { | ||
|  |             display_nodes($tree, $level, $child); | ||
|  |         } | ||
|  |     } | ||
|  | 
 | ||
|  |     return; | ||
|  | } | ||
|  | 
 | ||
|  | #===  FUNCTION  ================================================================ | ||
|  | #         NAME: coalesce | ||
|  | #      PURPOSE: To find the first defined argument and return it | ||
|  | #   PARAMETERS: Arbitrary number of arguments | ||
|  | #      RETURNS: The first defined argument or undef if there are none | ||
|  | #  DESCRIPTION: Just a simple way of ensuring an 'undef' value is never | ||
|  | #               returned when doing so might be a problem. | ||
|  | #       THROWS: No exceptions | ||
|  | #     COMMENTS: None | ||
|  | #     SEE ALSO: N/A | ||
|  | #=============================================================================== | ||
|  | sub coalesce { | ||
|  |     foreach (@_) { | ||
|  |         return $_ if defined($_); | ||
|  |     } | ||
|  |     return undef;    ## no critic | ||
|  | } | ||
|  | 
 | ||
|  | #===  FUNCTION  ================================================================ | ||
|  | #         NAME: _debug | ||
|  | #      PURPOSE: Prints debug reports | ||
|  | #   PARAMETERS: $active         Boolean: 1 for print, 0 for no print | ||
|  | #               $message        Message to print | ||
|  | #      RETURNS: Nothing | ||
|  | #  DESCRIPTION: Outputs a message if $active is true. It removes any trailing | ||
|  | #               newline and then adds one in the 'print' to the caller doesn't | ||
|  | #               have to bother. Prepends the message with 'D> ' to show it's | ||
|  | #               a debug message. | ||
|  | #       THROWS: No exceptions | ||
|  | #     COMMENTS: None | ||
|  | #     SEE ALSO: N/A | ||
|  | #=============================================================================== | ||
|  | sub _debug { | ||
|  |     my ( $active, $message ) = @_; | ||
|  | 
 | ||
|  |     chomp($message); | ||
|  |     print "D> $message\n" if $active; | ||
|  | } | ||
|  | 
 | ||
|  | #===  FUNCTION  ================================================================ | ||
|  | #         NAME: Options | ||
|  | #      PURPOSE: Processes command-line options | ||
|  | #   PARAMETERS: $optref     Hash reference to hold the options | ||
|  | #      RETURNS: Undef | ||
|  | #  DESCRIPTION: Process the options we want to offer. See the documentation | ||
|  | #               for details | ||
|  | #       THROWS: no exceptions | ||
|  | #     COMMENTS: none | ||
|  | #     SEE ALSO: n/a | ||
|  | #=============================================================================== | ||
|  | sub Options { | ||
|  |     my ($optref) = @_; | ||
|  | 
 | ||
|  |     my @options = ( | ||
|  |         "help",     "documentation|man", "debug=i", "dry-run!", | ||
|  |         "verbose+", "list_derived!" | ||
|  |     ); | ||
|  | 
 | ||
|  |     if ( !GetOptions( $optref, @options ) ) { | ||
|  |         pod2usage( | ||
|  |             -msg     => "$PROG version $VERSION\n", | ||
|  |             -exitval => 1, | ||
|  |             -verbose => 0 | ||
|  |         ); | ||
|  |     } | ||
|  | 
 | ||
|  |     return; | ||
|  | } | ||
|  | 
 | ||
|  | __END__ | ||
|  | 
 | ||
|  | #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% | ||
|  | #  Application Documentation | ||
|  | #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% | ||
|  | #{{{ | ||
|  | 
 | ||
|  | =head1 NAME | ||
|  | 
 | ||
|  | <application name> - <One line description of application's purpose> | ||
|  | 
 | ||
|  | =head1 VERSION | ||
|  | 
 | ||
|  | The initial template usually just has: | ||
|  | 
 | ||
|  | This documentation refers to <application name> version 0.0.2 | ||
|  | 
 | ||
|  | 
 | ||
|  | =head1 USAGE | ||
|  | 
 | ||
|  |     # Brief working invocation example(s) here showing the most common usage(s) | ||
|  | 
 | ||
|  |     # This section will be as far as many users ever read | ||
|  |     # so make it as educational and exemplary as possible. | ||
|  | 
 | ||
|  | 
 | ||
|  | =head1 REQUIRED ARGUMENTS | ||
|  | 
 | ||
|  | A complete list of every argument that must appear on the command line. | ||
|  | when the application  is invoked, explaining what each of them does, any | ||
|  | restrictions on where each one may appear (i.e. flags that must appear | ||
|  | before or after filenames), and how the various arguments and options | ||
|  | may interact (e.g. mutual exclusions, required combinations, etc.) | ||
|  | 
 | ||
|  | If all of the application's arguments are optional this section | ||
|  | may be omitted entirely. | ||
|  | 
 | ||
|  | 
 | ||
|  | =head1 OPTIONS | ||
|  | 
 | ||
|  | A complete list of every available option with which the application | ||
|  | can be invoked, explaining what each does, and listing any restrictions, | ||
|  | or interactions. | ||
|  | 
 | ||
|  | If the application has no options this section may be omitted entirely. | ||
|  | 
 | ||
|  | 
 | ||
|  | =head1 DESCRIPTION | ||
|  | 
 | ||
|  | A full description of the application and its features. | ||
|  | May include numerous subsections (i.e. =head2, =head3, etc.) | ||
|  | 
 | ||
|  | 
 | ||
|  | =head1 DIAGNOSTICS | ||
|  | 
 | ||
|  | A list of every error and warning message that the application can generate | ||
|  | (even the ones that will "never happen"), with a full explanation of each | ||
|  | problem, one or more likely causes, and any suggested remedies. If the | ||
|  | application generates exit status codes (e.g. under Unix) then list the exit | ||
|  | status associated with each error. | ||
|  | 
 | ||
|  | 
 | ||
|  | =head1 CONFIGURATION AND ENVIRONMENT | ||
|  | 
 | ||
|  | A full explanation of any configuration system(s) used by the application, | ||
|  | including the names and locations of any configuration files, and the | ||
|  | meaning of any environment variables or properties that can be set. These | ||
|  | descriptions must also include details of any configuration language used | ||
|  | 
 | ||
|  | 
 | ||
|  | =head1 DEPENDENCIES | ||
|  | 
 | ||
|  | A list of all the other modules that this module relies upon, including any | ||
|  | restrictions on versions, and an indication whether these required modules are | ||
|  | part of the standard Perl distribution, part of the module's distribution, | ||
|  | or must be installed separately. | ||
|  | 
 | ||
|  | 
 | ||
|  | =head1 INCOMPATIBILITIES | ||
|  | 
 | ||
|  | A list of any modules that this module cannot be used in conjunction with. | ||
|  | This may be due to name conflicts in the interface, or competition for | ||
|  | system or program resources, or due to internal limitations of Perl | ||
|  | (for example, many modules that use source code filters are mutually | ||
|  | incompatible). | ||
|  | 
 | ||
|  | 
 | ||
|  | =head1 BUGS AND LIMITATIONS | ||
|  | 
 | ||
|  | A list of known problems with the module, together with some indication | ||
|  | whether they are likely to be fixed in an upcoming release. | ||
|  | 
 | ||
|  | Also a list of restrictions on the features the module does provide: | ||
|  | data types that cannot be handled, performance issues and the circumstances | ||
|  | in which they may arise, practical limitations on the size of data sets, | ||
|  | special cases that are not (yet) handled, etc. | ||
|  | 
 | ||
|  | The initial template usually just has: | ||
|  | 
 | ||
|  | There are no known bugs in this module. | ||
|  | Please report problems to <Maintainer name(s)>  (<contact address>) | ||
|  | Patches are welcome. | ||
|  | 
 | ||
|  | =head1 AUTHOR | ||
|  | 
 | ||
|  | <Author name(s)>  (<contact address>) | ||
|  | 
 | ||
|  | 
 | ||
|  | =head1 LICENCE AND COPYRIGHT | ||
|  | 
 | ||
|  | Copyright (c) <year> <copyright holder> (<contact address>). All rights reserved. | ||
|  | 
 | ||
|  | Followed by whatever licence you wish to release it under. | ||
|  | For Perl code that is often just: | ||
|  | 
 | ||
|  | This module is free software; you can redistribute it and/or | ||
|  | modify it under the same terms as Perl itself. See perldoc perlartistic. | ||
|  | 
 | ||
|  | This program is distributed in the hope that it will be useful, | ||
|  | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
|  | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | ||
|  | 
 | ||
|  | =cut | ||
|  | 
 | ||
|  | #}}} | ||
|  | 
 | ||
|  | # [zo to open fold, zc to close] | ||
|  | 
 | ||
|  | # vim: syntax=perl:ts=8:sw=4:et:ai:tw=78:fo=tcrqn21:fdm=marker | ||
|  | 
 |