2024-08-22 12:25:22 +00:00
|
|
|
#!/usr/bin/env perl
|
|
|
|
#===============================================================================
|
|
|
|
#
|
|
|
|
# FILE: view_derivatives
|
|
|
|
#
|
|
|
|
# USAGE: ./view_derivatives metadata_file
|
|
|
|
#
|
|
|
|
# DESCRIPTION: Experimental!
|
|
|
|
# Reads a JSON file containing IA metadata for an item (HPR
|
|
|
|
# show) and processes 'derived' files from the IA. Their
|
|
|
|
# relationships are usually simple, but sometimes can be rather
|
|
|
|
# weird, so building a representation of the hierarchy might be
|
|
|
|
# a way to understand what's there and maybe make a list for the
|
|
|
|
# 'ia delete' command to work on!
|
|
|
|
# Each 'original' file may be accompanied by 'derived' files. So
|
|
|
|
# an image file might generate a thumbnail for example. The
|
|
|
|
# image will be marked in the JSON metadata as "original" and
|
|
|
|
# the derived file as a "derivative" with the file it was built
|
|
|
|
# from being its "parent". There can also be "derivative" files
|
|
|
|
# built from other "derivative"s in rare cases. This has been
|
|
|
|
# seen when an EPUB file is used to build a PDF "derivative",
|
|
|
|
# and then other versions of the PDF are generated as
|
|
|
|
# "derivatives" with the PDF as the "parent".
|
|
|
|
# The point is that we do not want the "derivatives". We (HPR)
|
|
|
|
# generate all of the versions of a given file we require:
|
|
|
|
# audio types and thumbnails, for example. The tools we use can
|
|
|
|
# disable the generation of "derivatives", so it has been rare
|
|
|
|
# to see them in recent times. However, with older shows we
|
|
|
|
# either couldn't stop the "derive" process, or some IA servers
|
|
|
|
# ignored our '--no-derive' options. Now (August 2024) we are
|
|
|
|
# cleaning up the HPR collection, and thus this script has been
|
|
|
|
# developed.
|
|
|
|
# Special note: any cleaning up of IA items needs to be done
|
|
|
|
# before files are moved around. The parental relationships are
|
|
|
|
# stored in the metadata, and do not track file movements. This
|
|
|
|
# script cannot analyse metadata from after such rearrangements!
|
|
|
|
# [It may be possible to adjust these metadata fields, but it
|
|
|
|
# does not seem to be necessary if the correct sequence of
|
|
|
|
# changes is adhered to - find derivatives, delete derivatives,
|
|
|
|
# move files]
|
|
|
|
#
|
|
|
|
# OPTIONS: ---
|
|
|
|
# REQUIREMENTS: ---
|
|
|
|
# BUGS: ---
|
|
|
|
# NOTES: ---
|
|
|
|
# AUTHOR: Dave Morriss (djm), Dave.Morriss@gmail.com
|
2024-11-23 22:28:52 +00:00
|
|
|
# VERSION: 0.0.4
|
2024-08-22 12:25:22 +00:00
|
|
|
# CREATED: 2024-08-12 16:26:29
|
2024-11-23 22:28:52 +00:00
|
|
|
# REVISION: 2024-09-17 17:03:27
|
2024-08-22 12:25:22 +00:00
|
|
|
#
|
|
|
|
#===============================================================================
|
|
|
|
|
|
|
|
use v5.36;
|
|
|
|
use strict;
|
|
|
|
use warnings;
|
|
|
|
use feature qw{ postderef say signatures state try };
|
|
|
|
no warnings
|
|
|
|
qw{ experimental::postderef experimental::signatures experimental::try };
|
|
|
|
|
|
|
|
use open ':std', ':encoding(UTF-8)'; # Make all IO UTF-8
|
|
|
|
|
|
|
|
use Carp;
|
|
|
|
use Getopt::Long;
|
|
|
|
use Pod::Usage;
|
|
|
|
|
|
|
|
use File::Slurper qw{ read_text read_lines };
|
|
|
|
use JSON;
|
|
|
|
|
|
|
|
use Data::Dumper;
|
|
|
|
|
|
|
|
#
# Version number (Incremented by Vim)
#
our $VERSION = '0.0.4';

#
# Script and directory names: $PROG is $0 with any leading path removed, and
# $DIR is $0 with the final path component removed ('.' if nothing is left)
#
my $PROG = $0 =~ s|.*/||mxr;
my $DIR  = $0 =~ s|/?[^/]*$||mxr;
$DIR ||= '.';

#-------------------------------------------------------------------------------
# Declarations
#-------------------------------------------------------------------------------
#
# Constants and other declarations
#
my $basedir    = "$ENV{HOME}/HPR/InternetArchive";
my $outputfile = "$basedir/$PROG.svg";
my $logfile    = "$basedir/$PROG.log";

#
# Working storage: the metadata file name, its raw contents, the JSON parser
# object and the list of parsed JSON values
#
my ( $file, $filebuffer, $json, @jsonbuffer );

#
# The file tree (keyed by file name) and the names of the 'original' and
# 'derivative' files found in it
#
my ( %filetree, @original, @derived );

#-------------------------------------------------------------------------------
# Options
#-------------------------------------------------------------------------------
#
# Option defaults
#
my $DEFDEBUG = 0;

#
# Options and arguments
#
my %options;
Options( \%options );

#
# Default help shows minimal information
#
if ( $options{'help'} ) {
    pod2usage(
        -msg     => "$PROG version $VERSION\n",
        -exitval => 1,
        -verbose => 0
    );
}

#
# The -documentation or -man option shows the full POD documentation through
# a pager for convenience
#
if ( $options{'documentation'} ) {
    pod2usage(
        -msg     => "$PROG version $VERSION\n",
        -exitval => 1,
        -verbose => 2
    );
}

#
# Collect the option values, falling back to the defaults for anything that
# was not given on the command line
#
my $DEBUG        = $options{debug}        // $DEFDEBUG;
my $dry_run      = $options{'dry-run'}    // 0;
my $verbose      = $options{verbose}      // 0;
my $list_derived = $options{list_derived} // 0;
|
|
|
|
|
|
|
|
#
# Get the filename argument (metadata in JSON format). Test for a defined,
# non-empty value rather than simple truth so that a file called '0' is
# accepted.
#
$file = shift;
die "File of IA metadata is required\n"
    unless ( defined($file) && length($file) );

#
# Read the entire JSON file into a buffer. If this fails report the underlying
# reason as well, since it is otherwise impossible to tell a missing file from
# a permissions or encoding problem.
#
try {
    $filebuffer = read_text($file);
}
catch ($e) {
    chomp($e);
    die "Failed to read JSON file $file: $e\n";
}

#
# Parse the JSON from the buffer. In list context 'incr_parse' returns every
# JSON value it finds in the text. The parser's own message is passed on
# because it says what is wrong with the text.
#
try {
    $json       = JSON->new;
    @jsonbuffer = $json->incr_parse($filebuffer);
}
catch ($e) {
    chomp($e);
    die "Failed to parse JSON in $file: $e\n";
}

_debug( $DEBUG > 3, '@jsonbuffer: ' . Dumper( \@jsonbuffer ) );

#
# Check that this is the type of JSON we need
#
die "Empty JSON?\n" unless (@jsonbuffer);

#
# We got an array of one element from 'ia metadata <item>'. Use $md to
# reference that one element
#
my $md = $jsonbuffer[0];

#
# The rest of the script needs a JSON object holding a 'metadata' object with
# an 'identifier' in it. Stop with a clear message if that is not what we were
# given rather than failing later with an obscure dereferencing error.
#
die "Unexpected JSON structure in $file: no metadata identifier found\n"
    unless ( ref($md) eq 'HASH'
    && ref( $md->{metadata} ) eq 'HASH'
    && defined( $md->{metadata}->{identifier} ) );

#
# Collect the identifier from the parsed JSON and define the derived files we
# don't want to delete. (Found cases of audio files being "derived" in 1672
# and 1664)
#
# The identifier is quoted with \Q...\E so that any regular expression
# metacharacters it contains (such as '.') are matched literally.
#
my $identifier = $md->{metadata}->{identifier};
my $skip_re = qr{^\Q${identifier}\E\.(flac|mp3|ogg|opus|png|spx|wav)$};
|
2024-08-22 12:25:22 +00:00
|
|
|
|
|
|
|
#
# Build a hash from the original and derived files referenced in the metadata.
# The hash will contain all but a number of files which are not of interest
# when sorting out which file came from which. At the end of the loop the
# structure will contain back links with child files pointing to parents. The
# forward child links get added later.
#
# The original names of files are also kept in an array to help with
# traversing the main hash.
#
# Missing 'source' and 'format' fields are treated as empty strings for the
# tests below so that incomplete entries do not generate warnings. The values
# stored in the hash are the ones found in the metadata.
#
foreach my $entry ( @{ $md->{files} } ) {
    my $fname = $entry->{name};

    # An entry without a name cannot be placed in the tree
    next unless defined($fname);

    my $source = $entry->{source} // '';
    my $format = $entry->{format} // '';

    printf "%-40s %-10s %s\n", $fname, $source,
        coalesce( $entry->{original}, 'null' )
        if ( $verbose > 1 );

    #
    # Ignore the files which are not wanted in the tree
    #
    next
        if ( $source =~ /[Mm]etadata/
        || $format =~ /[Mm]etadata|Item Tile|Columbia Peaks|Spectrogram/ );

    $filetree{$fname} = {
        source   => $entry->{source},
        parent   => $entry->{original},
        format   => $entry->{format},
        children => [],
    };

    if ( $source eq 'original' ) {
        push( @original, $fname );
    }
    elsif ( $source eq 'derivative' ) {
        push( @derived, $fname );
    }
}

#
# Visit all the nodes for derived files. They point to their parent node, so
# we can visit this node and add the children to an internal array.
#
# A parent which is not in the tree (because it was one of the files ignored
# above, or is not in the metadata at all) is skipped. Pushing onto it would
# silently create a partial node containing nothing but a 'children' array.
#
foreach my $child (@derived) {
    my $parent = $filetree{$child}->{parent};
    next unless defined($parent);

    unless ( exists( $filetree{$parent} ) ) {
        _debug( $DEBUG > 2,
            "Parent '$parent' of '$child' is not in the file tree" );
        next;
    }

    push( @{ $filetree{$parent}->{children} }, $child );
}
|
|
|
|
|
|
|
|
#
# The trees are built, so we can dump the final result
#
if ( $DEBUG > 2 ) {
    my $count = scalar( keys(%filetree) );

    _debug( 1,
        '%filetree' . sprintf( '[%d]: ', $count ) . Dumper( \%filetree ) );
    _debug( 1, '@original: ' . Dumper( \@original ) );
    _debug( 1, '@derived: ' . Dumper( \@derived ) );
}

#
# If the verbosity level is greater than 0 scan all the 'original' files in
# the '$filetree' hash. For each one walk its 'children' array if there's
# anything in it, and recurse into each node it points to. It's possible to
# have further levels than the one below the parent, but very little seems to
# be set up this way so far.
#
if ( $verbose > 0 ) {
    #
    # Print each top level node (in sorted order), recursing into any
    # children to print them, then finish with a separator line
    #
    display_nodes( \%filetree, 0, $_ ) for sort(@original);

    say '-' x 10;
}

#
# List derived files that can be deleted, being careful not to delete the
# audio or the PNG image created by IA code.
#
if ($list_derived) {
    my @deletable = grep { $_ !~ $skip_re } sort(@derived);

    say "$_" for @deletable;
}

exit;
|
|
|
|
|
|
|
|
#=== FUNCTION ================================================================
|
|
|
|
# NAME: display_nodes
|
|
|
|
# PURPOSE: Walks a tree of original and derived files in the IA metadata
|
|
|
|
# PARAMETERS: $tree Hashref pointing to the file tree
|
|
|
|
# $level The current level within the tree
|
|
|
|
# $key Hash key for the current node
|
|
|
|
# RETURNS: Nothing
|
|
|
|
# DESCRIPTION: Recurses through a tree
|
|
|
|
# THROWS: No exceptions
|
|
|
|
# COMMENTS: None
|
|
|
|
# SEE ALSO: N/A
|
|
|
|
#===============================================================================
|
|
|
|
sub display_nodes {
    my ( $tree, $depth, $node ) = @_;

    #
    # Show the current node: one tab of indentation per level, then the level
    # number and the node's key
    #
    printf "%s%d: %s\n", "\t" x $depth, $depth, $node;

    #
    # Take the list of this node's children and visit each of them one level
    # further down. A node without children ends the recursion.
    #
    my @children = @{ $tree->{$node}->{children} };

    foreach my $child (@children) {
        display_nodes( $tree, $depth + 1, $child );
    }

    return;
}
|
|
|
|
|
|
|
|
#=== FUNCTION ================================================================
|
|
|
|
# NAME: coalesce
|
|
|
|
# PURPOSE: To find the first defined argument and return it
|
|
|
|
# PARAMETERS: Arbitrary number of arguments
|
|
|
|
# RETURNS: The first defined argument or undef if there are none
|
|
|
|
# DESCRIPTION: Just a simple way of ensuring an 'undef' value is never
|
|
|
|
# returned when doing so might be a problem.
|
|
|
|
# THROWS: No exceptions
|
|
|
|
# COMMENTS: None
|
|
|
|
# SEE ALSO: N/A
|
|
|
|
#===============================================================================
|
|
|
|
sub coalesce {
    #
    # Keep only the defined arguments; the first of them (if any) is the
    # answer. With no defined argument $first stays undefined, so a single
    # 'undef' is returned even in list context.
    #
    my ($first) = grep { defined($_) } @_;

    return $first;
}
|
|
|
|
|
|
|
|
#=== FUNCTION ================================================================
|
|
|
|
# NAME: _debug
|
|
|
|
# PURPOSE: Prints debug reports
|
|
|
|
# PARAMETERS: $active Boolean: 1 for print, 0 for no print
|
|
|
|
# $message Message to print
|
|
|
|
# RETURNS: Nothing
|
|
|
|
# DESCRIPTION: Outputs a message if $active is true. It removes any trailing
|
|
|
|
# newline and then adds one in the 'print' so the caller doesn't
|
|
|
|
# have to bother. Prepends the message with 'D> ' to show it's
|
|
|
|
# a debug message.
|
|
|
|
# THROWS: No exceptions
|
|
|
|
# COMMENTS: None
|
|
|
|
# SEE ALSO: N/A
|
|
|
|
#===============================================================================
|
|
|
|
sub _debug {
    my ( $active, $message ) = @_;

    #
    # Remove a trailing newline (if any) so that the message always ends with
    # exactly the one added below
    #
    chomp($message);

    #
    # Only report when asked to, marking the line as debug output
    #
    if ($active) {
        print "D> $message\n";
    }
}
|
|
|
|
|
|
|
|
#=== FUNCTION ================================================================
|
|
|
|
# NAME: Options
|
|
|
|
# PURPOSE: Processes command-line options
|
|
|
|
# PARAMETERS: $optref Hash reference to hold the options
|
|
|
|
# RETURNS: Undef
|
|
|
|
# DESCRIPTION: Process the options we want to offer. See the documentation
|
|
|
|
# for details
|
|
|
|
# THROWS: no exceptions
|
|
|
|
# COMMENTS: none
|
|
|
|
# SEE ALSO: n/a
|
|
|
|
#===============================================================================
|
|
|
|
sub Options {
    my ($optref) = @_;

    #
    # Getopt::Long specifications for the options on offer
    #
    my @specs = qw{
        help         documentation|man
        debug=i      dry-run!
        verbose+     list_derived!
    };

    #
    # A successful parse has stored the values through $optref, so there is
    # nothing more to do
    #
    return if GetOptions( $optref, @specs );

    #
    # The parse failed: show the brief usage message with exit value 1
    #
    pod2usage(
        -msg     => "$PROG version $VERSION\n",
        -exitval => 1,
        -verbose => 0
    );

    return;
}
|
|
|
|
|
|
|
|
__END__
|
|
|
|
|
|
|
|
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
|
|
# Application Documentation
|
|
|
|
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
|
|
#{{{
|
|
|
|
|
|
|
|
=head1 NAME
|
|
|
|
|
2024-11-23 22:28:52 +00:00
|
|
|
view_derivatives - a tool to analyse IA metadata
|
2024-08-22 12:25:22 +00:00
|
|
|
|
|
|
|
=head1 VERSION
|
|
|
|
|
2024-11-23 22:28:52 +00:00
|
|
|
This documentation refers to view_derivatives version 0.0.4
|
2024-08-22 12:25:22 +00:00
|
|
|
|
|
|
|
=head1 USAGE
|
|
|
|
|
2024-11-23 22:28:52 +00:00
|
|
|
view_derivatives [-help] [-documentation|-man] [-debug=N] [-[no]dry-run]
|
|
|
|
[-verbose [-verbose] ...] [-[no]list_derived] metadata_file
|
2024-08-22 12:25:22 +00:00
|
|
|
|
2024-11-23 22:28:52 +00:00
|
|
|
# Parse the metadata and report the relationships between files
|
|
|
|
view_derivatives -verb METADATA
|
2024-08-22 12:25:22 +00:00
|
|
|
|
2024-11-23 22:28:52 +00:00
|
|
|
# Parse the metadata and write out a list of derived files for potential
|
|
|
|
# deletion.
|
|
|
|
view_derivatives -list_derived METADATA > FILE
|
2024-08-22 12:25:22 +00:00
|
|
|
|
|
|
|
=head1 REQUIRED ARGUMENTS
|
|
|
|
|
2024-11-23 22:28:52 +00:00
|
|
|
The name of a file created by the following command:
|
2024-08-22 12:25:22 +00:00
|
|
|
|
2024-11-23 22:28:52 +00:00
|
|
|
ia metadata "show" > metadata_file
|
2024-08-22 12:25:22 +00:00
|
|
|
|
2024-11-23 22:28:52 +00:00
|
|
|
The file is expected to contain one JSON object (in a one-element array). If
|
|
|
|
it contains more objects only the first will be processed.
|
2024-08-22 12:25:22 +00:00
|
|
|
|
|
|
|
=head1 OPTIONS
|
|
|
|
|
2024-11-23 22:28:52 +00:00
|
|
|
=over 4
|
|
|
|
|
|
|
|
=item B<-help>
|
|
|
|
|
|
|
|
Prints a brief help message describing the usage of the program, and then exits.
|
|
|
|
|
|
|
|
=item B<-documentation> B<-man>
|
|
|
|
|
|
|
|
Displays the entirety of the documentation (using a pager), and then exits. To
|
|
|
|
generate a PDF version use the I<pod2pdf> tool from
|
|
|
|
I<http://search.cpan.org/~jonallen/pod2pdf-0.42/bin/pod2pdf>. This can be
|
|
|
|
installed with the cpan tool as App::pod2pdf. Use the command:
|
|
|
|
|
|
|
|
pod2pdf view_derivatives --out=view_derivatives.pdf
|
|
|
|
|
|
|
|
=item B<-debug=N>
|
|
|
|
|
|
|
|
Selects a level of debugging. Debug information consists of a line or series
|
|
|
|
of lines prefixed with the characters 'D>':
|
|
|
|
|
|
|
|
=over 4
|
|
|
|
|
|
|
|
=item B<0>
|
|
|
|
|
|
|
|
No debug output is generated: this is the default
|
|
|
|
|
|
|
|
=item B<3>
|
|
|
|
|
|
|
|
Prints all data structures from options
|
|
|
|
|
|
|
|
=back
|
|
|
|
|
|
|
|
(The debug levels need work!)
|
|
|
|
|
|
|
|
=item B<-[no]dry-run>
|
|
|
|
|
|
|
|
Enable/disable dry run mode (default off)
|
2024-08-22 12:25:22 +00:00
|
|
|
|
2024-11-23 22:28:52 +00:00
|
|
|
=item B<-verbose>
|
2024-08-22 12:25:22 +00:00
|
|
|
|
2024-11-23 22:28:52 +00:00
|
|
|
Sets the verbosity level. If the option is omitted then the level is zero (no
|
|
|
|
verbose output). Thereafter, for each occurrence of the option the verbosity
|
|
|
|
level is incremented. Only levels 1 and 2 are currently catered for. Any
|
|
|
|
levels above 2 produce the same result as level 2.
|
|
|
|
|
|
|
|
=item B<-[no]list_derived>
|
|
|
|
|
|
|
|
This option is off by default. Turning it on causes the script to write all
|
|
|
|
derived files to standard output. If the verbosity level is zero this is the
|
|
|
|
only output from the script.
|
|
|
|
|
|
|
|
The idea is that at verbosity level 1 or 2 information is displayed about the
|
|
|
|
relationship of files in the metadata, for human consumption. If
|
|
|
|
B<-nolist_derived> is the setting (or default) then this is all that is shown.
|
|
|
|
|
|
|
|
If the verbosity level is zero and B<-list_derived> is on then only the list
|
|
|
|
of derived files will be generated, and this can be used to delete the files
|
|
|
|
from the IA.
|
2024-08-22 12:25:22 +00:00
|
|
|
|
|
|
|
=head1 DESCRIPTION
|
|
|
|
|
2024-11-23 22:28:52 +00:00
|
|
|
=head2 OVERVIEW
|
|
|
|
|
|
|
|
Items on the IA (Internet Archive, or I<archive.org>) consist of metadata and
|
|
|
|
files. Each item generated for HPR is a show or episode. Most files comprising
|
|
|
|
the episode on the IA are those which are part of the episode on the HPR
|
|
|
|
server. A few extra files are created by the IA software, but these are part
|
|
|
|
of the metadata (HTML details, upload date, etc.)
|
|
|
|
|
|
|
|
By default the IA software will create additional files which are derived from
|
|
|
|
the original files. Typical examples are other audio formats, such as Ogg or
|
|
|
|
Mp3. We have been disabling this derivation process for several years for
|
|
|
|
various reasons, preferring to generate our own derivatives. IA-generated
|
|
|
|
audio derivatives do not have ID3 and similar tags, whereas HPR-generated
|
|
|
|
audio formats do.
|
|
|
|
|
|
|
|
Historically it was difficult to disable the derivation process. Even though
|
|
|
|
there were settings to do this they apparently didn't work on all of the
|
|
|
|
servers making up the IA, and so older items may have many derived files.
|
|
|
|
|
|
|
|
This script assists with identifying unwanted derivatives and with their
|
|
|
|
deletion.
|
|
|
|
|
|
|
|
=head2 METADATA
|
|
|
|
|
|
|
|
The metadata for an item can be obtained (by a registered user) from the IA
|
|
|
|
using the B<ia> tool. Its format is JSON, and this script uses a JSON module
|
|
|
|
to parse it.
|
|
|
|
|
|
|
|
=head2 FILE RELATIONSHIPS
|
2024-08-22 12:25:22 +00:00
|
|
|
|
2024-11-23 22:28:52 +00:00
|
|
|
The JSON metadata contains details of all files comprising the IA item.
|
|
|
|
It contains details such as the name, size, and type of each file. It also
|
|
|
|
categorises files into groups such as I<original> and I<derived>. Files which
|
|
|
|
are derived have parents. The script uses this to build tree-like data
|
|
|
|
structures of derived files based on the original files. All children of an
|
|
|
|
original file will be derived, but some derived files may also have children.
|
|
|
|
|
|
|
|
The derivatives can be classified simply as children of original files or of
|
|
|
|
derived files. These are what are listed if required and what are used in the
|
|
|
|
deletion process.
|
|
|
|
|
|
|
|
=head2 DELETING UNWANTED DERIVATIVES
|
|
|
|
|
|
|
|
The simplest method is to pipe the output from the script with verbose level
|
|
|
|
zero and with B<-list_derived> enabled into B<xargs> in order to run a command
|
|
|
|
which will delete the unwanted derivatives.
|
|
|
|
|
|
|
|
One usage is:
|
|
|
|
|
|
|
|
./view_derivatives -list_derived metadata.json |\
|
|
|
|
xargs ia delete hpr1234 --no-backup
|
|
|
|
|
|
|
|
This will generate a list of files to be deleted, then pipe them to B<xargs>
|
|
|
|
which will construct a command by appending the names to the command template.
|
|
|
|
|
|
|
|
This approach is not ideal since it does not handle the case where there is
|
|
|
|
nothing to delete. The script B<snapshot_metadata> manages this situation by
|
|
|
|
generating the metadata and saving it in a file, then it runs
|
|
|
|
B<view_derivatives> on this file and generates a file of derivatives. If this
|
|
|
|
file is not empty it can be used to perform the deletions, and otherwise no
|
|
|
|
attempt will be made.
|
2024-08-22 12:25:22 +00:00
|
|
|
|
|
|
|
=head1 DIAGNOSTICS
|
|
|
|
|
|
|
|
A list of every error and warning message that the application can generate
|
|
|
|
(even the ones that will "never happen"), with a full explanation of each
|
|
|
|
problem, one or more likely causes, and any suggested remedies. If the
|
|
|
|
application generates exit status codes (e.g. under Unix) then list the exit
|
|
|
|
status associated with each error.
|
|
|
|
|
|
|
|
|
|
|
|
=head1 CONFIGURATION AND ENVIRONMENT
|
|
|
|
|
|
|
|
A full explanation of any configuration system(s) used by the application,
|
|
|
|
including the names and locations of any configuration files, and the
|
|
|
|
meaning of any environment variables or properties that can be set. These
|
|
|
|
descriptions must also include details of any configuration language used
|
|
|
|
|
|
|
|
|
|
|
|
=head1 DEPENDENCIES
|
|
|
|
|
|
|
|
A list of all the other modules that this module relies upon, including any
|
|
|
|
restrictions on versions, and an indication whether these required modules are
|
|
|
|
part of the standard Perl distribution, part of the module's distribution,
|
|
|
|
or must be installed separately.
|
|
|
|
|
|
|
|
|
|
|
|
=head1 INCOMPATIBILITIES
|
|
|
|
|
|
|
|
A list of any modules that this module cannot be used in conjunction with.
|
|
|
|
This may be due to name conflicts in the interface, or competition for
|
|
|
|
system or program resources, or due to internal limitations of Perl
|
|
|
|
(for example, many modules that use source code filters are mutually
|
|
|
|
incompatible).
|
|
|
|
|
|
|
|
|
|
|
|
=head1 BUGS AND LIMITATIONS
|
|
|
|
|
|
|
|
A list of known problems with the module, together with some indication
|
|
|
|
whether they are likely to be fixed in an upcoming release.
|
|
|
|
|
|
|
|
Also a list of restrictions on the features the module does provide:
|
|
|
|
data types that cannot be handled, performance issues and the circumstances
|
|
|
|
in which they may arise, practical limitations on the size of data sets,
|
|
|
|
special cases that are not (yet) handled, etc.
|
|
|
|
|
|
|
|
The initial template usually just has:
|
|
|
|
|
|
|
|
There are no known bugs in this module.
|
2024-11-23 22:28:52 +00:00
|
|
|
Please report problems to Dave Morriss (dave.morriss@gmail.com)
|
2024-08-22 12:25:22 +00:00
|
|
|
Patches are welcome.
|
|
|
|
|
2024-11-23 22:28:52 +00:00
|
|
|
|
2024-08-22 12:25:22 +00:00
|
|
|
=head1 AUTHOR
|
|
|
|
|
2024-11-23 22:28:52 +00:00
|
|
|
Dave Morriss (dave.morriss@gmail.com)
|
2024-08-22 12:25:22 +00:00
|
|
|
|
|
|
|
|
|
|
|
=head1 LICENCE AND COPYRIGHT
|
|
|
|
|
2024-11-23 22:28:52 +00:00
|
|
|
Copyright (c) 2024 Dave Morriss (dave.morriss@gmail.com). All rights reserved.
|
2024-08-22 12:25:22 +00:00
|
|
|
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
|
|
|
|
|
|
|
=cut
|
|
|
|
|
|
|
|
#}}}
|
|
|
|
|
|
|
|
# [zo to open fold, zc to close]
|
|
|
|
|
|
|
|
# vim: syntax=perl:ts=8:sw=4:et:ai:tw=78:fo=tcrqn21:fdm=marker
|
|
|
|
|