Compare commits
2 Commits
a4c24296ef
...
7e925621f4
Author | SHA1 | Date | |
---|---|---|---|
|
7e925621f4 | ||
|
31eb5d200f |
@ -307,7 +307,7 @@ VERBOSE=${VERBOSE:-0}
|
||||
#
|
||||
# Should have one argument
|
||||
#
|
||||
if [[ $# != 1 ]]; then
|
||||
if [[ $# -ne 1 ]]; then
|
||||
coloured 'red' "Missing argument"
|
||||
_usage 1
|
||||
fi
|
||||
|
@ -6,7 +6,7 @@
|
||||
# USAGE: ./repair_assets showid
|
||||
#
|
||||
# DESCRIPTION: Given a show where there was a directory of asset files on the
|
||||
# old HPR server whichj got lost in the migration, rebuild it
|
||||
# old HPR server which got lost in the migration, rebuild it
|
||||
# and fill it with assets from the IA. Modify the show notes to
|
||||
# point to these recovered assets.
|
||||
#
|
||||
@ -15,15 +15,15 @@
|
||||
# BUGS: ---
|
||||
# NOTES: ---
|
||||
# AUTHOR: Dave Morriss (djm), Dave.Morriss@gmail.com
|
||||
# VERSION: 0.0.8
|
||||
# VERSION: 0.0.10
|
||||
# CREATED: 2024-05-10 21:26:31
|
||||
# REVISION: 2024-08-23 11:55:25
|
||||
# REVISION: 2024-10-02 17:34:47
|
||||
#
|
||||
#===============================================================================
|
||||
|
||||
# set -o nounset # Treat unset variables as an error
|
||||
|
||||
VERSION="0.0.8"
|
||||
VERSION="0.0.10"
|
||||
|
||||
SCRIPT=${0##*/}
|
||||
# DIR=${0%/*}
|
||||
@ -96,6 +96,38 @@ trap 'cleanup_temp $TMP1 $TMP2' SIGHUP SIGINT SIGPIPE SIGTERM EXIT
|
||||
# $3 Name of array to receive list of missing assets
|
||||
# RETURNS: Nothing
|
||||
#===============================================================================
|
||||
# find_missing () {
|
||||
# local -n IA="${1}"
|
||||
# local -n HPR="${2}"
|
||||
# local output="${3}"
|
||||
#
|
||||
# local -A hIA hHPR
|
||||
# local i key
|
||||
#
|
||||
# #
|
||||
# # Make a hash keyed by the IA file base names from an indexed array
|
||||
# #
|
||||
# for (( i=0; i<${#IA[@]}; i++ )); do
|
||||
# hIA+=([${IA[$i]##*/}]=${IA[$i]})
|
||||
# done
|
||||
#
|
||||
# #
|
||||
# # Make a hash keyed by the HPR file base names from an indexed array
|
||||
# #
|
||||
# for (( i=0; i<${#HPR[@]}; i++ )); do
|
||||
# hHPR+=([${HPR[$i]##*/}]=${HPR[$i]})
|
||||
# done
|
||||
#
|
||||
# #
|
||||
# # Use the basename keys to check what's missing, but return the full path
|
||||
# # names.
|
||||
# #
|
||||
# for key in "${!hIA[@]}"; do
|
||||
# if ! exists_in hHPR "$key"; then
|
||||
# eval "$output+=('${hIA[$key]}')"
|
||||
# fi
|
||||
# done
|
||||
# }
|
||||
find_missing () {
|
||||
local -n IA="${1}"
|
||||
local -n HPR="${2}"
|
||||
@ -105,26 +137,29 @@ find_missing () {
|
||||
local i key
|
||||
|
||||
#
|
||||
# Make a hash keyed by the IA file base names from an indexed array
|
||||
# Make a hash keyed by the full IA paths from an indexed array
|
||||
#
|
||||
for (( i=0; i<${#IA[@]}; i++ )); do
|
||||
hIA+=([${IA[$i]##*/}]=${IA[$i]})
|
||||
hIA+=([${IA[$i]}]=$i)
|
||||
done
|
||||
|
||||
#
|
||||
# Make a hash keyed by the HPR file base names from an indexed array
|
||||
# Make a hash keyed by the HPR file paths from an indexed array, but
|
||||
# remove the first path component for parity with the IA paths. We are going to
|
||||
# copy the IA paths, not these, so we never need the full paths again
|
||||
# here.
|
||||
#
|
||||
for (( i=0; i<${#HPR[@]}; i++ )); do
|
||||
hHPR+=([${HPR[$i]##*/}]=${HPR[$i]})
|
||||
hHPR+=([${HPR[$i]#*/}]=$i)
|
||||
done
|
||||
|
||||
#
|
||||
# Use the basename keys to check what's missing, but return the full path
|
||||
# names.
|
||||
# Use the full path keys to check what's missing, and return the IA full
|
||||
# path names.
|
||||
#
|
||||
for key in "${!hIA[@]}"; do
|
||||
if ! exists_in hHPR "$key"; then
|
||||
eval "$output+=('${hIA[$key]}')"
|
||||
eval "$output+=('$key')"
|
||||
fi
|
||||
done
|
||||
}
|
||||
@ -267,10 +302,13 @@ fi
|
||||
show="${1,,}"
|
||||
|
||||
#
|
||||
# Ensure show id is correctly formatted. We want it to be 'hpr1234'
|
||||
# Ensure show id is correctly formatted. We want it to be 'hpr1234' but we
|
||||
# allow the 'hpr' bit to be omitted, as well as any leading zeroes. We need to
|
||||
# handle the weirdness of "leading zero means octal", but we always
|
||||
# store it as 'hpr1234' once processed.
|
||||
#
|
||||
if [[ $show =~ (hpr)?([0-9]+) ]]; then
|
||||
printf -v show 'hpr%04d' "${BASH_REMATCH[2]}"
|
||||
printf -v show 'hpr%04d' "$((10#${BASH_REMATCH[2]}))"
|
||||
else
|
||||
coloured 'red' "Incorrect show specification: $show"
|
||||
coloured 'yellow' "Use 'hpr9999' or '9999' format"
|
||||
@ -443,7 +481,14 @@ ignore_re="index.html$"
|
||||
# Run the command and save the output. Save the asset names returned in an
|
||||
# array. TODO: Handle errors from the command
|
||||
#
|
||||
if [[ $DRYRUN -eq 0 ]]; then
|
||||
#
|
||||
# NOTE: We also want to interrogate the HPR state in dry-run mode
|
||||
#
|
||||
# if [[ $DRYRUN -eq 0 ]]; then
|
||||
# else
|
||||
# coloured 'yellow' "Would have searched for assets on the HPR server"
|
||||
# fi
|
||||
|
||||
eval "$command" > "$TMP2"
|
||||
RES=$?
|
||||
if [[ $RES -eq 0 ]]; then
|
||||
@ -461,19 +506,20 @@ if [[ $DRYRUN -eq 0 ]]; then
|
||||
_log "Failed while searching for HPR assets"
|
||||
exit 1
|
||||
fi
|
||||
else
|
||||
coloured 'yellow' "Would have searched for assets on the HPR server"
|
||||
fi
|
||||
|
||||
#-------------------------------------------------------------------------------
|
||||
# Compare the two asset lists and return what's missing on the HPR server
|
||||
#-------------------------------------------------------------------------------
|
||||
# TODO: This algorithm does not handle the instance where there are pictures
|
||||
# in one directory and a lower directory containing thumbnails, AND THE FILE
|
||||
# NAMES ARE THE SAME!
|
||||
# TODO: The algorithm in find_missing does not handle the instance where there
|
||||
# are pictures in one directory and a lower directory containing thumbnails,
|
||||
# AND THE FILE NAMES ARE THE SAME!
|
||||
#
|
||||
declare -a missing
|
||||
if [[ ${#hpr_asset[@]} -eq 0 ]]; then
|
||||
missing=( "${ia_asset[@]}" )
|
||||
else
|
||||
find_missing ia_asset hpr_asset missing
|
||||
fi
|
||||
_verbose "$(coloured 'cyan' "** missing (${#missing[@]}):")"
|
||||
_verbose "$(printf '%s\n' "${missing[@]}")"
|
||||
|
||||
|
@ -23,7 +23,7 @@
|
||||
# temporarily on 'borg') and determines which have not been
|
||||
# uploaded, then takes steps to perform the uploads.
|
||||
#
|
||||
# Version 0.0.11 onwards has the capability to repair an IA item
|
||||
# Version 0.0.12 onwards has the capability to repair an IA item
|
||||
# from the HPR backup disk. This seems to be necessary because
|
||||
# the transcripts were not carried over (although we are
|
||||
# adding them to the IA for new shows now, older ones were never
|
||||
@ -44,15 +44,15 @@
|
||||
# BUGS: ---
|
||||
# NOTES: ---
|
||||
# AUTHOR: Dave Morriss (djm), Dave.Morriss@gmail.com
|
||||
# VERSION: 0.0.11
|
||||
# VERSION: 0.0.12
|
||||
# CREATED: 2020-01-05 22:42:46
|
||||
# REVISION: 2024-07-20 17:06:10
|
||||
# REVISION: 2024-09-13 18:19:59
|
||||
#
|
||||
#===============================================================================
|
||||
|
||||
#set -o nounset # Treat unset variables as an error
|
||||
|
||||
VERSION="0.0.11"
|
||||
VERSION="0.0.12"
|
||||
|
||||
SCRIPT=${0##*/}
|
||||
# DIR=${0%/*}
|
||||
@ -492,9 +492,9 @@ else
|
||||
|
||||
#
|
||||
# Stop the missed file loop if we have reached the limiting number, in
|
||||
# dry-run and live mode
|
||||
# dry-run and live mode, but not extended mode
|
||||
#
|
||||
[[ $upload_count -eq $LIMIT ]] && {
|
||||
[[ $EXTENDED -eq 0 && $upload_count -eq $LIMIT ]] && {
|
||||
coloured 'blue' "Upload limit ($LIMIT) reached"
|
||||
break
|
||||
}
|
||||
|
@ -5,23 +5,28 @@
|
||||
#
|
||||
# USAGE: ./snapshot_metadata episode_number
|
||||
#
|
||||
# DESCRIPTION: Collects metadata from the IA for a given show and stores it
|
||||
# in the cache.
|
||||
# DESCRIPTION: Collects JSON metadata from the IA for a given show and stores
|
||||
# it in the cache. Runs 'view_derivatives' on the JSON to
|
||||
# display the derivatives if any, and to save their names if
|
||||
# found, for deletion.
|
||||
# Deletion is performed thus (external to this script):
|
||||
#
|
||||
# cat assets/hpr$(./next_repair)/derived.lis | xargs ia delete hpr$(./next_repair) --no-backup
|
||||
#
|
||||
# OPTIONS: ---
|
||||
# REQUIREMENTS: ---
|
||||
# BUGS: ---
|
||||
# NOTES: ---
|
||||
# AUTHOR: Dave Morriss (djm), Dave.Morriss@gmail.com
|
||||
# VERSION: 0.0.2
|
||||
# VERSION: 0.0.3
|
||||
# CREATED: 2024-08-16 20:36:51
|
||||
# REVISION: 2024-08-17 10:31:15
|
||||
# REVISION: 2024-10-02 17:40:13
|
||||
#
|
||||
#===============================================================================
|
||||
|
||||
set -o nounset # Treat unset variables as an error
|
||||
|
||||
VERSION="0.0.2"
|
||||
VERSION="0.0.3"
|
||||
|
||||
SCRIPT=${0##*/}
|
||||
# DIR=${0%/*}
|
||||
@ -126,17 +131,19 @@ fi
|
||||
show="${1,,}"
|
||||
|
||||
#
|
||||
# Ensure show id is correctly formatted. We want it to be 'hpr1234'
|
||||
# Ensure show id is correctly formatted. We want it to be 'hpr1234' but we
|
||||
# allow the 'hpr' bit to be omitted, as well as any leading zeroes. We need to
|
||||
# handle the weirdness of "leading zero means octal", but we always
|
||||
# store it as 'hpr1234' once processed.
|
||||
#
|
||||
if [[ $show =~ (hpr)?([0-9]+) ]]; then
|
||||
printf -v show 'hpr%04d' "${BASH_REMATCH[2]}"
|
||||
printf -v show 'hpr%04d' "$((10#${BASH_REMATCH[2]}))"
|
||||
else
|
||||
coloured 'red' "Incorrect show specification: $show"
|
||||
coloured 'yellow' "Use 'hpr9999' or '9999' format"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
|
||||
#-------------------------------------------------------------------------------
|
||||
# Setting up paths
|
||||
#-------------------------------------------------------------------------------
|
||||
|
@ -44,9 +44,9 @@
|
||||
# BUGS: ---
|
||||
# NOTES: ---
|
||||
# AUTHOR: Dave Morriss (djm), Dave.Morriss@gmail.com
|
||||
# VERSION: 0.0.2
|
||||
# VERSION: 0.0.4
|
||||
# CREATED: 2024-08-12 16:26:29
|
||||
# REVISION: 2024-08-17 13:44:44
|
||||
# REVISION: 2024-09-17 17:03:27
|
||||
#
|
||||
#===============================================================================
|
||||
|
||||
@ -71,7 +71,7 @@ use Data::Dumper;
|
||||
#
|
||||
# Version number (Incremented by Vim)
|
||||
#
|
||||
our $VERSION = '0.0.2';
|
||||
our $VERSION = '0.0.4';
|
||||
|
||||
#
|
||||
# Script and directory names
|
||||
@ -170,11 +170,12 @@ die "Empty JSON?\n" unless (@jsonbuffer);
|
||||
my $md = $jsonbuffer[0];
|
||||
|
||||
#
|
||||
# Collect the identifier from the parsed JSON and define the one derived file
|
||||
# we don't want to delete.
|
||||
# Collect the identifier from the parsed JSON and define the derived files we
|
||||
# don't want to delete. (Found cases of audio files being "derived" in 1672
|
||||
# and 1664)
|
||||
#
|
||||
my $identifier = $md->{metadata}->{identifier};
|
||||
my $item_png = "${identifier}.png";
|
||||
my $skip_re = qr{^${identifier}\.(flac|mp3|ogg|opus|png|spx|wav)$};
|
||||
|
||||
#
|
||||
# Build a hash from the original and derived files referenced in the metadata.
|
||||
@ -257,9 +258,13 @@ if ($verbose > 0) {
|
||||
say '-' x 10;
|
||||
}
|
||||
|
||||
#
|
||||
# List derived files that can be deleted, being careful not to delete the
|
||||
# audio or the PNG image created by IA code.
|
||||
#
|
||||
if ($list_derived) {
|
||||
foreach my $file ( sort(@derived) ) {
|
||||
say "$file" unless ($file eq $item_png);
|
||||
say "$file" unless ($file =~ $skip_re);
|
||||
}
|
||||
}
|
||||
|
||||
@ -377,49 +382,157 @@ __END__
|
||||
|
||||
=head1 NAME
|
||||
|
||||
view_derivatives - <One line description of application's purpose>
|
||||
view_derivatives - a tool to analyse IA metadata
|
||||
|
||||
=head1 VERSION
|
||||
|
||||
The initial template usually just has:
|
||||
|
||||
This documentation refers to view_derivatives version 0.0.2
|
||||
|
||||
This documentation refers to view_derivatives version 0.0.4
|
||||
|
||||
=head1 USAGE
|
||||
|
||||
# Brief working invocation example(s) here showing the most common usage(s)
|
||||
view_derivatives [-help] [-documentation|-man] [-debug=N] [-[no]dry-run]
|
||||
[-verbose [-verbose] ...] [-[no]list_derived] metadata_file
|
||||
|
||||
# This section will be as far as many users ever read
|
||||
# so make it as educational and exemplary as possible.
|
||||
# Parse the metadata and report the relationships between files
|
||||
view_derivatives -verb METADATA
|
||||
|
||||
# Parse the metadata and write out a list of derived files for potential
|
||||
# deletion.
|
||||
view_derivatives -list_derived METADATA > FILE
|
||||
|
||||
=head1 REQUIRED ARGUMENTS
|
||||
|
||||
A complete list of every argument that must appear on the command line.
|
||||
when the application is invoked, explaining what each of them does, any
|
||||
restrictions on where each one may appear (i.e. flags that must appear
|
||||
before or after filenames), and how the various arguments and options
|
||||
may interact (e.g. mutual exclusions, required combinations, etc.)
|
||||
The name of a file created by the following command:
|
||||
|
||||
If all of the application's arguments are optional this section
|
||||
may be omitted entirely.
|
||||
ia metadata "show" > metadata_file
|
||||
|
||||
The file is expected to contain one JSON object (in a one-element array). If
|
||||
it contains more objects only the first will be processed.
|
||||
|
||||
=head1 OPTIONS
|
||||
|
||||
A complete list of every available option with which the application
|
||||
can be invoked, explaining what each does, and listing any restrictions,
|
||||
or interactions.
|
||||
=over 4
|
||||
|
||||
If the application has no options this section may be omitted entirely.
|
||||
=item B<-help>
|
||||
|
||||
Prints a brief help message describing the usage of the program, and then exits.
|
||||
|
||||
=item B<-documentation> B<-man>
|
||||
|
||||
Displays the entirety of the documentation (using a pager), and then exits. To
|
||||
generate a PDF version use the I<pod2pdf> tool from
|
||||
I<http://search.cpan.org/~jonallen/pod2pdf-0.42/bin/pod2pdf>. This can be
|
||||
installed with the cpan tool as App::pod2pdf. Use the command:
|
||||
|
||||
pod2pdf view_derivatives --out=view_derivatives.pdf
|
||||
|
||||
=item B<-debug=N>
|
||||
|
||||
Selects a level of debugging. Debug information consists of a line or series
|
||||
of lines prefixed with the characters 'D>':
|
||||
|
||||
=over 4
|
||||
|
||||
=item B<0>
|
||||
|
||||
No debug output is generated: this is the default
|
||||
|
||||
=item B<3>
|
||||
|
||||
Prints all data structures from options
|
||||
|
||||
=back
|
||||
|
||||
(The debug levels need work!)
|
||||
|
||||
=item B<-[no]dry-run>
|
||||
|
||||
Enable/disable dry run mode (default off)
|
||||
|
||||
=item B<-verbose>
|
||||
|
||||
Sets the verbosity level. If the option is omitted then the level is zero (no
|
||||
verbose output). Thereafter, for each occurrence of the option the verbosity
|
||||
level is incremented. Only levels 1 and 2 are currently catered for. Any
|
||||
levels above 2 produce the same result as level 2.
|
||||
|
||||
=item B<-[no]list_derived>
|
||||
|
||||
This option is off by default. Turning it on causes the script to write all
|
||||
derived files to standard output. If the verbosity level is zero this is the
|
||||
only output from the script.
|
||||
|
||||
The idea is that at verbosity level 1 or 2 information is displayed about the
|
||||
relationship of files in the metadata, for human consumption. If
|
||||
B<-nolist_derived> is the setting (or default) then this is all that is shown.
|
||||
|
||||
If the verbosity level is zero and B<-list_derived> is on then only the list
|
||||
of derived files will be generated, and this can be used to delete the files
|
||||
from the IA.
|
||||
|
||||
=head1 DESCRIPTION
|
||||
|
||||
A full description of the application and its features.
|
||||
May include numerous subsections (i.e. =head2, =head3, etc.)
|
||||
=head2 OVERVIEW
|
||||
|
||||
Items on the IA (Internet Archive, or I<archive.org>) consist of metadata and
|
||||
files. Each item generated for HPR is a show or episode. Most files comprising
|
||||
the episode on the IA are those which are part of the episode on the HPR
|
||||
server. A few extra files are created by the IA software, but these are part
|
||||
of the metadata (HTML details, upload date, etc.)
|
||||
|
||||
By default the IA software will create additional files which are derived from
|
||||
the original files. Typical examples are other audio formats, such as Ogg or
|
||||
Mp3. We have been disabling this derivation process for several years for
|
||||
various reasons, preferring to generate our own derivatives. IA-generated
|
||||
audio derivatives do not have ID3 and similar tags, whereas HPR-generated
|
||||
audio formats do.
|
||||
|
||||
Historically it was difficult to disable the derivation process. Even though
|
||||
there were settings to do this they apparently didn't work on all of the
|
||||
servers making up the IA, and so older items may have many derived files.
|
||||
|
||||
This script assists with identifying unwanted derivatives and with their
|
||||
deletion.
|
||||
|
||||
=head2 METADATA
|
||||
|
||||
The metadata for an item can be obtained (by a registered user) from the IA
|
||||
using the B<ia> tool. Its format is JSON, and this script uses a JSON module
|
||||
to parse it.
|
||||
|
||||
=head2 FILE RELATIONSHIPS
|
||||
|
||||
The JSON metadata contains details of all files comprising the IA item.
|
||||
It contains details such as the name, size, and type of each file. It also
|
||||
categorises files into groups such as I<original> and I<derived>. Files which
|
||||
are derived have parents. The script uses this to build tree-like data
|
||||
structures of derived files based on the original files. All children of an
|
||||
original file will be derived, but some derived files may also have children.
|
||||
|
||||
The derivatives can be classified simply as children of original files or of
|
||||
derived files. These are what are listed if required and what are used in the
|
||||
deletion process.
|
||||
|
||||
=head2 DELETING UNWANTED DERIVATIVES
|
||||
|
||||
The simplest method is to pipe the output from the script with verbose level
|
||||
zero and with B<-list_derived> enabled into B<xargs> in order to run a command
|
||||
which will delete the unwanted derivatives.
|
||||
|
||||
One usage is:
|
||||
|
||||
./view_derivatives -list_derived metadata.json |\
|
||||
xargs ia delete hpr1234 --no-backup
|
||||
|
||||
This will generate a list of files to be deleted, then pipe them to B<xargs>
|
||||
which will construct a command by appending the names to the command template.
|
||||
|
||||
This approach is not ideal since it does not handle the case where there is
|
||||
nothing to delete. The script B<snapshot_metadata> manages this situation by
|
||||
generating the metadata and saving it in a file, then it runs
|
||||
B<view_derivatives> on this file and generates a file of derivatives. If this
|
||||
file is not empty it can be used to perform the deletions, and otherwise no
|
||||
attempt will be made.
|
||||
|
||||
=head1 DIAGNOSTICS
|
||||
|
||||
@ -468,23 +581,18 @@ special cases that are not (yet) handled, etc.
|
||||
The initial template usually just has:
|
||||
|
||||
There are no known bugs in this module.
|
||||
Please report problems to <Maintainer name(s)> (<contact address>)
|
||||
Please report problems to Dave Morriss (dave.morriss@gmail.com)
|
||||
Patches are welcome.
|
||||
|
||||
|
||||
=head1 AUTHOR
|
||||
|
||||
<Author name(s)> (<contact address>)
|
||||
Dave Morriss (dave.morriss@gmail.com)
|
||||
|
||||
|
||||
=head1 LICENCE AND COPYRIGHT
|
||||
|
||||
Copyright (c) <year> <copyright holder> (<contact address>). All rights reserved.
|
||||
|
||||
Followed by whatever licence you wish to release it under.
|
||||
For Perl code that is often just:
|
||||
|
||||
This module is free software; you can redistribute it and/or
|
||||
modify it under the same terms as Perl itself. See perldoc perlartistic.
|
||||
Copyright (c) 2024 Dave Morriss (dave.morriss@gmail.com). All rights reserved.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
|
Loading…
Reference in New Issue
Block a user