forked from HPR/hpr-tools
Updates since 2024-06-15
Database/query2tt2: comment and documentation updates; use of Perl's try/catch. InternetArchive/.make_metadata.cfg: added comments for readability InternetArchive/make_metadata: bug fix needed now that all shows on the HPR server have a directory with assets under it. InternetArchive/repair_assets: new Bash script in development. Collects assets from the IA and uploads them to a new directory on the HPR server. Will run 'fix_asset_links' (to repair asset links for their new directories) once it is ready. InternetArchive/repair_item: Bash script which was originally written to run on 'borg' and upload files to a new IA item when the uploads timed out. Now enhanced to upload missing files recovered from the HPR backup disk, such as transcripts.
This commit is contained in:
parent
9203dc26e0
commit
dc0f29e957
@ -26,9 +26,9 @@
|
||||
# BUGS: ---
|
||||
# NOTES: Had to revert to MySQL because of a problem with DBD::MariaDB
|
||||
# AUTHOR: Dave Morriss (djm), Dave.Morriss@gmail.com
|
||||
# VERSION: 0.0.4
|
||||
# VERSION: 0.0.5
|
||||
# CREATED: 2021-06-18 13:24:49
|
||||
# REVISION: 2024-01-19 17:15:45
|
||||
# REVISION: 2024-06-29 18:42:49
|
||||
#
|
||||
#===============================================================================
|
||||
|
||||
@ -59,7 +59,7 @@ use Data::Dumper;
|
||||
#
|
||||
# Version number (manually incremented)
|
||||
#
|
||||
our $VERSION = '0.0.4';
|
||||
our $VERSION = '0.0.5';
|
||||
|
||||
#
|
||||
# Script and directory names
|
||||
@ -113,7 +113,7 @@ Options( \%options );
|
||||
#
|
||||
# Default help
|
||||
#
|
||||
pod2usage( -msg => "Version $VERSION\n", -exitval => 1 )
|
||||
pod2usage( -msg => "Version $VERSION\n", -exitval => 1, -verbose => 0 )
|
||||
if ( $options{'help'} );
|
||||
|
||||
#
|
||||
@ -234,7 +234,7 @@ catch ($e) {
|
||||
print STDERR "Failed to execute query.\n";
|
||||
print STDERR "Placeholder/Argument mismatch: $pcount/$acount\n";
|
||||
exit;
|
||||
};
|
||||
}
|
||||
|
||||
#
|
||||
# Grab everything from the query as an arrayref of hashrefs
|
||||
@ -325,7 +325,7 @@ sub _dbargs {
|
||||
|
||||
#=== FUNCTION ================================================================
|
||||
# NAME: _define
|
||||
# PURPOSE: Handles multiple instances of the same option '-define x=42'
|
||||
# PURPOSE: Handles multiple instances of the option '-define x=42'
|
||||
# PARAMETERS: $opts hash reference holding the options
|
||||
# RETURNS: A hash containing all of the named items (e.g. { 'x' => 42 })
|
||||
# DESCRIPTION: If there are -define options they will be a hashref in the hash
|
||||
@ -386,12 +386,13 @@ query2tt2 - A script for formatting a report from database query using a templat
|
||||
|
||||
=head1 VERSION
|
||||
|
||||
This documentation refers to query2tt2 version 0.0.4
|
||||
This documentation refers to query2tt2 version 0.0.5
|
||||
|
||||
=head1 USAGE
|
||||
|
||||
query2tt2 [-help] [-debug=N] [-config=FILE] [-query=FILE]
|
||||
[-template=FILE] [QUERY]
|
||||
query2tt2 [-help] [-doc] [-debug=N] [-config=FILE] [-query=FILE]
|
||||
[-template=FILE] [-dbargs=ARG1 [-dbarg=ARG2] ...]
|
||||
[-define KEY1=VALUE1 [-define KEY2=VALUE2] ...] [QUERY]
|
||||
|
||||
query2tt2 -help
|
||||
|
||||
@ -462,10 +463,10 @@ If neither method is used the script aborts with an error message.
|
||||
|
||||
=item B<-dbarg=ARG> [ B<-dbarg=ARG> ... ]
|
||||
|
||||
The query can have place holders ('?') in it and the corresponding values can
|
||||
be passed to the script through the B<-dbarg=ARG> option. The option can be
|
||||
repeated as many times as required and the order of B<ARG> values is
|
||||
preserved.
|
||||
The query can have place holders ('?') in it and the corresponding values for
|
||||
these placeholders can be passed to the script through the B<-dbarg=ARG>
|
||||
option. The option can be repeated as many times as required and the order of
|
||||
B<ARG> values is preserved.
|
||||
|
||||
=item B<-template=FILE>
|
||||
|
||||
@ -484,8 +485,10 @@ Output from the template is written to STDOUT.
|
||||
|
||||
The Template Toolkit (TT2) template may receive values from the command line
|
||||
using this option. The argument to the B<-define> option is a B<key=value>
|
||||
pair. Keys should be unique otherwise they will overwrite one another. The
|
||||
keys will become TT2 variables and the values will be assigned to them.
|
||||
pair. Keys should be unique otherwise they will overwrite one another. They
|
||||
should also not be 'names' or 'result' because these keys are used internally
|
||||
(for the data from the database). See below for more details. The keys will
|
||||
become TT2 variables and the values will be assigned to them.
|
||||
|
||||
=back
|
||||
|
||||
@ -548,12 +551,7 @@ The nominated template file could not be found.
|
||||
|
||||
An error has occurred while performing a database operation.
|
||||
|
||||
=item B<Failed to execure query.>
|
||||
|
||||
There is a mismatch between the number of placeholders in the query ('?'
|
||||
characters) and the number of arguments provided through the B<-dbargs=ARG>
|
||||
option. The script will attempt to analyse whether there are too many or too
|
||||
few arguments
|
||||
=item B<Failed to execute query.>
|
||||
|
||||
There is a mismatch between the number of placeholders in the query ('?'
|
||||
characters) and the number of arguments provided through the B<-dbargs=ARG>
|
||||
|
@ -1,16 +1,62 @@
|
||||
# Version for i7-desktop
|
||||
# .make_metadata.cfg 2023-07-06 11:54:49
|
||||
# .make_metadata.cfg 2024-07-08 13:55:23
|
||||
#
|
||||
|
||||
#
|
||||
# A sanity check value in case an episode number given is too big
|
||||
#
|
||||
max_epno = 9000
|
||||
|
||||
#
|
||||
# This is where the script will look for the audio files for upload (if there
|
||||
# are other "assets" it finds them itself)
|
||||
#
|
||||
#uploads = "/var/IA/uploads" # on the VPS and marvin
|
||||
uploads = "/home/cendjm/HPR/IA/uploads"
|
||||
|
||||
#
|
||||
# How a "standard" audio file name is made up
|
||||
#
|
||||
filetemplate = "hpr%04d.%s"
|
||||
|
||||
#
|
||||
# How to fill in the "missing bit" in relative URLs
|
||||
#
|
||||
baseURL = "https://hackerpublicradio.org/"
|
||||
|
||||
#
|
||||
# *** OBSOLETE ***
|
||||
# If we need to fetch the MP3 version of the audio, which we do for older
|
||||
# shows, these are under the 'local' directory. The 'eps' files are actually
|
||||
# redirections to the IA. This is not normally used for the weekly uploads.
|
||||
#
|
||||
#URLtemplate = "http://hackerpublicradio.org/eps/%s"
|
||||
#URLtemplate = "https://hackerpublicradio.org/local/%s"
|
||||
|
||||
#
|
||||
# Printf/sprintf template for building an URL which points back to the current
|
||||
# show on the HPR site.
|
||||
#
|
||||
sourceURLtemplate = "https://hackerpublicradio.org/eps/%s/index.html"
|
||||
|
||||
#
|
||||
# If we are having to collect assets from the HPR server and upload them to
|
||||
# the IA server we want the final product to be addressable according to the
|
||||
# following URL template.
|
||||
#
|
||||
IAURLtemplate = "https://archive.org/download/%s/%s"
|
||||
|
||||
#
|
||||
# We build a Bash script to perform the upload of files which aren't in the
|
||||
# CSV generated by make_metadata. We used to use the plain 'ia upload' command
|
||||
# but now we call a Bash function declared in the script which is slightly
|
||||
# cleverer. We need to do this to get round the IA code's tendency to "derive"
|
||||
# all audio, and in doing so strip any audio tags. We perform our own
|
||||
# equivalent of "derive" *with* the tags and upload them telling the IA *not*
|
||||
# to re-derive. Mostly it listens. There's also a whole thing about IA keeping
|
||||
# history of deletions which we want to turn off otherwise our items become
|
||||
# stuffed with unwanted garbage.
|
||||
#
|
||||
#iauploadtemplate = "ia upload %s %s --remote-name=%s"
|
||||
iauploadtemplate = "Upload %s %s '%s' '%s'"
|
||||
iauploadoptions = "--retries=5 --no-derive -H x-archive-keep-old-version:0"
|
||||
|
||||
|
Binary file not shown.
@ -19,21 +19,24 @@
|
||||
# and this version (0.4.12) made into the main line version
|
||||
# because 4.14 was developing in a direction that doesn't fit
|
||||
# with the changes made to the HPR system in June/July 2023.
|
||||
# Will now move forward with version numbers.
|
||||
# Will now move forward with version numbers (and will get
|
||||
# a duplicate).
|
||||
# 2024-01-23: Added the 'open' pragma for UTF-8
|
||||
# 2024-07-08: Fixed a bug where the top-level directory was
|
||||
# being added to assets paths. See the definition of $linkre for
|
||||
# more details.
|
||||
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
# AUTHOR: Dave Morriss (djm), Dave.Morriss@gmail.com
|
||||
# VERSION: 0.4.14
|
||||
# CREATED: 2014-06-13 12:51:04
|
||||
# REVISION: 2024-01-23 16:28:59
|
||||
# REVISION: 2024-07-08 15:21:02
|
||||
#
|
||||
#===============================================================================
|
||||
|
||||
use 5.010;
|
||||
use strict;
|
||||
use warnings;
|
||||
use open ':encoding(UTF-8)';
|
||||
#use utf8;
|
||||
use open ':std', ':encoding(UTF-8)';
|
||||
|
||||
use Carp;
|
||||
use Getopt::Long;
|
||||
@ -1527,8 +1530,21 @@ sub find_links_in_notes {
|
||||
# http://www.hackerpublicradio.org/eps/hpr1303/Music_Notes.html
|
||||
# Also things like this (**Why Ken?**)
|
||||
# ../eps/hpr2945/IMG_20191018_122746Z.jpg
|
||||
# Don't match things like when *not* processing 1986:
|
||||
# Don't match things like this when *not* processing 1986:
|
||||
# http://hackerpublicradio.org/eps/hpr1986/full_shownotes.html#example-2
|
||||
# ----------------------------------------------------------------------
|
||||
# NOTE: 2024-07-08
|
||||
#
|
||||
# It used to be that we added a top-level hprXXXX directory to URLs
|
||||
# because there wasn't one on the HPR server. This was because the
|
||||
# majority of shows without assets had no files; the notes were taken from
|
||||
# the database and displayed dynamically.
|
||||
#
|
||||
# Now all HPR shows have a top-level directory for holding the index.html
|
||||
# with the pre-created notes page. So we DO NOT want to create that
|
||||
# top-level part. The RE below matches but doesn't store it or we'd get
|
||||
# one too many directory levels.
|
||||
# ----------------------------------------------------------------------
|
||||
#
|
||||
$epstr = sprintf( "hpr%04d", $episode );
|
||||
# my $re
|
||||
@ -1537,6 +1553,7 @@ sub find_links_in_notes {
|
||||
^https?://
|
||||
(?:www.)?
|
||||
(?:hacker|hobby)publicradio.org/eps/
|
||||
$epstr/
|
||||
(.+)$
|
||||
}x;
|
||||
|
||||
@ -1558,7 +1575,7 @@ sub find_links_in_notes {
|
||||
_debug( $DEBUG >= 3, "\$uri = $uri\n" );
|
||||
_debug( $DEBUG >= 3, "\$uri->fragment = " . $uri->fragment )
|
||||
if $uri->fragment;
|
||||
_debug( $DEBUG >= 3, "\$slink = $slink, \n" );
|
||||
_debug( $DEBUG >= 3, "\$slink = $slink\n" );
|
||||
|
||||
#
|
||||
# Is it an HPR link?
|
||||
@ -1760,7 +1777,7 @@ sub find_links_in_file {
|
||||
# http://www.hackerpublicradio.org/eps/hpr1303/Music_Notes.html
|
||||
# Also things like this (**Why Ken?**)
|
||||
# ../eps/hpr2945/IMG_20191018_122746Z.jpg
|
||||
# Don't match things like when *not* processing 1986:
|
||||
# Don't match things like this when *not* processing 1986:
|
||||
# http://hackerpublicradio.org/eps/hpr1986/full_shownotes.html#example-2
|
||||
#
|
||||
$epstr = sprintf( "hpr%04d", $episode );
|
||||
|
627
InternetArchive/repair_assets
Executable file
627
InternetArchive/repair_assets
Executable file
@ -0,0 +1,627 @@
|
||||
#!/bin/bash -
|
||||
#===============================================================================
|
||||
#
|
||||
# FILE: repair_assets
|
||||
#
|
||||
# USAGE: ./repair_assets showid
|
||||
#
|
||||
# DESCRIPTION: Given a show where there was a directory of asset files on the
|
||||
# old HPR server which got lost in the migration, rebuild it
|
||||
# and fill it with assets from the IA. Modify the show notes to
|
||||
# point to these recovered assets.
|
||||
#
|
||||
# OPTIONS: ---
|
||||
# REQUIREMENTS: ---
|
||||
# BUGS: ---
|
||||
# NOTES: ---
|
||||
# AUTHOR: Dave Morriss (djm), Dave.Morriss@gmail.com
|
||||
# VERSION: 0.0.6
|
||||
# CREATED: 2024-05-10 21:26:31
|
||||
# REVISION: 2024-07-10 15:12:54
|
||||
#
|
||||
#===============================================================================
|
||||
|
||||
# set -o nounset # Treat unset variables as an error
|
||||
|
||||
VERSION="0.0.6"
|
||||
|
||||
SCRIPT=${0##*/}
|
||||
# DIR=${0%/*}
|
||||
|
||||
STDOUT="/dev/fd/2"
|
||||
|
||||
#
|
||||
# Select the appropriate working directory for the host
|
||||
#
|
||||
case $(hostname) in
|
||||
i7-desktop)
|
||||
BASEDIR="$HOME/HPR/InternetArchive"
|
||||
;;
|
||||
borg)
|
||||
BASEDIR="$HOME/IA"
|
||||
;;
|
||||
*)
|
||||
echo "Wrong host!"
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
|
||||
cd "$BASEDIR" || { echo "Failed to cd to $BASEDIR"; exit 1; }
|
||||
|
||||
#
|
||||
# Load library functions
|
||||
#
|
||||
LIB="$HOME/HPR/function_lib.sh"
# Exit with a failure status when the library is absent. A bare 'exit' here
# would return the status of the preceding 'echo' (i.e. success), hiding the
# problem from any caller that checks the exit code.
[ -e "$LIB" ] || { echo "Unable to source functions"; exit 1; }
# shellcheck disable=SC1090
source "$LIB"
|
||||
|
||||
#
|
||||
# Enable coloured messages
|
||||
#
|
||||
define_colours
|
||||
|
||||
#
|
||||
# Sanity checks
|
||||
#
|
||||
IA=$(command -v ia)
|
||||
[ -n "$IA" ] || { echo "Program 'ia' was not found"; exit 1; }
|
||||
Q2T=$(command -v query2tt2)
|
||||
[ -n "$Q2T" ] || { echo "Program 'query2tt2' was not found"; exit 1; }
|
||||
FIXAL="$BASEDIR/fix_asset_links"
|
||||
[ -e "$FIXAL" ] || { echo "Program '$FIXAL' was not found"; exit 1; }
|
||||
|
||||
#
|
||||
# Make temporary files and set traps to delete them
|
||||
#
|
||||
TMP1=$(mktemp) || { echo "$SCRIPT: creation of temporary file failed!"; exit 1; }
|
||||
TMP2=$(mktemp) || { echo "$SCRIPT: creation of temporary file failed!"; exit 1; }
|
||||
trap 'cleanup_temp $TMP1 $TMP2' SIGHUP SIGINT SIGPIPE SIGTERM EXIT
|
||||
|
||||
# {{{ -- Functions -- _verbose, _usage, _log, find_missing, make_dir
|
||||
|
||||
#=== FUNCTION ================================================================
|
||||
# NAME: find_missing
|
||||
# DESCRIPTION: Given two arrays containing IA assets and HPR assets,
|
||||
# determine which IA assets are missing from the HPR list.
|
||||
# PARAMETERS: $1 (nameref) IA list
|
||||
# $2 (nameref) HPR list
|
||||
# $3 Name of array to receive list of missing assets
|
||||
# RETURNS: Nothing
|
||||
#===============================================================================
|
||||
find_missing () {
    #
    # All three arguments are array *names*, accessed through namerefs. The
    # nameref variables carry a prefix so they are unlikely to clash with the
    # caller's own array names (a nameref may not share its target's name).
    #
    local -n _fm_ia="${1}"
    local -n _fm_hpr="${2}"
    local -n _fm_missing="${3}"

    local -A hIA hHPR
    local path key

    #
    # Make a hash keyed by the IA file base names from an indexed array.
    # Subscript and value are quoted so names with spaces or shell
    # metacharacters are stored verbatim.
    #
    for path in "${_fm_ia[@]}"; do
        key="${path##*/}"
        # An empty key (path ending in '/') is not a valid subscript
        [[ -n $key ]] || continue
        hIA["$key"]="$path"
    done

    #
    # Make a hash keyed by the HPR file base names from an indexed array
    #
    for path in "${_fm_hpr[@]}"; do
        key="${path##*/}"
        [[ -n $key ]] || continue
        hHPR["$key"]="$path"
    done

    #
    # Use the basename keys to check what's missing, but return the full path
    # names. Appending through the nameref (rather than building a string for
    # 'eval') means a quote in a file name cannot break or inject commands.
    #
    for key in "${!hIA[@]}"; do
        if ! exists_in hHPR "$key"; then
            _fm_missing+=("${hIA[$key]}")
        fi
    done
}
|
||||
|
||||
#=== FUNCTION ================================================================
|
||||
# NAME: make_dir
|
||||
# DESCRIPTION: Make a directory if it doesn't exist, failing gracefully on
|
||||
# errors.
|
||||
# PARAMETERS: $1 directory path
|
||||
# RETURNS: True if success, otherwise exits the caller script
|
||||
#===============================================================================
|
||||
make_dir () {
    local target="${1}"

    # Nothing to do when the directory is already there
    [[ -d $target ]] && return 0

    # Create it (with any missing parents); give up on the whole script if
    # that is not possible
    if ! mkdir -p "$target"; then
        coloured 'red' "Failed to create $target"
        exit 1
    fi
}
|
||||
|
||||
#=== FUNCTION ================================================================
|
||||
# NAME: _verbose
|
||||
# DESCRIPTION: Writes a message in verbose mode
|
||||
# PARAMETERS: * message strings to write
|
||||
# RETURNS: Nothing
|
||||
#===============================================================================
|
||||
_verbose () {
    # Stay silent when the verbosity level is zero
    if [ "$VERBOSE" -eq 0 ]; then
        return
    fi

    # One output line per argument
    for msg in "$@"; do
        printf '%s\n' "$msg"
    done
}
|
||||
|
||||
#=== FUNCTION ================================================================
|
||||
# NAME: _log
|
||||
# DESCRIPTION: Appends a record to the file "$LOGFILE"
|
||||
# PARAMETERS: $1 Message to write
|
||||
# RETURNS: Nothing
|
||||
#===============================================================================
|
||||
_log () {
    local text="${1}"
    local stamp

    # Timestamp in 'YYYY-MM-DD HH:MM:SS' form, followed by the message
    stamp="$(date '+%F %T')"
    printf '%s %s\n' "$stamp" "$text" >> "$LOGFILE"
}
|
||||
|
||||
#=== FUNCTION ================================================================
|
||||
# NAME: _usage
|
||||
# DESCRIPTION: Reports usage; always exits the script after doing so
|
||||
# PARAMETERS: 1 - the integer to pass to the 'exit' command
|
||||
# RETURNS: Nothing
|
||||
#===============================================================================
|
||||
_usage () {
|
||||
local -i result=${1:-0}
|
||||
|
||||
cat >$STDOUT <<-endusage
|
||||
${SCRIPT} - version: ${VERSION}
|
||||
|
||||
Usage: ./${SCRIPT} [-h] [-v] [-d {0|1}] [-D] showid
|
||||
|
||||
Attempts to repair an show where the directory of assets was not transferred
|
||||
from the old HPR server.
|
||||
|
||||
Options:
|
||||
-h Print this help
|
||||
-v Run in verbose mode where more information is
|
||||
reported. Default is off. If -v is repeated it
|
||||
increases the verbosity level (levels 1 and 2 only).
|
||||
-d 0|1 Dry run: -d 1 (the default) runs the script in dry-run
|
||||
mode where nothing is changed but the actions that
|
||||
will be taken are reported; -d 0 turns off dry-run
|
||||
mode and the actions will be carried out.
|
||||
-D Run in debug mode where a lot more information is
|
||||
reported
|
||||
|
||||
Arguments:
|
||||
showid The show id in the form 'hpr1234'
|
||||
|
||||
endusage
|
||||
exit "$result"
|
||||
}
|
||||
|
||||
# }}}
|
||||
|
||||
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
#-------------------------------------------------------------------------------
|
||||
# Directories and files
|
||||
#-------------------------------------------------------------------------------
|
||||
LOGS="$BASEDIR/logs"
|
||||
make_dir "${LOGS}"
|
||||
LOGFILE="$LOGS/$SCRIPT.log"
|
||||
|
||||
#-------------------------------------------------------------------------------
|
||||
# Options
|
||||
#-------------------------------------------------------------------------------
|
||||
# Default settings
|
||||
#
|
||||
VERBOSE=0
|
||||
|
||||
#
|
||||
# Process options
|
||||
#
|
||||
while getopts :d:Dhv opt
|
||||
do
|
||||
case "${opt}" in
|
||||
D) DEBUG=1;;
|
||||
d) DRYRUN=$OPTARG;;
|
||||
h) _usage 0;;
|
||||
v) ((VERBOSE++));;
|
||||
*) echo "** Unknown option"
|
||||
_usage 1;;
|
||||
esac
|
||||
done
|
||||
shift $((OPTIND - 1))
|
||||
|
||||
#
|
||||
# Set option defaults and check their values
|
||||
#
|
||||
DRYRUN=${DRYRUN:-1}
|
||||
if [[ $DRYRUN -ne 0 && $DRYRUN -ne 1 ]]; then
|
||||
coloured 'red' "** Use '-d 0' or '-d 1'"
|
||||
_usage 1
|
||||
fi
|
||||
[[ $VERBOSE -gt 0 && $DRYRUN -eq 1 ]] && echo "Dry run mode"
|
||||
|
||||
DEBUG=${DEBUG:-0}
|
||||
[[ $DEBUG -eq 1 ]] && coloured 'yellow' "Debug mode"
|
||||
|
||||
#-------------------------------------------------------------------------------
|
||||
# Argument check
|
||||
#-------------------------------------------------------------------------------
|
||||
# Should have one argument
|
||||
#
|
||||
if [[ $# != 1 ]]; then
|
||||
coloured 'red' "Missing argument"
|
||||
_usage 1
|
||||
fi
|
||||
show="${1,,}"
|
||||
|
||||
#
|
||||
# Ensure show id is correctly formatted. We want it to be 'hpr1234'
|
||||
#
|
||||
# The whole argument must be 'hpr1234' or '1234'; the pattern is anchored so
# that arbitrary text which merely contains digits is rejected. The digits are
# forced to base 10 because printf would otherwise read a leading zero as
# octal (so 'hpr0123' would become 'hpr0083' and 'hpr0089' would be an error).
if [[ $show =~ ^(hpr)?([0-9]+)$ ]]; then
    printf -v show 'hpr%04d' "$((10#${BASH_REMATCH[2]}))"
else
    coloured 'red' "Incorrect show specification: $show"
    coloured 'yellow' "Use 'hpr9999' or '9999' format"
    exit 1
fi
|
||||
_DEBUG "Parsed item: $show"
|
||||
echo "Processing show $show"
|
||||
_log "Processing show $show; dry-run: $([ "$DRYRUN" -eq 1 ] && echo "on" || echo "off")"
|
||||
|
||||
#-------------------------------------------------------------------------------
|
||||
# Declarations and constants
|
||||
#-------------------------------------------------------------------------------
|
||||
declare -a iacache
|
||||
|
||||
#
|
||||
# SHOWURL is where the show will be on the webserver
|
||||
#
|
||||
printf -v SHOWURL 'https://hackerpublicradio.org/eps/%s/index.html' "$show"
|
||||
|
||||
#
|
||||
# CACHEDIR is where we store asset details and files
|
||||
#
|
||||
CACHEDIR="$BASEDIR/assets"
|
||||
[ ! -d "$CACHEDIR" ] && {
|
||||
coloured 'red' "Creating cache directory"
|
||||
make_dir "$CACHEDIR"
|
||||
}
|
||||
|
||||
#
|
||||
# Pointers into the cache:
|
||||
# LOCAL_ASSETDIR - where the cache for this show lives
|
||||
# LOCAL_FILEDIR - where the IA files have been placed
|
||||
# LOCAL_PARENTDIR - the equivalent directory to the top show dir
|
||||
#
|
||||
LOCAL_ASSETDIR="$CACHEDIR/${show}"
|
||||
LOCAL_FILEDIR="$LOCAL_ASSETDIR/files"
|
||||
LOCAL_PARENTDIR="$LOCAL_FILEDIR/${show}"
|
||||
|
||||
#
|
||||
# Pointers to the HPR server directories:
|
||||
# REMOTE_ASSETDIR - where the assets are to go
|
||||
# REMOTE_PARENTDIR - the remote parent directory
|
||||
#
|
||||
REMOTE_ASSETDIR="public_html/eps/${show}/${show}"
|
||||
REMOTE_PARENTDIR="public_html/eps/${show}"
|
||||
|
||||
CMDTPL='ssh hpr@hackerpublicradio.org %s'
|
||||
|
||||
MANIFEST="$CACHEDIR/$show/manifest"
|
||||
DBNOTES="$CACHEDIR/$show/notes.html"
|
||||
|
||||
#-------------------------------------------------------------------------------
|
||||
# Check the show exists in the database (or is visible on the website).
|
||||
#-------------------------------------------------------------------------------
|
||||
_verbose "Checking the show exists on the HPR server"
|
||||
result=$(curl --head --silent --write-out "%{http_code}" --output /dev/null "$SHOWURL")
|
||||
if [[ $result -eq 404 ]]; then
|
||||
coloured 'red' "Could not detect show '$show' on the HPR server"
|
||||
_log "Show '$show' not on the HPR server"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
#-------------------------------------------------------------------------------
|
||||
# Check the show exists on the IA
|
||||
#-------------------------------------------------------------------------------
|
||||
_verbose "Checking the show exists on the IA server"
|
||||
if ! ia metadata "$show" --exists > /dev/null 2>&1; then
|
||||
coloured 'red' "Could not detect show '$show' on the IA server"
|
||||
coloured 'yellow' "Check that archive.org is available"
|
||||
coloured 'yellow' "Try https://downfor.io/internet-archive"
|
||||
_log "Show '$show' not on the IA server"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
#-------------------------------------------------------------------------------
|
||||
# Check IA, collect contents, classify them
|
||||
#-------------------------------------------------------------------------------
|
||||
# Interrogate the IA for the required item contents. If it returns True we can
|
||||
# collect its contents, otherwise we can't proceed. The file 'TMP1' contains
|
||||
# just a simple list of the files on the IA relating to this item.
|
||||
#
|
||||
_verbose "Collecting filenames from the IA server"
|
||||
if ia list "$show" > "$TMP1"; then
|
||||
while read -r iafile; do
|
||||
iacache+=("$iafile")
|
||||
done < "$TMP1"
|
||||
else
|
||||
coloured 'red' "Item $show can't be found on the IA"
|
||||
coloured 'red' "Can't continue"
|
||||
_log "Files for show '$show' not on the IA server"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
_DEBUG "$(printf '%s\n' "${iacache[@]}")"
|
||||
|
||||
#
|
||||
# Determine which files are assets
|
||||
#
|
||||
_verbose "Categorising files held on the IA"
|
||||
|
||||
declare -a audio ia_transcript ia_asset
|
||||
|
||||
audio_re="^${show}\.(flac|mp3|ogg|opus|spx|wav)\$"
|
||||
# transcript_re="^${show}/${show}/${show}\.(json|srt|tsv|txt|vtt)\$"
|
||||
transcript_re="^${show}/${show}\.(json|srt|tsv|txt|vtt)\$"
|
||||
asset_re="^${show}/(${show}/)?.*\$"
|
||||
metadata_re="^(__ia_thumb.jpg|${show}[^/]+\.(afpk|torrent|gz|xml|sqlite|png))\$"
|
||||
|
||||
for file in "${iacache[@]}"; do
|
||||
if [[ $file =~ $audio_re ]]; then
|
||||
audio+=("$file")
|
||||
elif [[ $file =~ $metadata_re ]]; then
|
||||
_verbose "Skipping $file"
|
||||
continue
|
||||
elif [[ $file =~ $transcript_re ]]; then
|
||||
ia_transcript+=("$file")
|
||||
elif [[ $file =~ $asset_re ]]; then
|
||||
ia_asset+=("$file")
|
||||
fi
|
||||
done
|
||||
|
||||
#
|
||||
# Report what was collected at verbosity level 2
|
||||
#
|
||||
if [[ $VERBOSE -gt 1 ]]; then
|
||||
coloured 'cyan' "** audio (${#audio[@]}):"
|
||||
printf '%s\n' "${audio[@]}"
|
||||
|
||||
coloured 'cyan' "** transcript (${#ia_transcript[@]}):"
|
||||
printf '%s\n' "${ia_transcript[@]}"
|
||||
|
||||
coloured 'cyan' "** asset (${#ia_asset[@]}):"
|
||||
printf '%s\n' "${ia_asset[@]}"
|
||||
|
||||
_log "IA asset count for show '$show' = ${#ia_asset[@]}"
|
||||
fi
|
||||
|
||||
#
|
||||
# No assets, no need to proceed!
|
||||
#
|
||||
if [[ ${#ia_asset[@]} -eq 0 ]]; then
|
||||
coloured 'green' "No IA assets found for show $show; nothing to do"
|
||||
_log "Nothing to do for show $show"
|
||||
exit
|
||||
fi
|
||||
|
||||
#-------------------------------------------------------------------------------
|
||||
# Check what's on the HPR server
|
||||
#-------------------------------------------------------------------------------
|
||||
#
|
||||
# 'rc' is the remote command template
|
||||
#
|
||||
printf -v rc 'find public_html/eps/%s -type f -printf "%s/%%P\\n"' "$show" "$show"
|
||||
|
||||
#
|
||||
# 'command' is the local command we'll run to run a remote command on the HPR
|
||||
# server
|
||||
#
|
||||
# shellcheck disable=SC2059 disable=SC2089
|
||||
printf -v command "$CMDTPL" "'$rc'"
|
||||
|
||||
if [[ $VERBOSE -gt 1 ]]; then
|
||||
echo "Command: $command"
|
||||
fi
|
||||
|
||||
declare -a hpr_asset
|
||||
ignore_re="index.html$"
|
||||
|
||||
#
|
||||
# Run the command and save the output. Save the asset names returned in an
|
||||
# array. TODO: Handle errors from the command
|
||||
#
|
||||
if [[ $DRYRUN -eq 0 ]]; then
|
||||
eval "$command" > "$TMP2"
|
||||
RES=$?
|
||||
if [[ $RES -eq 0 ]]; then
|
||||
_verbose "$(coloured 'green' "Remote command successful")"
|
||||
while read -r hprfile; do
|
||||
if [[ ! $hprfile =~ $ignore_re ]]; then
|
||||
hpr_asset+=("${hprfile}")
|
||||
fi
|
||||
done < "$TMP2"
|
||||
_verbose "$(coloured 'green' "Assets found on HPR server = ${#hpr_asset[@]}")"
|
||||
_verbose "$(printf '%s\n' "${hpr_asset[@]}")"
|
||||
_log "Assets found on HPR server = ${#hpr_asset[@]}"
|
||||
else
|
||||
coloured 'red' "Remote command failed"
|
||||
_log "Failed while searching for HPR assets"
|
||||
exit 1
|
||||
fi
|
||||
else
|
||||
coloured 'yellow' "Would have searched for assets on the HPR server"
|
||||
fi
|
||||
|
||||
#-------------------------------------------------------------------------------
|
||||
# Compare the two asset lists and return what's missing on the HPR server
|
||||
#-------------------------------------------------------------------------------
|
||||
declare -a missing
|
||||
find_missing ia_asset hpr_asset missing
|
||||
_verbose "$(coloured 'cyan' "** missing (${#missing[@]}):")"
|
||||
_verbose "$(printf '%s\n' "${missing[@]}")"
|
||||
|
||||
if [[ ${#missing[@]} -eq 0 ]]; then
|
||||
coloured 'green' "No missing assets detected; nothing to do"
|
||||
_log "No missing assets detected; nothing to do"
|
||||
exit
|
||||
else
|
||||
coloured 'yellow' "Found ${#missing[@]} files missing on the HPR server"
|
||||
fi
|
||||
|
||||
#-------------------------------------------------------------------------------
|
||||
# Prepare to copy the missing files
|
||||
#-------------------------------------------------------------------------------
|
||||
make_dir "$LOCAL_FILEDIR"
|
||||
|
||||
declare -a downloads
|
||||
|
||||
#
|
||||
# Check whether files are already downloaded
|
||||
#
|
||||
for file in "${missing[@]}"; do
|
||||
if [[ ! -e "$LOCAL_FILEDIR/$show/$file" ]]; then
|
||||
downloads+=("$file")
|
||||
fi
|
||||
done
|
||||
|
||||
_verbose "$(coloured 'cyan' "** downloads (${#downloads[@]}):")"
|
||||
_verbose "$(printf '%s\n' "${downloads[@]}")"
|
||||
|
||||
#
|
||||
# If we have files to download get them now
|
||||
#
|
||||
if [[ ${#downloads[@]} -gt 0 ]]; then
    if [[ $DRYRUN -eq 1 ]]; then
        coloured 'yellow' "Would have downloaded missing files from the IA"
    elif ia download "$show" --destdir="$LOCAL_FILEDIR" "${downloads[@]}"; then
        coloured 'green' "Downloads complete"
        _log "Downloaded IA assets for show $show"
    else
        # Stop here: carrying on would upload an incomplete set of assets to
        # the HPR server and log the show as repaired.
        coloured 'red' "Download of IA assets failed"
        _log "Failed to download IA assets for show $show"
        exit 1
    fi
else
    coloured 'yellow' "IA files are already downloaded"
fi
|
||||
|
||||
# shellcheck disable=SC2089
|
||||
RSYNCTPL="rsync -a -e 'ssh' %s hpr@hpr:%s"
|
||||
|
||||
#-------------------------------------------------------------------------------
|
||||
# Build the 'ssh' command to make a directory
|
||||
#-------------------------------------------------------------------------------
|
||||
#
|
||||
# Prepare to make the remote directory if necessary.
|
||||
#
|
||||
# - $rc is the remote command we'll run on the server
|
||||
# - $command is the full 'ssh' command including $rc
|
||||
#
|
||||
printf -v rc 'if [ ! -e "%s" ]; then mkdir -p "%s"; fi' \
|
||||
"$REMOTE_ASSETDIR" "$REMOTE_ASSETDIR"
|
||||
|
||||
# shellcheck disable=SC2059 disable=SC2089
|
||||
printf -v command "$CMDTPL" "'$rc'"
|
||||
|
||||
#-------------------------------------------------------------------------------
|
||||
# Run or report the command that would be run
|
||||
#-------------------------------------------------------------------------------
|
||||
if [[ $DRYRUN -eq 0 ]]; then
|
||||
eval "$command"
|
||||
RES=$?
|
||||
if [[ $RES -eq 0 ]]; then
|
||||
coloured 'green' "Remote directory creation successful"
|
||||
else
|
||||
coloured 'red' "Remote directory creation failed"
|
||||
fi
|
||||
else
|
||||
coloured 'yellow' "Would have created the remote directory"
|
||||
echo "$command"
|
||||
fi
|
||||
|
||||
#-------------------------------------------------------------------------------
|
||||
# Synchronise assets to the directory
|
||||
#-------------------------------------------------------------------------------
|
||||
# shellcheck disable=SC2059 disable=SC2089
|
||||
printf -v command "$RSYNCTPL" "$LOCAL_PARENTDIR/" "$REMOTE_PARENTDIR/"
|
||||
|
||||
if [[ $DRYRUN -eq 0 ]]; then
|
||||
eval "$command"
|
||||
RES=$?
|
||||
if [[ $RES -eq 0 ]]; then
|
||||
coloured 'green' "Remote upload successful"
|
||||
_log "Uploaded assets for show $show"
|
||||
else
|
||||
coloured 'red' "Remote upload failed"
|
||||
exit 1
|
||||
fi
|
||||
else
|
||||
coloured 'yellow' "Would have synchronised local assets with the remote directory"
|
||||
echo "$command"
|
||||
fi
|
||||
|
||||
#-------------------------------------------------------------------------------
|
||||
# Make a 'manifest' file if necessary
|
||||
#-------------------------------------------------------------------------------
|
||||
if [[ $DRYRUN -eq 0 ]]; then
|
||||
if [[ ! -e $MANIFEST ]]; then
|
||||
find "$LOCAL_PARENTDIR" -type f -printf '%P\n' > "$MANIFEST"
|
||||
_verbose "$(coloured 'green' "Created manifest file")"
|
||||
_log "Created manifest file $MANIFEST"
|
||||
fi
|
||||
fi
|
||||
|
||||
#-------------------------------------------------------------------------------
|
||||
# Save the notes from the database if necessary
|
||||
#-------------------------------------------------------------------------------
|
||||
if [[ $DRYRUN -eq 0 ]]; then
|
||||
if [[ ! -e $DBNOTES ]]; then
|
||||
if ! tunnel_is_open; then
|
||||
open_tunnel
|
||||
fi
|
||||
if query2tt2 -config="$BASEDIR/.hpr_livedb.cfg" \
|
||||
-temp="$BASEDIR/query2tt2_nokey.tpl" \
|
||||
-out="$DBNOTES" \
|
||||
-dbarg="${show:3}" \
|
||||
'select notes from eps where id = ?'
|
||||
then
|
||||
_verbose "$(coloured 'green' "Created notes file")"
|
||||
_log "Created notes file $DBNOTES"
|
||||
else
|
||||
_verbose "$(coloured 'red' "Creation of notes file failed")"
|
||||
_log "Creation of notes file $DBNOTES failed"
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
|
||||
#-------------------------------------------------------------------------------
|
||||
# Adjust the notes with 'fix_asset_links'
|
||||
#-------------------------------------------------------------------------------
|
||||
if [[ $DRYRUN -eq 0 ]]; then
|
||||
echo "$FIXAL"
|
||||
# $FIXAL
|
||||
fi
|
||||
|
||||
|
||||
#
|
||||
# All done
|
||||
#
|
||||
if [[ $DRYRUN -eq 0 ]]; then
|
||||
_log "Repaired show $show"
|
||||
fi
|
||||
|
||||
#-------------------------------------------------------------------------------
|
||||
# √ Make a place to hold the files on this machine
|
||||
# √ Download them from the IA
|
||||
# √ Make a directory on the HPR server
|
||||
# √ Copy the assets to the HPR server
|
||||
# Modify the notes to point to the assets on the server
|
||||
#-------------------------------------------------------------------------------
|
||||
|
||||
# vim: syntax=sh:ts=8:sw=4:ai:et:tw=78:fo=tcrqn21:fdm=marker
|
@ -3,7 +3,7 @@
|
||||
#
|
||||
# FILE: repair_item
|
||||
#
|
||||
# USAGE: ./repair_item [-h] [-v] [-d {0|1}] [-D] [-l N] itemname
|
||||
# USAGE: ./repair_item [-h] [-v] [-d {0|1}] [-D] [-l N] [-X] itemname
|
||||
#
|
||||
# DESCRIPTION: Repairs an IA "item" (HPR show) if something has failed during
|
||||
# the upload.
|
||||
@ -18,20 +18,32 @@
|
||||
# temporarily on 'borg') and determines which have not been
|
||||
# uploaded, then takes steps to perform the uploads.
|
||||
#
|
||||
# Version 0.0.10 onwards has the capability to repair an IA item
|
||||
# from the HPR backup disk. This seems to be necessary because
|
||||
# the transcripts were not carried over (although we are
|
||||
# adding them to the IA for new shows now, older ones were never
|
||||
# copied), and there has been a case where none of the assets
|
||||
# were on the IA. The method used is to place the backup files
|
||||
# in the directory 'repairs' under the local IA or
|
||||
# InternetArchive directory. The files are held in the hierarchy
|
||||
# '$item/$item/'. The assets are in the lower directory and the
|
||||
# source file is in the upper one. This emulates the placement
|
||||
# on the IA itself.
|
||||
#
|
||||
# OPTIONS: ---
|
||||
# REQUIREMENTS: ---
|
||||
# BUGS: ---
|
||||
# NOTES: ---
|
||||
# AUTHOR: Dave Morriss (djm), Dave.Morriss@gmail.com
|
||||
# VERSION: 0.0.9
|
||||
# VERSION: 0.0.10
|
||||
# CREATED: 2020-01-05 22:42:46
|
||||
# REVISION: 2024-06-14 18:03:58
|
||||
# REVISION: 2024-07-12 14:39:38
|
||||
#
|
||||
#===============================================================================
|
||||
|
||||
#set -o nounset # Treat unset variables as an error
|
||||
|
||||
VERSION="0.0.9"
|
||||
VERSION="0.0.10"
|
||||
|
||||
SCRIPT=${0##*/}
|
||||
# DIR=${0%/*}
|
||||
@ -45,10 +57,12 @@ case $(hostname) in
|
||||
i7-desktop)
|
||||
BASEDIR="$HOME/HPR/InternetArchive"
|
||||
UPLOADS="$HOME/HPR/IA/uploads"
|
||||
REPAIRS="$BASEDIR/repairs"
|
||||
;;
|
||||
borg)
|
||||
BASEDIR="$HOME/IA"
|
||||
UPLOADS="/data/IA/uploads"
|
||||
REPAIRS="$BASEDIR/repairs"
|
||||
;;
|
||||
*)
|
||||
echo "Wrong host!"
|
||||
@ -185,7 +199,7 @@ _usage () {
|
||||
cat >$STDOUT <<-endusage
|
||||
${SCRIPT} - version: ${VERSION}
|
||||
|
||||
Usage: ./${SCRIPT} [-h] [-v] [-d {0|1}] [-D] [-l N] item
|
||||
Usage: ./${SCRIPT} [-h] [-v] [-d {0|1}] [-D] [-l N] [-X] item
|
||||
|
||||
Attempts to repair an IA item where the upload has failed for some reason.
|
||||
|
||||
@ -203,6 +217,12 @@ Options:
|
||||
during one run of the script. The range is 1 to
|
||||
$DEFLIMIT. This can be helpful when there are upload
|
||||
problems.
|
||||
-X Run in "extended" mode. In this mode the directory
|
||||
holding files to be added to the IA is '~/IA/repairs'
|
||||
and the files have most likely come from the HPR
|
||||
backup disk and aren't on the IA due some error. We
|
||||
want to use the capabilities of ${SCRIPT} to repair
|
||||
things and deal with the IA upload problems.
|
||||
|
||||
Arguments:
|
||||
item The item in the form 'hpr1234'
|
||||
@ -229,7 +249,7 @@ DEFLIMIT=20
|
||||
#
|
||||
# Process options
|
||||
#
|
||||
while getopts :d:Dhl:v opt
|
||||
while getopts :d:Dhl:vX opt
|
||||
do
|
||||
case "${opt}" in
|
||||
D) DEBUG=1;;
|
||||
@ -237,6 +257,7 @@ do
|
||||
h) _usage 0;;
|
||||
l) LIMIT=$OPTARG;;
|
||||
v) VERBOSE=1;;
|
||||
X) EXTENDED=1;;
|
||||
*) echo "** Unknown option"
|
||||
_usage 1;;
|
||||
esac
|
||||
@ -264,6 +285,8 @@ if [[ $LIMIT -lt 1 || $LIMIT -gt $DEFLIMIT ]]; then
|
||||
_usage 1
|
||||
fi
|
||||
|
||||
EXTENDED=${EXTENDED:-0}
|
||||
|
||||
#
|
||||
# Should have one argument
|
||||
#
|
||||
@ -295,6 +318,22 @@ if ! ia metadata "$item" --exists > /dev/null 2>&1; then
|
||||
exit 1
|
||||
fi
|
||||
|
||||
#
|
||||
# The -X (EXTENDED) mode is for when we have to upload files that have
|
||||
# mysteriously vanished from the IA. The directories here are equivalent to
|
||||
# those used by 'repair_assets'. There is a top-level directory that represents
|
||||
# the IA item, and below that a hierarchy defining placement under the item.
|
||||
# There is a 'repairs' directory per host in case we need to repair IA stuff
|
||||
# from elsewhere.
|
||||
#
|
||||
if [[ $EXTENDED -eq 1 ]]; then
|
||||
coloured 'cyan' "Using 'Extended' mode"
|
||||
if [[ ! -e $REPAIRS ]]; then
|
||||
mkdir -p "$REPAIRS"
|
||||
fi
|
||||
UPLOADS="$REPAIRS/$item"
|
||||
fi
|
||||
|
||||
#
|
||||
# Declarations
|
||||
#
|
||||
|
Loading…
Reference in New Issue
Block a user