Updates for show "repair" processing

InternetArchive/future_upload: Added logging and debugging

InternetArchive/ia_db.sql: Added new tables

InternetArchive/recover_transcripts: New script to run on 'borg' and
    copy missing files from the backup disk to the IA

InternetArchive/repair_assets: More comments, including one about a bug in the design.

InternetArchive/repair_item: Fix relating to octal numbers (if there are
    leading zeroes in a number). '_DEBUG' is now in the function
    library. Added comments to explain obscure stuff.

InternetArchive/snapshot_metadata: New Bash script (to run on my
desktop) which collects metadata for a show and stores it in the
    '~/HPR/IA/assets' directory. Runs 'view_derivatives' on it to find
    derivative files for deletion.

InternetArchive/tidy_uploaded: Moves files and directories containing
    uploaded files into a holding area for later backup. Added
    debugging, logging and a 'force' mode.

InternetArchive/upload_manager: Manages 'ia.db' (on my workstation).
    Needs many updates which have just started to be added.

InternetArchive/weekly_upload: Old script, now obsolete.
This commit is contained in:
Dave Morriss 2024-08-22 13:13:38 +01:00
parent dc0f29e957
commit 19030fee71
9 changed files with 994 additions and 73 deletions

View File

@ -13,9 +13,9 @@
# NOTES: Contains methods from 'delete_uploaded' and 'weekly_upload' as
# well as 'update_state'
# AUTHOR: Dave Morriss (djm), Dave.Morriss@gmail.com
# VERSION: 0.0.14
# VERSION: 0.0.15
# CREATED: 2021-01-07 12:11:02
# REVISION: 2024-03-03 14:12:30
# REVISION: 2024-07-29 23:17:45
#
#===============================================================================
@ -26,7 +26,7 @@ SCRIPT=${0##*/}
STDOUT="/dev/fd/2"
VERSION="0.0.14"
VERSION="0.0.15"
#
# Load library functions
@ -36,6 +36,8 @@ LIB="$HOME/bin/function_lib.sh"
# shellcheck disable=SC1090
source "$LIB"
# {{{ -- Functions -- check_uploads, _log, _usage
#=== FUNCTION ================================================================
# NAME: check_uploads
# DESCRIPTION: Determines if files exist for uploading
@ -59,6 +61,31 @@ check_uploads () {
return 0
}
#=== FUNCTION ================================================================
#         NAME: _log
#  DESCRIPTION: Appends one record to the file named by the global $LOGFILE.
#               The record layout is taken from the global $LOGREC, which is
#               a 'printf' format consuming a time value followed by the
#               message. When $LOGREC is not set a built-in default layout
#               is used instead. The time value passed to 'printf' is -1,
#               which makes '%(fmt)T' render the current date and time.
#               If $LOGFILE is not set a complaint is written and the
#               script exits with status 1.
#               'shellcheck' objects to this function, hence the directives
#               below.
#   PARAMETERS: 1 - the message to write
#      RETURNS: Nothing
#===============================================================================
# shellcheck disable=SC2317 disable=SC2059
_log () {
    local text="$1"
    local template

    #
    # Without a log file there is nowhere to write; treat this as fatal
    #
    if ! [ -v LOGFILE ]; then
        echo "${FUNCNAME[0]}: \$LOGFILE is not defined"
        exit 1
    fi

    #
    # Use the caller's record template if there is one (even an empty one),
    # otherwise fall back to 'date time message'
    #
    if [ -v LOGREC ]; then
        template="$LOGREC"
    else
        template='%(%F %T)T %s\n'
    fi

    printf "$template" -1 "$text" >> "$LOGFILE"

    return
}
#=== FUNCTION ================================================================
# NAME: _usage
# DESCRIPTION: Report usage
@ -108,18 +135,7 @@ endusage
exit "$res"
}
#=== FUNCTION ================================================================
# NAME: _DEBUG
# DESCRIPTION: Writes a message if in DEBUG mode
# PARAMETERS: List of messages
# RETURNS: Nothing
#===============================================================================
_DEBUG () {
[ "$DEBUG" == 0 ] && return
for msg in "$@"; do
printf 'D> %s\n' "$msg"
done
}
# }}}
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@ -144,6 +160,7 @@ cd "$BASEDIR" || { echo "Can't cd to $BASEDIR"; exit 1; }
#
LOGS="$BASEDIR/logs"
LOGFILE="$LOGS/$SCRIPT.log"
LOGREC='%(%F %T)T %s\n'
#
# Tools
@ -157,8 +174,8 @@ UPSTATE="$BASEDIR/update_state"
#
# Fallback URL
#
URL_BAK="http://hub.hackerpublicradio.org/cms/status.php"
QUERY2_BAK="${BASECOM} -o - ${URL_BAK}"
# URL_BAK="http://hub.hackerpublicradio.org/cms/status.php"
# QUERY2_BAK="${BASECOM} -o - ${URL_BAK}"
#
# Prerequisites
@ -575,4 +592,4 @@ fi
exit
# vim: syntax=sh:ts=8:sw=4:ai:et:tw=78:fo=tcrqn21
# vim: syntax=sh:ts=8:sw=4:ai:et:tw=78:fo=tcrqn21:fdm=marker

View File

@ -3,11 +3,11 @@
* =========
*
* Schema for SQLite database 'ia.db' used to hold IA upload information
* Last updated: 2022-06-16
* Last updated: 2024-07-15
*
*/
/*
/* ----------------------------------------------------------------------------
* Table: episodes
*
* id show number from HPR
@ -44,7 +44,7 @@ CREATE TABLE episodes (
notes text
);
/*
/* ----------------------------------------------------------------------------
* Table: assets
*
* id primary key
@ -62,7 +62,7 @@ CREATE TABLE assets (
uploaded integer default 0
);
/*
/* ----------------------------------------------------------------------------
* Index: assets_filename_idx
*
* Attempt to constrain duplicates in the assets table
@ -70,7 +70,7 @@ CREATE TABLE assets (
*/
CREATE UNIQUE INDEX assets_filename_idx ON assets (episode_id, filename);
/*
/* ----------------------------------------------------------------------------
* Table: dirlist
*
* id primary key
@ -82,6 +82,66 @@ CREATE TABLE dirlist (
filename text NOT NULL
);
/* ----------------------------------------------------------------------------
 * Table: hpr_repairs
 *
 * Holds at most one row per show (the show number is the primary key).
 *
 * episode_id       Primary key; also a foreign key referencing 'episodes' (id)
 * repaired         Boolean flag (held as an integer, 0 by default) showing
 *                  whether the show has been repaired
 * repair_date      Date of repair (held as an integer, 0 by default)
 * notes            Notes about any anomalies (NULL by default)
 * asset_count      Number of assets (after ignoring transcripts, etc),
 *                  0 by default
 *
 */
CREATE TABLE hpr_repairs (
    episode_id      INTEGER PRIMARY KEY REFERENCES episodes (id),
    repaired        INTEGER DEFAULT 0,
    repair_date     INTEGER DEFAULT 0,
    notes           TEXT DEFAULT NULL,
    asset_count     INTEGER DEFAULT 0
);
/* ----------------------------------------------------------------------------
 * Table: ia_repairs
 *
 * Holds at most one row per show (the show number is the primary key).
 *
 * episode_id       Primary key; also a foreign key referencing 'episodes' (id)
 * repaired         Boolean flag (held as an integer, 0 by default) showing
 *                  whether the show has been repaired
 * repair_date      Date of repair (held as an integer, 0 by default)
 * notes            Notes about any anomalies (NULL by default)
 *
 */
CREATE TABLE ia_repairs (
    episode_id      INTEGER PRIMARY KEY REFERENCES episodes (id),
    repaired        INTEGER DEFAULT 0,
    repair_date     INTEGER DEFAULT 0,
    notes           TEXT DEFAULT NULL
);
/* ----------------------------------------------------------------------------
 * Table: show_host_xref
 *
 * Cross-reference between a show and its host.
 *
 * episode_id       Foreign key referencing 'episodes' (id)
 * hostid           Host number from MySQL database
 * hostname         Host name from MySQL database (NULL by default)
 *
 */
CREATE TABLE show_host_xref (
    episode_id      INTEGER,
    hostid          INTEGER,
    hostname        TEXT DEFAULT NULL,
    FOREIGN KEY (episode_id) REFERENCES episodes (id)
);
/* ----------------------------------------------------------------------------
 * Index: show_host_xref_idx
 *
 * Attempt to constrain duplicates in the show_host_xref table. Because the
 * index is unique and covers only 'episode_id', a given episode number can
 * appear in at most one row of the table.
 *
 */
CREATE UNIQUE INDEX show_host_xref_idx
    ON show_host_xref (episode_id ASC);
/*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
* View: episodes_view
*

View File

@ -0,0 +1,590 @@
#!/bin/bash -
#===============================================================================
#
# FILE: recover_transcripts
#
# USAGE: ./recover_transcripts [-h] [-d {0|1}] [-D] [-F] [-v] item
#
# DESCRIPTION: Intended to be run on `borg`; collects assets from the
# locally-mounted backup disk and places them in a local
# directory (organised to be compatible with the IA), then
# uploads anything that is missing on the IA.
#
# Version 0.1.* looks for assets in the 'eps/' directory and
# copies them to the cache. Also moves the IA copies so all is
# aligned. Many shows earlier than mid 2019 are likely to need
# this addition.
#
# OPTIONS: ---
# REQUIREMENTS: ---
# BUGS: ---
# NOTES: ---
# AUTHOR: Dave Morriss (djm), Dave.Morriss@gmail.com
# VERSION: 0.1.4
# CREATED: 2024-07-14 13:22:58
# REVISION: 2024-08-20 17:38:19
#
#===============================================================================
# set -o nounset # Treat unset variables as an error
VERSION="0.1.4"
SCRIPT=${0##*/}
# DIR=${0%/*}
STDOUT="/dev/fd/2"
#
# Choose the working directories according to the host we are running on.
# Only 'borg' is acceptable; the desktop gets a specific message and any
# other host a generic one, and in both cases the script stops.
#
case "$(hostname)" in
    borg)
        BASEDIR="$HOME/IA"
        REPAIRS="$BASEDIR/repairs"
        BACKUP="/mnt/backup_disk/HPR/HPR-MIRROR"
        ;;
    i7-desktop)
        echo "To be run only on 'borg'"
        exit 1
        ;;
    *)
        echo "Wrong host!"
        exit 1
        ;;
esac

#
# Everything else is done relative to the base directory
#
if ! cd "$BASEDIR"; then
    echo "Failed to cd to $BASEDIR"
    exit 1
fi
#
# Load library functions. A missing library is fatal and must be reported
# with a non-zero exit status (a bare 'exit' here would return the status of
# the preceding 'echo', i.e. success).
#
LIB="$HOME/bin/function_lib.sh"
[ -e "$LIB" ] || { echo "Unable to source functions"; exit 1; }
# shellcheck disable=SC1090
source "$LIB"

#
# Enable coloured messages
#
define_colours

#
# Sanity checks: the external tools, the companion repair script and the
# local SQLite database must all be present before we start
#
JQ=$(command -v jq)
[ -n "$JQ" ] || { echo "Program 'jq' was not found"; exit 1; }

IA=$(command -v ia)
[ -n "$IA" ] || { echo "Program 'ia' was not found"; exit 1; }

REPIT="$BASEDIR/repair_item"
[ -e "$REPIT" ] || { echo "Program '$REPIT' was not found"; exit 1; }

IADB="$BASEDIR/ia.db"
[ -e "$IADB" ] || { echo "Database '$IADB' was not found"; exit 1; }
# {{{ -- Functions -- _IA_move, queued_tasks, _verbose, _usage
#=== FUNCTION ================================================================
#         NAME: _IA_move
#  DESCRIPTION: Performs a file move on the IA with 'ia move', retrying if it
#               fails. Assumes the existence of functions 'coloured', '_log',
#               '_verbose' and '_DEBUG'.
#               The 'ia' command is run directly with its arguments quoted
#               (not through 'eval'), so paths containing quotes, dollar
#               signs or other shell metacharacters are passed on verbatim.
#   PARAMETERS: $1      The path to move from
#               $2      The path to move to
#      RETURNS: False (1) if the retry limit is reached, otherwise true (0).
#===============================================================================
_IA_move () {
    local from="${1:?Usage: _IA_move from to}"
    local to="${2:?Usage: _IA_move from to}"

    local -i retry_threshold=5
    local -i retries=0
    local sleeptime=20

    _DEBUG "ia move \"$from\" \"$to\" --no-derive --no-backup > /dev/null 2>&1"

    #
    # Run the move. If it succeeds the loop body is never entered. If it fails
    # report the problem and count the failure. Once the failure count reaches
    # the limit give up and return false; otherwise sleep for a while and try
    # again. The intention is to catch the case when the operation times out.
    # The 'ia' command is performing its own retries when the system is
    # overloaded, but these are non-fatal.
    #
    until ia move "$from" "$to" --no-derive --no-backup > /dev/null 2>&1; do
        coloured 'red' "Failure when moving $from to $to"
        ((retries += 1))

        _log "$(printf 'Failed to move %s to %s [%d]' "$from" "$to" "$retries")"

        if [ "$retries" -ge "$retry_threshold" ]; then
            _verbose \
                "$(coloured 'red' "Retry limit reached; abandoning this move")"
            return 1
        fi

        _verbose "$(coloured 'blue' "Pausing for $sleeptime seconds and retrying")"
        sleep "$sleeptime"
    done # until ia move ...

    coloured 'green' "Moved $from to $to on the IA"
    _log "Moved $from to $to on the IA"

    #
    # The move worked; don't let a logging problem look like a failed move
    #
    return 0
}
#=== FUNCTION ================================================================
#         NAME: queued_tasks
#  DESCRIPTION: Runs 'ia tasks' for an item and uses 'jq' to count the
#               entries whose category is "catalog". The count is written to
#               STDOUT so it can be captured. The count is held in an integer
#               variable, so an empty result from the pipeline is reported
#               as 0.
#   PARAMETERS: $1      IA item (like hpr1192)
#      RETURNS: Nothing
#===============================================================================
queued_tasks () {
    local ia_item="${1:?Usage: queued_tasks item}"
    local jq_filter='[.[] | if .category == "catalog" then .status else empty end] | length'
    local -i pending=0

    pending="$(ia tasks "$ia_item" | jq -s "$jq_filter")"

    printf '%s\n' "$pending"

    return
}
#=== FUNCTION ================================================================
#         NAME: make_dir
#  DESCRIPTION: Ensures that a directory exists. If it is already there
#               nothing is done; otherwise it is created along with any
#               missing parents. A failure to create it is reported (using
#               'coloured') and ends the calling script with status 1.
#   PARAMETERS: $1      directory path
#      RETURNS: True if success, otherwise exits the caller script
#===============================================================================
make_dir () {
    local target="${1}"

    #
    # Nothing to do if it's already there
    #
    [[ -d $target ]] && return

    if ! mkdir -p "$target"; then
        coloured 'red' "Failed to create $target"
        exit 1
    fi
}
#=== FUNCTION ================================================================
#         NAME: _ifbool
#  DESCRIPTION: Simplifies conditional expressions when they need to return
#               one of two strings. The chosen string is written to STDOUT.
#               Use as:
#                   echo "Hello $(_ifbool 1 'World' 'Everyone')" → "Hello World"
#   PARAMETERS: $1      Integer being tested (default 0). If 1 then it's
#                       true, otherwise it's false. The value is held in an
#                       integer variable, so it is evaluated arithmetically.
#               $2      String returned for True (default 'true')
#               $3      String returned for False (default 'false')
#      RETURNS: Nothing
#===============================================================================
_ifbool () {
    local -i _flag="${1:-0}"
    local _yes="${2:-true}"
    local _no="${3:-false}"

    case $_flag in
        1) echo "$_yes" ;;
        *) echo "$_no" ;;
    esac

    return
}
#=== FUNCTION ================================================================
#         NAME: _log
#  DESCRIPTION: Appends a record to the file "$LOGFILE". The record consists
#               of the current date and time ('YYYY-MM-DD HH:MM:SS'), a space
#               and the message.
#   PARAMETERS: $1      Message to write
#      RETURNS: Nothing
#===============================================================================
_log () {
    local entry="${1}"
    local stamp

    stamp="$(date '+%F %T')"
    printf '%s %s\n' "$stamp" "$entry" >> "$LOGFILE"
}
#=== FUNCTION ================================================================
#         NAME: _verbose
#  DESCRIPTION: Writes each of its arguments on a line of its own, but only
#               when the global $VERBOSE is not 0. Note that the (global)
#               variable 'msg' is used as the work variable.
#   PARAMETERS: *       message strings to write
#      RETURNS: Nothing
#===============================================================================
_verbose () {
    #
    # Quiet mode: say nothing
    #
    if [ "$VERBOSE" -eq 0 ]; then
        return
    fi

    #
    # Consume the arguments one at a time
    #
    while [ $# -gt 0 ]; do
        msg="$1"
        shift
        printf '%s\n' "$msg"
    done
}
#=== FUNCTION ================================================================
#         NAME: _usage
#  DESCRIPTION: Reports usage; always exits the script after doing so. The
#               text is written to the file named by the global $STDOUT and
#               uses the globals $SCRIPT and $VERSION. The synopsis lists
#               every option handled by the script, including '-d'.
#   PARAMETERS: 1 - the integer to pass to the 'exit' command (default 0)
#      RETURNS: Nothing
#===============================================================================
_usage () {
    local -i result=${1:-0}

    #
    # The here-document lines are deliberately not indented so that the
    # output is the same whether or not leading TABs are stripped.
    #
    cat >"$STDOUT" <<-endusage
${SCRIPT} - version: ${VERSION}
Usage: ./${SCRIPT} [-h] [-d {0|1}] [-D] [-F] [-v] item
Attempts to repair an IA item where the upload has failed for some reason.
Options:
-h Print this help.
-d 0|1 Dry run: -d 1 (the default) runs the script in dry-run
mode where nothing is changed but the actions that
will be taken are reported; -d 0 turns off dry-run
mode and the actions will be carried out.
-D Run in debug mode where a lot more information is
reported.
-F Ignore (some) interlocks that will cause failure, such
as the existence of the local cache directory for the
item being processed.
-v Run in verbose mode where more information is
reported. Default is off.
Arguments:
item The item in the form 'hpr1234'
endusage

    exit "$result"
}
# }}}
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#-------------------------------------------------------------------------------
# Directories and files
#-------------------------------------------------------------------------------
LOGS="$BASEDIR/logs"
make_dir "${LOGS}"
LOGFILE="$LOGS/$SCRIPT.log"

#-------------------------------------------------------------------------------
# Options
#-------------------------------------------------------------------------------
#
# The leading ':' in the option string selects silent error reporting, so an
# unknown option and a missing argument to '-d' both end up in the '*' branch.
#
while getopts :d:DFhv opt
do
    case "${opt}" in
        D) DEBUG=1;;
        d) DRYRUN=$OPTARG;;
        F) FORCE=1;;
        h) _usage 0;;
        v) VERBOSE=1;;
        *) echo "** Unknown option"
           _usage 1;;
    esac
done
shift $((OPTIND - 1))

#
# Set option defaults and check their values.
#
# DRYRUN is validated as a string. An arithmetic comparison would evaluate
# a non-numeric value such as 'foo' as 0, which would silently switch dry-run
# mode OFF and let the script make real changes.
#
DRYRUN=${DRYRUN:-1}
if [[ ! $DRYRUN =~ ^[01]$ ]]; then
    echo "** Use '-d 0' or '-d 1'"
    _usage 1
fi
[[ $DRYRUN -eq 1 ]] && echo "Dry run mode"

DEBUG=${DEBUG:-0}
[[ $DEBUG -eq 1 ]] && coloured 'yellow' "Debug mode"

FORCE=${FORCE:-0}

VERBOSE=${VERBOSE:-0}
#
# Should have one argument (after the options have been removed)
#
if [[ $# != 1 ]]; then
coloured 'red' "Missing argument"
_usage 1
fi
item="${1}"
#
# Ensure item spec is correctly formatted. The digits are forced to base 10
# with '10#' so that leading zeroes are not interpreted as octal, then the
# item name is rebuilt as 'hpr' followed by at least four digits.
#
# NOTE(review): the regular expression is not anchored, so an argument which
# merely contains 'hpr<digits>' is accepted and reduced to that part - confirm
# whether this is intended.
#
if [[ $item =~ hpr([0-9]+) ]]; then
printf -v item 'hpr%04d' "$((10#${BASH_REMATCH[1]}))"
else
coloured 'red' "Incorrect show specification: $item"
coloured 'yellow' "Use 'hpr9999' format"
exit 1
fi
_DEBUG "Parsed item: $item"
#
# Start the log for this run, recording the script version and the mode
#
_log "$SCRIPT $VERSION ($(_ifbool "$DRYRUN" 'dry-run' 'live'))"
#
# Having an entry for the show in 'ia.db' is important, so check there is one.
# '${item:3}' is the item name without its 'hpr' prefix, i.e. the digits
# produced by the 'printf' above.
#
SQL="select 1 from episodes where id = ${item:3}"
if [[ $(sqlite3 -list "$IADB" "$SQL" 2>/dev/null) -ne 1 ]]; then
coloured 'red' "Unable to find show $item in the local IA database"
coloured 'yellow' "Can't continue"
exit 1
fi
_verbose "$(coloured 'yellow' "Show $item is in the local IA database")"
_log "Show $item is in the local IA database"
#
# It's possible that the show upload failed before anything was uploaded, even
# the metadata. It's never been seen, but it seems wise to cater for it.
#
# This check queries the IA each time the script is run (it is active, not
# commented out).
#
if ! ia metadata "$item" --exists > /dev/null 2>&1; then
coloured 'red' "This item is not apparently on the IA; can't continue"
exit 1
fi
_verbose "$(coloured 'yellow' "Show $item is on the IA")"
_log "Show $item is on the IA"
#
# Directory paths
#
# FROMPARENTDIR  the 'eps' directory on the backup disk
# FROMDIR        the show's own directory below 'eps'
# TOPARENTDIR    the local cache directory for the show (below $REPAIRS)
# TOASSETDIR     the show's asset directory within the cache
#
FROMPARENTDIR="$BACKUP/public_html/eps"
FROMDIR="$FROMPARENTDIR/$item"
TOPARENTDIR="$REPAIRS/$item"
TOASSETDIR="$TOPARENTDIR/$item"
#
# RE to ignore certain files using 'grep -v -E ...'. It matches names ending
# with the item name followed by one of the listed audio extensions.
#
IGNORE="($item\.(flac|mp3|ogg|opus|spx|wav)$)"
#-------------------------------------------------------------------------------
# Check there are asset files on the backup disk before proceeding. At least
# we need the transcripts. If no files at all we can't continue.
#
# The search covers everything below $FROMPARENTDIR, looking for regular files
# whose names begin with the item name, and then discards any that match
# $IGNORE. We also give up if the show's directory $FROMDIR doesn't exist.
#-------------------------------------------------------------------------------
declare -a BACKUPFILES
mapfile -t BACKUPFILES < \
<(find "$FROMPARENTDIR" -type f -name "$item*" | grep -v -E "${IGNORE}")
_DEBUG "$(coloured 'purple' "Backup files")" "${BACKUPFILES[@]}"
if [[ ! -d $FROMDIR || ${#BACKUPFILES[@]} -eq 0 ]]; then
coloured 'red' "No files found in $FROMDIR"
coloured 'red' "Can't continue!"
exit 1
fi
_log "Files found on backup disk ${#BACKUPFILES[*]}"
#-------------------------------------------------------------------------------
# Make the needed local cache directory for later
#-------------------------------------------------------------------------------
#
# Unless forced (-F) an existing cache directory for this show is taken to
# mean the work has been done before, so we stop and suggest what to do.
#
if [[ $FORCE -ne 1 && -e $TOPARENTDIR ]]; then
    coloured 'red' "Directory $TOPARENTDIR already exists; can't continue."
    coloured 'yellow' 'This implies that all files have been copied already.'
    coloured 'yellow' "If you're sure, consider running: '$REPIT -X -d0 $item'"
    coloured 'yellow' 'Otherwise, consider running again with option -F.'
    exit 1
fi

#
# Create the asset directory (and so its parent, the show's cache directory).
# 'make_dir' reports a failure and stops the script, so we never carry on
# without somewhere to copy files to.
#
if [[ $DRYRUN -eq 1 ]]; then
    coloured 'yellow' "Would have created directory $TOASSETDIR"
else
    make_dir "$TOASSETDIR"
    _verbose "$(coloured 'yellow' "Created directory $TOASSETDIR")"
    _log "Created directory $TOASSETDIR"
fi
#-------------------------------------------------------------------------------
# Collect asset data from the database: the file names recorded in the
# 'assets' table of $IADB for this show, one per array element
#-------------------------------------------------------------------------------
SQL="select filename from assets where episode_id = ${item:3}"
declare -a IADBASSETS
mapfile -t IADBASSETS < <(sqlite3 -list "$IADB" "$SQL" 2>/dev/null)
_DEBUG "$(coloured 'purple' "SQLite IA DB files")" "${IADBASSETS[@]}"
_log "Files found in ia.db ${#IADBASSETS[*]}"
#-------------------------------------------------------------------------------
# Collect IA data, only original files generated by HPR. We exclude audio
# files from this set.
#
# The 'jq' program selects entries in the item's 'files' list whose 'source'
# is "original" and whose 'format' is neither "Metadata" nor "Item Tile", and
# outputs their names. The audio files are then removed with the $IGNORE
# expression.
#-------------------------------------------------------------------------------
JQPROG='.files[] | select(.source == "original" and .format != "Metadata" and '
JQPROG+='.format != "Item Tile") | (.name) | @text'
declare -a IAFILES
mapfile -t IAFILES < \
<(ia metadata "$item" | $JQ -r "$JQPROG" | grep -v -E "${IGNORE}")
_DEBUG "$(coloured 'purple' "IA files (originals)")" "${IAFILES[@]}"
_log "Files found on IA (originals) ${#IAFILES[*]}"
#-------------------------------------------------------------------------------
# Work out whether to copy assets from the backup disk, or whether to move
# files on the IA. Whatever we decide we also need to copy transcripts from
# the backup disk and upload to the IA
#-------------------------------------------------------------------------------
#
# Check each file the IA holds at the top level of the item against the asset
# names recorded in the $IADB database. Files the IA already holds in
# a sub-directory (their names contain a '/') are where we want them and are
# skipped.
#
declare -a MOVES

coloured 'purple' "Checking IA files for moves"

#
# A top-level IA file whose name is exactly the same as an asset name in the
# database needs to be moved to the sub-directory.
#
# The comparison uses fixed-string, whole-line matching (grep -F -x). Treating
# the name as an unanchored regular expression would also select database
# entries that merely contain the name (giving several lines back, which can
# never equal the name, so the move would be missed) and would misbehave for
# names containing characters that are special in regular expressions.
#
for asset in "${IAFILES[@]}"; do
    #
    # Skip IA files with directories
    #
    if [[ $asset =~ / ]]; then
        continue
    fi

    if printf '%s\n' "${IADBASSETS[@]}" | grep -q -F -x -- "$asset"; then
        MOVES+=("$asset")
    fi
done
#
# If we found any moves then we can move them in the IA item now and copy the
# files from the backup disk to the cache in case we need them. They will
# eventually get deleted by 'cron'.
#
# The counter is initialised here, before the test, because it is also used
# after it when deciding whether to wait for IA tasks. In dry-run mode it
# counts the moves that would have been made, so that the report and the
# 'would have waited' message reflect what a live run would do.
#
mcount=0

if [[ ${#MOVES[@]} -gt 0 ]]; then
    _DEBUG "$(coloured 'purple' "Files to be moved")" "${MOVES[@]}" "----"

    for asset in "${MOVES[@]}"; do
        # source & destination for IA moves
        iafrom="$item/$asset"
        iato="$item/$item/$asset"
        _DEBUG "\$iafrom: $iafrom" "\$iato: $iato" ""

        #
        # If IA source and destination are the same no moves are needed. For
        # the local cache the later 'rsync' will be enough.
        #
        if [[ $iafrom != "$iato" ]]; then
            if [[ $DRYRUN -eq 1 ]]; then
                coloured 'yellow' "ia move $iafrom $iato --no-derive --no-backup"
                coloured 'yellow' "cp $FROMPARENTDIR/$asset $TOASSETDIR/"
                ((mcount += 1))
            else
                #
                # Perform the move. If the retries are exceeded things get
                # complicated, so just abort so we can try again later.
                #
                _verbose "$(coloured 'blue' "Moving $iafrom → $iato on IA")"
                if _IA_move "$iafrom" "$iato"; then
                    #
                    # Update the cache (but only if the move occurred). A
                    # failed copy is reported but isn't fatal since the move
                    # on the IA has already been done.
                    #
                    _verbose "$(coloured 'blue' "Copying from backup disk to cache")"
                    if ! cp "$FROMPARENTDIR/$asset" "$TOASSETDIR/"; then
                        coloured 'red' "Failed to copy $FROMPARENTDIR/$asset to $TOASSETDIR/"
                        _log "Failed to copy $FROMPARENTDIR/$asset to $TOASSETDIR/"
                    fi
                    ((mcount += 1))
                else
                    coloured 'red' "Retries exhausted. Aborting recovery"
                    exit 1
                fi
            fi
        fi
    done

    #
    # Report what was done (or what would have been done)
    #
    if [[ $DRYRUN -eq 1 ]]; then
        coloured 'yellow' "Would have moved $mcount $(ngettext file files "$mcount")"
    else
        coloured 'green' "Moved $mcount $(ngettext file files "$mcount")"
        _log "Moved $mcount $(ngettext file files "$mcount")"
    fi
else
    coloured 'yellow' "No moves needed"
    _log "No moves needed"
fi

#
# Wait for the IA moves to finish. Only relevant if there were any moves; in
# dry-run mode just say what would have happened.
#
if [[ $mcount -gt 0 ]]; then
    if [[ $DRYRUN -eq 0 ]]; then
        until [[ $(queued_tasks "$item") -eq 0 ]]; do
            coloured 'yellow' "Waiting for IA tasks to complete"
            sleep 1m
        done
    else
        coloured 'yellow' "Would have waited for any IA tasks to complete"
    fi
fi
#-------------------------------------------------------------------------------
# Copy files from the backup disk to the cache
#
# The show's directory on the backup disk ($FROMDIR) is copied with 'rsync',
# leaving out any 'index.html' files. In dry-run mode 'rsync' is still run
# but with its own '-n' (dry-run) option so that it only lists what it would
# transfer.
#-------------------------------------------------------------------------------
if [[ $DRYRUN -eq 1 ]]; then
coloured 'yellow' "Would have copied files from backup disk → cache"
rsync -n -vaP --exclude=index.html "$FROMDIR" "$TOPARENTDIR"
else
rsync -vaP --exclude=index.html "$FROMDIR" "$TOPARENTDIR"
_verbose "$(coloured 'yellow' "Copied files from $FROMDIR")"
_log "Copied files from $FROMDIR"
fi
# TODO: Is this needed?
#
# Put any source audio in the right place.
#
# if [[ $DRYRUN -eq 1 ]]; then
# coloured 'yellow' "Would have moved source files if found"
# else
# #
# # Turn on 'nullglob' to get an empty result if the glob expression doesn't
# # match.
# #
# NG=$(shopt -p nullglob)
# shopt -s nullglob
#
# #
# # Any source files should be in repairs/hpr1234/ and should go to the IA
# # in the comparable place. We will not put it on the HPR server though.
# #
# # TODO: Is this right?
# movecount=0
# for file in "$TOPARENTDIR"/*_source.*; do
# if mv "$file" "$TOPARENTDIR"; then
# ((movecount++))
# fi
# done
#
# eval "$NG"
#
# #
# # Show the directories after any move
# #
# if [[ $movecount -gt 0 ]]; then
# _verbose "$(coloured 'yellow' "Moved source file(s)")"
# ls -lR "$REPAIRS/$item/"
# fi
#
# fi
#-------------------------------------------------------------------------------
# Using the cache as the reference upload whatever is missing to the IA
#
# The work is handed over to the 'repair_item' script ($REPIT), which is run
# with the options '-X' and '-d0' (its own dry-run mode turned off) for this
# item. Nothing is run in dry-run mode.
#-------------------------------------------------------------------------------
if [[ $DRYRUN -eq 1 ]]; then
coloured 'yellow' "Would have found and repaired missing files"
else
_verbose "$(coloured 'yellow' "Finding and repairing missing files")"
_log "Finding and repairing missing files (with $REPIT)"
"$REPIT" -X -d0 "$item"
fi
# vim: syntax=sh:ts=8:sw=4:ai:et:tw=78:fo=tcrqn21:fdm=marker

View File

@ -15,15 +15,15 @@
# BUGS: ---
# NOTES: ---
# AUTHOR: Dave Morriss (djm), Dave.Morriss@gmail.com
# VERSION: 0.0.6
# VERSION: 0.0.7
# CREATED: 2024-05-10 21:26:31
# REVISION: 2024-07-10 15:12:54
# REVISION: 2024-08-04 19:40:52
#
#===============================================================================
# set -o nounset # Treat unset variables as an error
VERSION="0.0.6"
VERSION="0.0.7"
SCRIPT=${0##*/}
# DIR=${0%/*}
@ -357,7 +357,7 @@ else
exit 1
fi
_DEBUG "$(printf '%s\n' "${iacache[@]}")"
_DEBUG "IA cache" "${iacache[@]}"
#
# Determine which files are assets
@ -461,6 +461,10 @@ fi
#-------------------------------------------------------------------------------
# Compare the two asset lists and return what's missing on the HPR server
#-------------------------------------------------------------------------------
# TODO: This algorithm does not handle the instance where there are pictures
# in one directory and a lower directory containing thumbnails, AND THE FILE
# NAMES ARE THE SAME!
#
declare -a missing
find_missing ia_asset hpr_asset missing
_verbose "$(coloured 'cyan' "** missing (${#missing[@]}):")"
@ -471,7 +475,8 @@ if [[ ${#missing[@]} -eq 0 ]]; then
_log "No missing assets detected; nothing to do"
exit
else
coloured 'yellow' "Found ${#missing[@]} files missing on the HPR server"
coloured 'yellow' \
"Found ${#missing[@]} $(ngettext file files ${#missing[@]}) missing on the HPR server"
fi
#-------------------------------------------------------------------------------
@ -548,6 +553,10 @@ fi
#-------------------------------------------------------------------------------
# Synchronise assets to the directory
#-------------------------------------------------------------------------------
# We perform an 'rsync' over 'ssh' to synchronise files from
# ~/HPR/InternetArchive/assets/hprXXXX/files/hprXXXX to
# public_html/eps/hprXXXX (on the HPR server)
#
# shellcheck disable=SC2059 disable=SC2089
printf -v command "$RSYNCTPL" "$LOCAL_PARENTDIR/" "$REMOTE_PARENTDIR/"

View File

@ -6,19 +6,24 @@
# USAGE: ./repair_item [-h] [-v] [-d {0|1}] [-D] [-l N] [-X] itemname
#
# DESCRIPTION: Repairs an IA "item" (HPR show) if something has failed during
# the upload.
# the upload (and when recovering deleted files from the
# changeover to the HPR static site).
#
# The most common failures are caused by the file upload
# processes timing out and being aborted (by the 'ia' tool which
# performs the item creation and the uploads). This failure
# means that a show being processed on 'borg' does not get all
# of the components loaded to the IA.
# of the components loaded to the IA. This happens during the
# sequence of running the 'make_metadata' Perl script which
# generates a CSV file of show data, followed by 'ia metadata
# --spreadsheet=<CSV file>'. Failures in the second part cause
# it to be aborted
#
# This script looks at the files belonging to the show (stored
# temporarily on 'borg') and determines which have not been
# uploaded, then takes steps to perform the uploads.
#
# Version 0.0.10 onwards has the capability to repair an IA item
# Version 0.0.11 onwards has the capability to repair an IA item
# from the HPR backup disk. This seems to be necessary because
# the transcripts were not carried over (although we are
# adding them to the IA for new shows now, older ones were never
@ -30,20 +35,24 @@
# source file is in the upper one. This emulates the placement
# on the IA itself.
#
# This script can be called directly to recover a new show which
# failed during creation/upload, or by 'recover_transcripts'
# which is repairing shows with missing assets.
#
# OPTIONS: ---
# REQUIREMENTS: ---
# BUGS: ---
# NOTES: ---
# AUTHOR: Dave Morriss (djm), Dave.Morriss@gmail.com
# VERSION: 0.0.10
# VERSION: 0.0.11
# CREATED: 2020-01-05 22:42:46
# REVISION: 2024-07-12 14:39:38
# REVISION: 2024-07-20 17:06:10
#
#===============================================================================
#set -o nounset # Treat unset variables as an error
VERSION="0.0.10"
VERSION="0.0.11"
SCRIPT=${0##*/}
# DIR=${0%/*}
@ -55,6 +64,7 @@ STDOUT="/dev/fd/2"
#
case $(hostname) in
i7-desktop)
# TODO: consider not allowing this to be run anywhere but on 'borg'
BASEDIR="$HOME/HPR/InternetArchive"
UPLOADS="$HOME/HPR/IA/uploads"
REPAIRS="$BASEDIR/repairs"
@ -100,7 +110,7 @@ TMP1=$(mktemp) || { echo "$SCRIPT: creation of temporary file failed!"; exit 1;
trap 'cleanup_temp $TMP1' SIGHUP SIGINT SIGPIPE SIGTERM EXIT
# {{{ -- Functions -- Upload, exists_in, queued_tasks, _DEBUG, _usage
# {{{ -- Functions -- Upload, exists_in, queued_tasks, _usage
#=== FUNCTION ================================================================
# NAME: Upload
@ -174,19 +184,6 @@ queued_tasks () {
return
}
#=== FUNCTION ================================================================
# NAME: _DEBUG
# DESCRIPTION: Writes a message if in DEBUG mode
# PARAMETERS: List of messages
# RETURNS: Nothing
#===============================================================================
_DEBUG () {
[ "$DEBUG" == 0 ] && return
for msg in "$@"; do
printf 'D> %s\n' "$msg"
done
}
#=== FUNCTION ================================================================
# NAME: _usage
# DESCRIPTION: Reports usage; always exits the script after doing so
@ -297,10 +294,11 @@ fi
item="${1}"
#
# Ensure item spec is correctly formatted
# Ensure item spec is correctly formatted. Have to cater for leading zeroes
# being interpreted as octal.
#
if [[ $item =~ hpr([0-9]+) ]]; then
printf -v item 'hpr%04d' "${BASH_REMATCH[1]}"
printf -v item 'hpr%04d' "$((10#${BASH_REMATCH[1]}))"
else
coloured 'red' "Incorrect show specification: $item"
coloured 'yellow' "Use 'hpr9999' format"
@ -310,7 +308,7 @@ _DEBUG "Parsed item: $item"
#
# It's possible that the show upload failed before anything was uploaded, even
# the metadata. It's never been seen, but it seems wise to cater for it.
# the metadata. It's rarely seen, but it seems wise to cater for it.
#
if ! ia metadata "$item" --exists > /dev/null 2>&1; then
coloured 'red' "This item is not apparently on the IA; can't continue"
@ -323,7 +321,7 @@ fi
# mysteriously vanished from the IA. The directories here are equivalent to
# those used by 'repair_assets'. There is a top-level directory that represents
# the IA item, and below that a hierarchy defining placement under the item.
# There is a 'repairs' directory per host in case we need to preair IA stuff
# There is a 'repairs' directory per host in case we need to repair IA stuff
# from elsewhere.
#
if [[ $EXTENDED -eq 1 ]]; then

197
InternetArchive/snapshot_metadata Executable file
View File

@ -0,0 +1,197 @@
#!/bin/bash -
#===============================================================================
#
# FILE: snapshot_metadata
#
# USAGE: ./snapshot_metadata episode_number
#
# DESCRIPTION: Collects metadata from the IA for a given show and stores it
# in the cache.
#
# OPTIONS: ---
# REQUIREMENTS: ---
# BUGS: ---
# NOTES: ---
# AUTHOR: Dave Morriss (djm), Dave.Morriss@gmail.com
# VERSION: 0.0.2
# CREATED: 2024-08-16 20:36:51
# REVISION: 2024-08-17 10:31:15
#
#===============================================================================
set -o nounset                              # Treat unset variables as an error

VERSION="0.0.2"

SCRIPT=${0##*/}
# DIR=${0%/*}

STDOUT="/dev/fd/2"

#
# The working directory depends on which host we are running on; any other
# host is refused.
#
case $(hostname) in
    i7-desktop) BASEDIR="$HOME/HPR/InternetArchive" ;;
    borg)       BASEDIR="$HOME/IA" ;;
    *)          echo "Wrong host!"; exit 1 ;;
esac

#
# Work from the base directory; give up if it can't be entered
#
if ! cd "$BASEDIR"; then
    echo "Failed to cd to $BASEDIR"
    exit 1
fi
#
# Load library functions. Nothing below can work without them, so a missing
# library must end the script with a failure status (a bare 'exit' here would
# return the status of the 'echo', i.e. success).
#
LIB="$HOME/HPR/function_lib.sh"
[ -e "$LIB" ] || { echo "Unable to source functions"; exit 1; }
# shellcheck disable=SC1090
source "$LIB"
#
# Enable coloured messages
#
define_colours

#
# Sanity checks: the 'ia' command must be on the PATH and 'view_derivatives'
# must exist in the base directory
#
IA=$(command -v ia)
if [ -z "$IA" ]; then
    echo "Program 'ia' was not found"
    exit 1
fi

VIEWD="$BASEDIR/view_derivatives"
if [ ! -e "$VIEWD" ]; then
    echo "Program '$VIEWD' was not found"
    exit 1
fi
# {{{ -- Functions -- _usage
#=== FUNCTION ================================================================
# NAME: make_dir
# DESCRIPTION: Ensures that a directory exists, creating it (and any missing
#              parents) when necessary. A failure to create it is reported
#              in red and ends the script.
# PARAMETERS: $1 directory path
# RETURNS: True if the directory exists or was created, otherwise exits the
#          caller script with status 1
#===============================================================================
make_dir () {
    local target="${1}"

    # Nothing to do when the directory is already there
    [[ -d $target ]] && return

    if ! mkdir -p "$target"; then
        coloured 'red' "Failed to create $target"
        exit 1
    fi
}
#=== FUNCTION ================================================================
# NAME: _usage
# DESCRIPTION: Writes the usage message to $STDOUT (which this script points
#              at file descriptor 2) and then ends the script
# PARAMETERS: 1 - the integer to pass to the 'exit' command (default 0)
# RETURNS: Nothing; never returns to the caller
#===============================================================================
_usage () {
    local -i status=${1:-0}

    # The '<<-' form strips leading tabs, so the text and the terminator may
    # be tab-indented without affecting the output
    cat >$STDOUT <<-endusage
${SCRIPT} - version: ${VERSION}
Usage: ./${SCRIPT} showid
Collects notes for a show and adds them to the cache directory
Arguments:
showid The show id in the form 'hpr1234'
endusage

    exit "$status"
}
# }}}
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#-------------------------------------------------------------------------------
# Argument check
#-------------------------------------------------------------------------------
# Should have one argument
#
if [[ $# != 1 ]]; then
    coloured 'red' "Missing argument"
    _usage 1
fi

# Lower-case the argument so that 'HPR1234' is accepted as well
show="${1,,}"

#
# Ensure show id is correctly formatted. We want it to be 'hpr1234'. Have to
# cater for leading zeroes being interpreted as octal by 'printf' ('0089'
# would be rejected and '0010' would become 8), so the digits are forced to
# base 10 first.
#
if [[ $show =~ (hpr)?([0-9]+) ]]; then
    printf -v show 'hpr%04d' "$((10#${BASH_REMATCH[2]}))"
else
    coloured 'red' "Incorrect show specification: $show"
    coloured 'yellow' "Use 'hpr9999' or '9999' format"
    exit 1
fi
#-------------------------------------------------------------------------------
# Setting up paths
#-------------------------------------------------------------------------------
#
# CACHEDIR is the top of the cache, where asset details and files are stored
#
CACHEDIR="$BASEDIR/assets"
if [ ! -d "$CACHEDIR" ]; then
    coloured 'red' "Creating cache directory"
    make_dir "$CACHEDIR"
fi

#
# LOCAL_ASSETDIR is the part of the cache that belongs to this show
#
LOCAL_ASSETDIR="$CACHEDIR/${show}"
if [ ! -d "$LOCAL_ASSETDIR" ]; then
    coloured 'green' "Creating cache directory for $show"
    make_dir "$LOCAL_ASSETDIR"
fi

#
# Files kept in the show's cache directory: the metadata saved from the IA
# and the list of derived file names
#
METADATA="$LOCAL_ASSETDIR/metadata.json"
DERIVED="$LOCAL_ASSETDIR/derived.lis"
#-------------------------------------------------------------------------------
# Save the IA metadata unless we already have the file
#-------------------------------------------------------------------------------
# The output redirection creates $METADATA before 'ia' has produced anything,
# so a failed or empty download must be removed again. Otherwise the next run
# would find the file, assume the metadata had been collected and never retry.
#
if [[ ! -e $METADATA ]]; then
    if ia metadata "$show" > "$METADATA"; then
        if [[ -s $METADATA ]]; then
            coloured 'green' "Created metadata file"
        else
            coloured 'red' "Metadata file is empty"
            rm -f "$METADATA"
            exit 1
        fi
    else
        coloured 'red' "Creation of metadata file failed"
        rm -f "$METADATA"
        exit 1
    fi
else
    coloured 'yellow' "Metadata already exists, not replacing it"
fi
#-------------------------------------------------------------------------------
# Use the collected metadata to view the state of the IA, and collect the
# derived file names
#-------------------------------------------------------------------------------
coloured 'blue' "Viewing IA files"

# Report for the user; its exit status is not checked
"$VIEWD" -verb "$METADATA"

#
# Save the list of derived files. A failure here has to be visible to whatever
# ran the script, so exit with a non-zero status rather than falling through
# to a plain 'exit' (which would return the status of the error message).
#
if "$VIEWD" -list "$METADATA" > "$DERIVED"; then
    nfiles="$(wc -l < "$DERIVED")"
    coloured 'green' "Saved 'derived' files for show $show ($nfiles)"
else
    coloured 'red' "Creation of $DERIVED file failed"
    exit 1
fi

exit 0
# vim: syntax=sh:ts=8:sw=4:ai:et:tw=78:fo=tcrqn21:fdm=marker

View File

@ -13,15 +13,15 @@
# BUGS: ---
# NOTES: ---
# AUTHOR: Dave Morriss (djm), Dave.Morriss@gmail.com
# VERSION: 0.0.10
# VERSION: 0.0.11
# CREATED: 2022-03-30 17:38:01
# REVISION: 2022-07-30 14:30:43
# REVISION: 2024-07-29 18:24:26
#
#===============================================================================
set -o nounset # Treat unset variables as an error
VERSION="0.0.10"
VERSION="0.0.11"
SCRIPT=${0##*/}
# DIR=${0%/*}
@ -55,6 +55,8 @@ case $HOSTNAME in
*) echo "Wrong host!"; exit 1 ;;
esac
# {{{ -- Functions -- exists_in, queued_tasks, movefile, is_empty, _log, _usage
#=== FUNCTION ================================================================
# NAME: exists_in
# DESCRIPTION: Checks the existence of a key in an associative array
@ -99,9 +101,12 @@ queued_tasks () {
# RETURNS: True if a move was done, otherwise False
#===============================================================================
movefile () {
local fromdir="${1:?Usage: movefile fromdir todir path}"
local todir="${2:?Usage: movefile fromdir todir path}"
local path="${3:?Usage: movefile fromdir todir path}"
local fromdir="${1:?Usage: movefile fromdir todir path [FORCE]}"
local todir="${2:?Usage: movefile fromdir todir path [FORCE]}"
local path="${3:?Usage: movefile fromdir todir path [FORCE]}"
local FORCE="${4:-0}"
[[ ! -v FORCE ]] && FORCE=0
#
# Chop up the path. If it's just a file name then $dir and $file are the
@ -126,8 +131,16 @@ movefile () {
# TODO: Compare the two files?
#
if [[ -e $todir/$path ]]; then
if [[ $FORCE -eq 1 ]]; then
echo "File exists: $todir/$path"
echo "FORCE mode is ON so overwriting"
mv --force "$fromdir/$path" "$todir/$path"
echo "Moved $fromdir/$path"
return 0
else
echo "File already exists: $todir/$path"
return 1
fi
else
mv "$fromdir/$path" "$todir/$path"
echo "Moved $fromdir/$path"
@ -147,16 +160,28 @@ is_empty() {
}
#=== FUNCTION ================================================================
# NAME: _DEBUG
# DESCRIPTION: Writes a message if in DEBUG mode
# PARAMETERS: List of messages
# NAME: _log
# DESCRIPTION: Writes a log record to the predefined $LOGFILE in this script
# using the predefined $LOGREC, a template for 'printf'. If the
# latter is not defined the function will use a default.
# For some reason 'shellcheck' objects to this function. The
# first argument after the 'printf' format string needs to be -1
# to make '%(fmt)T' use the current date and time.
# PARAMETERS: 1 - the message to write
# RETURNS: Nothing
#===============================================================================
_DEBUG () {
[ "$DEBUG" == 0 ] && return
for msg in "$@"; do
printf 'D> %s\n' "$msg"
done
# shellcheck disable=SC2317 disable=SC2059
_log () {
    local text="$1"

    # Without a log file there is nowhere to write, so end the script
    if [[ ! -v LOGFILE ]]; then
        echo "${FUNCNAME[0]}: \$LOGFILE is not defined"
        exit 1
    fi

    # Use the caller's record template if one is set, otherwise a default of
    # 'date time message'
    local template='%(%F %T)T %s\n'
    [[ -v LOGREC ]] && template=$LOGREC

    # The -1 is consumed by '%(...)T' and stands for the current time
    printf "$template" -1 "$text" >> "$LOGFILE"
}
#=== FUNCTION ================================================================
@ -189,6 +214,11 @@ Options:
to stop at.
-D Run in debug mode where a lot more information is
reported
-F Turn on FORCE mode (normally off). In this mode when
the files being tidied (moved) already exist, they are
overwritten. This is for the very rare case when
a show's audio has to be re-uploaded because of bad
audio or the wrong file being sent.
Examples
./tidy_uploaded # Run in (default) dry-run mode
@ -196,11 +226,14 @@ Examples
./tidy_uploaded -d0 # Live mode (without verbose messages)
./tidy_uploaded -c1 # Process 1 show in dry-run mode
./tidy_uploaded -D # Run with debugging enabled
./tidy_uploaded -F # Run with FORCE mode on
endusage
exit "$res"
}
# }}}
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#
@ -208,16 +241,18 @@ endusage
#
LOGS="$BASEDIR/logs"
LOGFILE="$LOGS/$SCRIPT.log"
LOGREC='%(%F %T)T %s\n'
#
# Process options
#
while getopts :c:d:Dhv opt
while getopts :c:d:DFhv opt
do
case "${opt}" in
c) COUNT=$OPTARG;;
D) DEBUG=1;;
d) DRYRUN=$OPTARG;;
F) FORCE=1;;
h) _usage 0;;
v) VERBOSE=1;;
*) echo "** Unknown option"
@ -239,6 +274,9 @@ if [[ $DRYRUN -ne 0 && $DRYRUN -ne 1 ]]; then
fi
[[ $DRYRUN -eq 1 ]] && echo "Dry run mode"
FORCE=${FORCE:-0}
[[ $FORCE -eq 1 ]] && echo "Force mode - overwriting existing files"
VERBOSE=${VERBOSE:-0}
DEBUG=${DEBUG:-0}
@ -361,12 +399,17 @@ while read -r path; do
#
# A file on the IA exists in the upload area. Move the
# local one if we're not in dry-run mode, otherwise just
# report the move we would do.
# report the move we would do. If FORCE mode is on
# overwrite the file.
#
if [[ $DRYRUN -eq 0 ]]; then
movefile "$UPLOADS" "$ARCHIVE" "$file" && ((moves++))
movefile "$UPLOADS" "$ARCHIVE" "$file" "$FORCE" && ((moves++))
else
if [[ $FORCE -eq 0 ]]; then
printf 'Would move %s\n\tto %s\n' "$frompath" "$topath"
else
printf 'Would move %s\n\toverwriting %s\n' "$frompath" "$topath"
fi
fi
fi
done < "$TMP1"

View File

@ -33,11 +33,12 @@
#
#===============================================================================
use 5.010;
use v5.36;
use strict;
use warnings;
use utf8;
#use experimental 'smartmatch';
# TODO: use experimental::try;
use match::smart;

View File

@ -8,6 +8,9 @@
# DESCRIPTION: Run the commands necessary to upload a batch of HPR shows to
# archive.org
#
# ** NOW OBSOLETE **
# We do uploads differently now.
#
# OPTIONS: ---
# REQUIREMENTS: ---
# BUGS: ---
@ -19,6 +22,9 @@
#
#===============================================================================
echo "Obsolete script. Do not use!"
cmd='exit'; $cmd
set -o nounset # Treat unset variables as an error
SCRIPT=${0##*/}