hpr-tools/InternetArchive/repair_item
Dave Morriss 6805cd662b Updated 'repair_item'
InternetArchive/repair_item: originally planned in 2020 as a Bash script
    to find missing files in shows and then add them, it was not turned
    into the current form until May 2024. Now, with the heavy loading of
    the IA servers, normal uploads are timing out and being aborted.
    This script is more "determined" to upload files and usually
    successfully "repairs" shows that need it.
2024-06-15 17:14:22 +01:00

479 lines
14 KiB
Bash
Executable File

#!/bin/bash -
#===============================================================================
#
#         FILE: repair_item
#
#        USAGE: ./repair_item [-h] [-v] [-d {0|1}] [-D] [-l N] itemname
#
#  DESCRIPTION: Repairs an IA "item" (HPR show) if something has failed during
#               the upload.
#
#               The most common failures are caused by the file upload
#               processes timing out and being aborted (by the 'ia' tool which
#               performs the item creation and the uploads). This failure
#               means that a show being processed on 'borg' does not get all
#               of the components loaded to the IA.
#
#               This script looks at the files belonging to the show (stored
#               temporarily on 'borg') and determines which have not been
#               uploaded, then takes steps to perform the uploads.
#
#      OPTIONS: ---
# REQUIREMENTS: ---
#         BUGS: ---
#        NOTES: ---
#       AUTHOR: Dave Morriss (djm), Dave.Morriss@gmail.com
#      VERSION: 0.0.9
#      CREATED: 2020-01-05 22:42:46
#     REVISION: 2024-06-14 18:03:58
#
#===============================================================================

#set -o nounset                              # Treat unset variables as an error

VERSION="0.0.9"

SCRIPT=${0##*/}
# DIR=${0%/*}

# NOTE: despite the name this is file descriptor 2 (standard error); the usage
# report is written to it.
STDOUT="/dev/fd/2"

#
# Select the appropriate working directory for the host
#
case $(hostname) in
    i7-desktop)
        BASEDIR="$HOME/HPR/InternetArchive"
        UPLOADS="$HOME/HPR/IA/uploads"
        ;;
    borg)
        BASEDIR="$HOME/IA"
        UPLOADS="/data/IA/uploads"
        ;;
    *)
        echo "Wrong host!"
        exit 1
        ;;
esac

cd "$BASEDIR" || { echo "Failed to cd to $BASEDIR"; exit 1; }

#
# Load library functions. A missing or unloadable library is a failure, so
# exit with a non-zero status (a bare 'exit' here would report success because
# it reuses the status of the preceding 'echo').
#
LIB="$HOME/bin/function_lib.sh"
[ -e "$LIB" ] || { echo "Unable to source functions"; exit 1; }
# shellcheck disable=SC1090
source "$LIB" || { echo "Unable to source functions"; exit 1; }

#
# Enable coloured messages
#
define_colours

#
# Sanity checks
#
JQ=$(command -v jq)
[ -n "$JQ" ] || { echo "Program 'jq' was not found"; exit 1; }
IA=$(command -v ia)
[ -n "$IA" ] || { echo "Program 'ia' was not found"; exit 1; }

#
# Make temporary files and set traps to delete them. 'cleanup_temp' is not
# defined in this file (presumably it comes from the sourced library). The
# file name is quoted so it reaches the cleanup as a single argument.
#
TMP1=$(mktemp) || { echo "$SCRIPT: creation of temporary file failed!"; exit 1; }
trap 'cleanup_temp "$TMP1"' SIGHUP SIGINT SIGPIPE SIGTERM EXIT
# {{{ -- Functions -- Upload, exists_in, queued_tasks, _DEBUG, _usage
#===  FUNCTION  ================================================================
#         NAME: Upload
#  DESCRIPTION: Uploads a file to the Internet Archive with various options.
#               All output (stdout and stderr) from the 'ia' command is
#               discarded; only its exit status is reported to the caller.
#   PARAMETERS: 1 - the item id (e.g. 'hpr1234')
#               2 - the path to the file for upload
#               3 - (optional) the path to the file on the IA
#               4 - (optional) list of options for 'ia upload' enclosed as
#                   a string; it is split on whitespace into separate words
#      RETURNS: 1 (after writing "File missing: ..." to standard output) if
#               the local file does not exist, otherwise the exit code of the
#               'ia upload' command
#===============================================================================
Upload () {
    local id=${1}
    local file=${2}
    local remote=${3:-}
    local options=${4:-}
    local -a args=()
    local -a extra=()

    if [[ ! -e $file ]]; then
        echo "File missing: $file"
        return 1
    fi

    #
    # Build the argument list as an array so that an id, local path or remote
    # name containing spaces or glob characters is passed as a single word
    #
    args=("$id" "$file")
    if [[ -n $remote ]]; then
        args+=("--remote-name=$remote")
    fi

    #
    # The options arrive as one string and have to become separate words.
    # 'read -a' splits on whitespace without performing pathname expansion.
    #
    if [[ -n $options ]]; then
        read -r -a extra <<< "$options"
        args+=("${extra[@]}")
    fi

    ia upload "${args[@]}" > /dev/null 2>&1
}
#===  FUNCTION  ================================================================
#         NAME: exists_in
#  DESCRIPTION: Tests whether a given key (or index) is set in a named array.
#               A key whose value is the empty string still counts as present.
#   PARAMETERS: $1      array name
#               $2      key value
#      RETURNS: True if the key exists, False otherwise
#
# Modified from
# https://stackoverflow.com/questions/13219634/easiest-way-to-check-for-an-index-or-a-key-in-an-array
#===============================================================================
exists_in () {
    #
    # The array name is spliced into the command text, while the key is left
    # as a literal '$2' so that it is only expanded when 'eval' runs, inside
    # the subscript. '${name[key]+word}' yields 'word' only when the element
    # is set.
    #
    # shellcheck disable=SC2086
    eval "[[ -n \${$1[\$2]+muahaha} ]]"
}
#===  FUNCTION  ================================================================
#         NAME: queued_tasks
#  DESCRIPTION: Asks the IA (through 'ia tasks') about the tasks for an item,
#               counts those whose category is "catalog" using 'jq', and
#               writes that number to STDOUT for the caller to capture.
#   PARAMETERS: $1      IA item (like hpr1192); the function aborts with a
#                       usage message if it is missing
#      RETURNS: Nothing
#===============================================================================
queued_tasks () {
    local target="${1:?Usage: queued_tasks item}"
    # Integer attribute: a non-numeric result is evaluated arithmetically
    local -i pending=0

    pending="$(
        ia tasks "$target" \
            | jq -s '[.[] | if .category == "catalog" then .status else empty end] | length'
    )"

    echo "$pending"
}
#===  FUNCTION  ================================================================
#         NAME: _DEBUG
#  DESCRIPTION: Writes each argument on its own line, prefixed with 'D> ', if
#               in DEBUG mode. The global variable DEBUG controls this: the
#               value 0 (or an unset/empty DEBUG) means debug mode is off.
#   PARAMETERS: List of messages
#      RETURNS: Nothing
#===============================================================================
_DEBUG () {
    # Kept local so that a caller's own 'msg' variable is not clobbered
    local msg

    [ "${DEBUG:-0}" == 0 ] && return

    for msg in "$@"; do
        printf 'D> %s\n' "$msg"
    done
}
#===  FUNCTION  ================================================================
#         NAME: _usage
#  DESCRIPTION: Writes the usage report to the file named by the global
#               variable STDOUT, then terminates the script. The report
#               interpolates the globals SCRIPT, VERSION and DEFLIMIT.
#   PARAMETERS: 1 - the integer to pass to the 'exit' command (default 0)
#      RETURNS: Nothing (the script always exits here)
#===============================================================================
_usage () {
    local -i exit_status=${1:-0}

    # shellcheck disable=SC2086
    cat <<-endusage >$STDOUT
${SCRIPT} - version: ${VERSION}
Usage: ./${SCRIPT} [-h] [-v] [-d {0|1}] [-D] [-l N] item
Attempts to repair an IA item where the upload has failed for some reason.
Options:
-h Print this help
-v Run in verbose mode where more information is
reported. Default is off.
-d 0|1 Dry run: -d 1 (the default) runs the script in dry-run
mode where nothing is changed but the actions that
will be taken are reported; -d 0 turns off dry-run
mode and the actions will be carried out.
-D Run in debug mode where a lot more information is
reported
-l N Control the number of files that can be uploaded
during one run of the script. The range is 1 to
$DEFLIMIT. This can be helpful when there are upload
problems.
Arguments:
item The item in the form 'hpr1234'
endusage

    exit "$exit_status"
}
# }}}
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

#
# Directories and files
#
LOGS="$BASEDIR/logs"
LOGFILE="$LOGS/$SCRIPT.log"

#
# Constants
#
DEFLIMIT=20

#
# Process options. The leading ':' in the option string selects silent error
# reporting, so a missing option argument arrives as ':' and an unknown option
# as '?'.
#
while getopts :d:Dhl:v opt
do
    case "${opt}" in
        D) DEBUG=1;;
        d) DRYRUN=$OPTARG;;
        h) _usage 0;;
        l) LIMIT=$OPTARG;;
        v) VERBOSE=1;;
        :) echo "** Option -$OPTARG requires an argument"
            _usage 1;;
        *) echo "** Unknown option"
            _usage 1;;
    esac
done
shift $((OPTIND - 1))

#
# Set option defaults and check their values
#
VERBOSE=${VERBOSE:-0}

# Only the literal values 0 and 1 are accepted; anything else (including
# non-numeric text) is rejected before it reaches a numeric comparison
DRYRUN=${DRYRUN:-1}
if [[ ! $DRYRUN =~ ^[01]$ ]]; then
    echo "** Use '-d 0' or '-d 1'"
    _usage 1
fi
[[ $VERBOSE -eq 1 && $DRYRUN -eq 1 ]] && echo "Dry run mode"

DEBUG=${DEBUG:-0}
[[ $DEBUG -eq 1 ]] && coloured 'yellow' "Debug mode"

# The limit must be a decimal number in the range 1..DEFLIMIT. The '10#'
# prefix forces base 10 so that a value like '08' is not parsed as octal.
LIMIT=${LIMIT:-$DEFLIMIT}
if [[ ! $LIMIT =~ ^[0-9]+$ ]] || (( 10#$LIMIT < 1 || 10#$LIMIT > DEFLIMIT )); then
    echo "** Use '-l 1' up to '-l $DEFLIMIT' or omit the option"
    _usage 1
fi
LIMIT=$((10#$LIMIT))

#
# Should have one argument
#
if [[ $# != 1 ]]; then
    coloured 'red' "Missing argument"
    _usage 1
fi
item="${1}"

#
# Ensure item spec is correctly formatted. The whole argument must be 'hpr'
# followed by digits. The number is converted with an explicit base of 10
# because printf (and shell arithmetic) would otherwise treat leading zeros as
# octal: 'hpr0089' would be an error and 'hpr0010' would become 'hpr0008'.
#
if [[ $item =~ ^hpr([0-9]+)$ ]]; then
    printf -v item 'hpr%04d' "$((10#${BASH_REMATCH[1]}))"
else
    coloured 'red' "Incorrect show specification: $item"
    coloured 'yellow' "Use 'hpr9999' format"
    exit 1
fi

_DEBUG "Parsed item: $item"
#
# It's possible that the show upload failed before anything was uploaded, even
# the metadata. It's never been seen, but it seems wise to cater for it.
#
if ! ia metadata "$item" --exists > /dev/null 2>&1; then
    coloured 'red' "This item is not apparently on the IA; can't continue"
    coloured 'yellow' "Try running the entire upload again from the start"
    exit 1
fi

#
# Declarations
#
declare -A fcache
declare -A iacache
declare -a missed

#
# Scan the directory 'UPLOADS' where files for upload to the IA are stored and
# collect everything for this item (show).
#
# See the `find' pipeline at the end of the loop which selects only files, not
# directories. It outputs the last change time and the full file path, sorts
# on the time, then removes it. This ensures we process the files in time
# order rather than alphabetic order of their names. The 'cut' keeps every
# field after the first so that a path containing spaces is not truncated.
#
# The variable 'item' is deliberately NOT reassigned inside the loop: it holds
# the validated item name from the command line and is needed unchanged by the
# rest of the script.
#
# TODO: This algorithm is from another script and is not needed here. The
# order of processing is irrelevant here so simplify the 'find' and the loop.
# We are only looking for the 'item' specified by the argument, not other
# ones.
#
while IFS= read -r path; do
    # The key stored is the path relative to 'UPLOADS'
    relpath="${path#"$UPLOADS"/}"

    [[ $VERBOSE -eq 1 ]] && echo "Found $path"

    _DEBUG "Path: $path"
    _DEBUG "Relative path: $relpath"
    _DEBUG "IA item: $item"

    if ! exists_in fcache "$relpath"; then
        # shellcheck disable=SC2034
        fcache["$relpath"]=1
    fi
done < <(find "$UPLOADS" -type f -regextype posix-extended \
    -regex ".*$item.*" -printf "%CY%Cm%Cd%CH%CM%CS %p\n" | sort | cut -f2- -d' ')

#
# Did we find anything?
#
if [[ ${#fcache[@]} -eq 0 ]]; then
    coloured 'red' "No files found for item $item in $UPLOADS"
    coloured 'red' "Can't continue"
    exit 1
fi
#
# Ask the IA whether this show still has tasks queued or running on its
# servers. If it has we stop here and leave it to the user to try again.
#
# TODO: This could be a loop waiting for tasks to complete rather than
# aborting and asking to be rerun.
#
tasks=$(queued_tasks "$item")
if [[ $tasks -gt 0 ]]; then
    coloured 'red' \
        "Item $item still has $tasks unfinished $(ngettext task tasks "$tasks")"
    coloured 'red' "Allow time for task(s) to finish and try again later"
    exit 1
fi

#
# Fetch the list of files the IA holds for the item into the file 'TMP1' (one
# name per line). A failure of 'ia list' means we can't proceed.
#
if ! ia list "$item" > "$TMP1"; then
    coloured 'red' "Item $item is not in the IA"
    coloured 'red' "Can't continue"
    exit 1
fi

#
# Record every listed file name as a key in 'iacache'
#
while read -r iafile; do
    # shellcheck disable=SC2034
    iacache[$iafile]=1
done < "$TMP1"

#
# Any local file whose relative path is not a key in 'iacache' is missing
# from the IA; remember it in 'missed'
#
for path in "${!fcache[@]}"; do
    exists_in iacache "$path" || missed+=("$path")
done
#
# Counters and defaults for the loop
#
retry_threshold=5
sleeptime=20
failures=0
upload_count=0

#
# Options handed to 'ia upload' (through the 'Upload' function) for each file
#
upload_options='--retries=5 --no-derive -H x-archive-keep-old-version:0'

#
# If there are missed files we can report what we'd be doing or do it,
# otherwise we have nothing to do.
#
if [[ ${#missed[@]} -eq 0 ]]; then
    coloured 'green' "All expected files for item $item are on the IA"
else
    mcount="${#missed[@]}"
    coloured 'red' "There $(ngettext "is 1 missing file" "are $mcount missing files" "$mcount"):"
    if [[ $DRYRUN -eq 1 ]]; then
        coloured 'blue' "Dry run: Would have run the following command(s):"
    fi

    for file in "${missed[@]}"; do
        if [[ $DRYRUN -eq 1 ]]; then
            #
            # Only display the command that would be run
            #
            coloured 'yellow' \
                "Upload $item $UPLOADS/$file '$file' '$upload_options'"
        else
            retries=0

            coloured 'blue' "Uploading $file"

            #
            # Call 'Upload' directly, with each argument quoted, rather than
            # building a command string and running it with 'eval'; a file
            # name containing quotes or spaces can then neither break the
            # command nor be interpreted as shell code.
            #
            # If the upload succeeds then write to the log and loop for the
            # next missing file. If it fails enter the 'until' loop and
            # report the problem. Count the number of times this is done, so
            # it doesn't loop forever. If we have reached the limit count this
            # as a failure and continue the parent loop (with the next missing
            # file). If we haven't retried enough yet, sleep for a while and
            # try again. The intention is to catch the case when an upload
            # times out. The 'ia' command is performing its own retries per
            # upload when the system is overloaded, but these are non-fatal.
            #
            until Upload "$item" "$UPLOADS/$file" "$file" "$upload_options"; do
                coloured 'red' "Failure when uploading $file"
                ((retries++))

                printf '%s Failed to upload %s to the IA [%d]\n' \
                    "$(date +%Y%m%d%H%M%S)" "$file" "$retries" >> "$LOGFILE"

                if (( retries >= retry_threshold )); then
                    ((failures++))
                    [[ $VERBOSE -eq 1 ]] && \
                        coloured 'blue' "Retry limit reached; abandoning this file"
                    # Leave the 'until' loop and move on in the 'for' loop
                    continue 2
                fi

                [[ $VERBOSE -eq 1 ]] && coloured 'blue' "Pausing for $sleeptime and retrying"
                sleep $sleeptime
            done # until Upload ...

            coloured 'green' "Uploaded $file to the IA"
            echo "$(date +%Y%m%d%H%M%S) Uploaded $file to the IA" >> "$LOGFILE"
        fi

        #
        # Count actual uploads and dry-run ones the same
        #
        ((upload_count++))

        #
        # Stop the missed file loop if we have reached the limiting number, in
        # dry-run and live mode
        #
        if [[ $upload_count -eq $LIMIT ]]; then
            coloured 'blue' "Upload limit ($LIMIT) reached"
            break
        fi
    done # for file in ...
fi

#
# Summarise how many upload failures were detected
#
if [[ $failures -gt 0 ]]; then
    coloured 'red' \
        "There $(ngettext "was $failures upload failure" "were $failures upload failures" "$failures")"
    coloured 'yellow' 'Run this script again to repeat the repair attempt'
fi
# vim: syntax=sh:ts=8:sw=4:ai:et:tw=78:fo=tcrqn21:fdm=marker