hpr-tools/InternetArchive/update_state

343 lines
10 KiB
Plaintext
Raw Permalink Normal View History

#!/bin/bash -
#===============================================================================
#
# FILE: update_state
#
# USAGE: ./update_state
#
# DESCRIPTION: A script to update the state of shows which have been sent to
# the IA. It looks at the current state of the 'reservations'
# table on the HPR database and selects all shows which are in
# the state 'MEDIA_TRANSCODED'. It checks each one to see if it
# is known to the IA and if so changes state to 'UPLOADED_TO_IA'.
#
# The IA check can be overridden using the '-F' option, but care
# should be taken not to do this unless it is known all eligible
# shows are uploaded.
#
# Note that the algorithm described here does not work for
# reserved shows like the Community News episodes since they are
# not submitted as such and have no entry in the 'reservations'
# table.
#
# OPTIONS: ---
# REQUIREMENTS: ---
# BUGS: ---
# NOTES: ---
# AUTHOR: Dave Morriss (djm), Dave.Morriss@gmail.com
# VERSION: 0.0.10
# CREATED: 2022-04-19 12:50:52
# REVISION: 2024-06-01 14:19:20
#
#===============================================================================
set -o nounset                  # Using an unset variable is an error

# Script name with any leading directory removed
SCRIPT=${0##*/}
# DIR=${0%/*}

# shellcheck disable=SC2034
VERSION="0.0.10"

# Destination used by _usage for its text (this is file descriptor 2)
STDOUT="/dev/fd/2"

#
# Load library functions, stopping at once if the library file is missing
#
LIB="$HOME/bin/function_lib.sh"
if [ ! -e "$LIB" ]; then
    echo "$SCRIPT: Unable to source functions"
    exit 1
fi
# shellcheck source=/home/cendjm/bin/function_lib.sh
source "$LIB"

#
# Colour codes (define_colours is presumably provided by the library just
# sourced - confirm)
#
define_colours
# {{{ ---- Functions: ---- _usage _DEBUG
#=== FUNCTION ================================================================
# NAME: _usage
# DESCRIPTION: Report usage. The text is written to the file named by the
#              global STDOUT and the script is then terminated.
# PARAMETERS: $1 (optional) exit status to finish with; default 0
# RETURNS: Does not return; exits with the requested status
#===============================================================================
_usage () {
    local -i res="${1:-0}"

    # The here-document is unquoted so ${SCRIPT} and $VERSION are expanded
    cat >"$STDOUT" <<-endusage
Usage: ./${SCRIPT} [-h] [-D] [-d] [-F] [-l N] [-m] [-R]
Version: $VERSION
Script to update the status in the 'reservations' table after a show has been
processed.
Options:
-h Print this help
-D Enable DEBUG mode where a lot of information about the working
of the script is displayed
-d Dry-run mode. Reports what it would do but doesn't do it
-F Force the update(s) without checking the state of the show on
the IA
-l N Limit the number of shows processed to N
-m Monochrome mode - no colours
-R Normally, if a show is not in the IA, the script retries
waiting for it to be uploaded (assuming it's being worked on
by the IA servers). Including -R limits the retries to one
which is useful when uploading multiple shows one at a time.
Examples
./${SCRIPT} -h
./${SCRIPT} -m
./${SCRIPT} -d
./${SCRIPT} -dm
./${SCRIPT} -Dd
./${SCRIPT} -F
./${SCRIPT} -l1
./${SCRIPT} -R
./${SCRIPT}
endusage
    exit "$res"
}
#=== FUNCTION ================================================================
# NAME: _DEBUG
# DESCRIPTION: Writes a message if in DEBUG mode. Each argument is printed
#              on its own line with a 'D> ' prefix. Nothing is written when
#              the global DEBUG is 0.
# PARAMETERS: List of messages
# RETURNS: Nothing
#===============================================================================
_DEBUG () {
    # Keep the loop variable private so a caller's 'msg' is not overwritten
    local msg

    [[ $DEBUG == 0 ]] && return

    for msg in "$@"; do
        printf 'D> %s\n' "$msg"
    done
}
# }}}
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#
# Pick the base directory according to the machine we are running on; any
# host not listed here is refused
#
if [[ $HOSTNAME == 'hprvps' || $HOSTNAME == 'marvin' || $HOSTNAME == 'borg' ]]; then
    # UPLOADS="/data/IA/uploads"
    BASEDIR="$HOME/IA"
elif [[ $HOSTNAME == 'i7-desktop' ]]; then
    # UPLOADS="$HOME/HPR/IA/uploads"
    BASEDIR="$HOME/HPR/IA"
else
    echo "Wrong host!"
    exit 1
fi

#
# Everything below uses paths relative to the base directory
#
if ! cd "$BASEDIR"; then
    echo "Can't cd to $BASEDIR"
    exit 1
fi
#
# Primary and fallback addresses of the CMS status interface. The fallback
# is the same address over plain http.
# NOTE(review): the curl config file presumably carries credentials, which
# the fallback would send unencrypted - confirm this is acceptable
#
URL='https://hub.hackerpublicradio.org/cms/status.php'
URL_BAK="http${URL#https}"

#
# Tools: curl run silently with its options taken from a local config file.
# The QUERY1 forms get a query string appended later; the QUERY2 forms ask
# for the output to be written to standard output.
#
BASECOM='curl -K ./.hpradmin_curlrc -s'
QUERY1="$BASECOM $URL"
QUERY2="$BASECOM -o - $URL"
QUERY1_BAK="$BASECOM $URL_BAK"
QUERY2_BAK="$BASECOM -o - $URL_BAK"

#
# Number of retries per show
#
RETRIES=3

#
# Option defaults
#
COLOUR=1        # colours are used unless -m is given
DRYRUN=0        # live mode unless -d is given
DEBUG=0         # no debug output unless -D is given
FORCE=0         # the IA is checked unless -F is given
RETRYING=1      # retry if a show's not on the IA, unless -R is given
DEFLIMIT=20     # default, and largest permitted, value for -l
#
# Process options
#
#=== FUNCTION ================================================================
# NAME: _parse_options
# DESCRIPTION: Parses the command-line options with getopts (silent error
#              mode) and records the choices in the globals DRYRUN, DEBUG,
#              FORCE, LIMIT, COLOUR and RETRYING. A missing option argument
#              and an unknown option are reported separately, each naming
#              the offending option letter.
# PARAMETERS: The script's arguments ("$@")
# RETURNS: Nothing; leaves OPTIND set so the caller can shift the options
#          away. Exits with status 1 on a bad option; -h ends the script
#          through _usage.
#===============================================================================
_parse_options () {
    local opt

    OPTIND=1
    while getopts ':hdDFl:mR' opt; do
        case "$opt" in
            h) _usage ;;
            d) DRYRUN=1 ;;
            D) DEBUG=1 ;;
            F) FORCE=1 ;;
            l) LIMIT=$OPTARG ;;
            m) COLOUR=0 ;;
            R) RETRYING=0 ;;
            # In silent mode getopts returns ':' for a missing argument and
            # '?' for an unknown option, with the letter in OPTARG. The '?'
            # must be escaped or it is a glob matching any single character.
            :)
                echo "$SCRIPT: Option -$OPTARG requires an argument; aborting" >&2
                exit 1
                ;;
            \?)
                echo "$SCRIPT: Invalid option -$OPTARG; aborting" >&2
                exit 1
                ;;
        esac
    done
}

_parse_options "$@"
shift $((OPTIND - 1))
#
# Monochrome output requested: drop the colour definitions
#
[[ $COLOUR -ne 0 ]] || undefine_colours
#
# Apply the default limit when -l was not given, then insist on a whole
# number from 1 to $DEFLIMIT. The digits-only test comes first because an
# arithmetic comparison would evaluate any other string as an expression
# (and abort on an unset name under 'nounset'). The '10#' prefix forces
# decimal so that a value such as '08' is not rejected as bad octal; the
# accepted value is stored back in that normalised form.
#
LIMIT=${LIMIT:-$DEFLIMIT}
if [[ $LIMIT =~ ^[0-9]+$ ]] && (( 10#$LIMIT >= 1 && 10#$LIMIT <= DEFLIMIT )); then
    LIMIT=$(( 10#$LIMIT ))
else
    echo "** Use '-l 1' up to '-l $DEFLIMIT' or omit the option"
    _usage 1
fi
#
# Tell the user when an option has changed the normal checking behaviour
#
[[ $FORCE -ne 1 ]] || coloured 'yellow' "Forcing updates without checking the IA state"
[[ $RETRYING -ne 0 ]] || coloured 'yellow' "Not retrying updates if the show is missing"

#
# Nothing may be left on the command line once the options are removed
#
if (( $# > 0 )); then
    coloured 'red' "** ${SCRIPT} takes no arguments"
    _usage 1
fi
#
# Fetch the current table of shows requiring work from the CMS. The reply is
# expected to look like:
# timestamp_epoc,ep_num,ep_date,key,status,email
# 1651286617,3617,2022-06-14,fda088e0e3bd5d0353ea6b7569e93b87626ca25976a0a,UPLOADED_TO_IA,lurkingprion@gmail.com
# 1651648589,3619,2022-06-16,e7d3810afa098863d81663418d8640276272284de68f1,UPLOADED_TO_IA,monochromec@gmail.com
#
# If the primary query command fails the fallback URL is tried, and the
# script stops if that fails as well.
# NOTE: Problem encountered 2022-09-23 because the SSL certificate has expired
#
# The query strings are left unquoted on purpose so that they split into a
# command and its arguments.
#
# shellcheck disable=SC2086
if ! reservations=$($QUERY2); then
    coloured 'red' "Problem querying $URL"
    coloured 'yellow' "Falling back to $URL_BAK"
    # shellcheck disable=SC2086
    if ! reservations=$($QUERY2_BAK); then
        coloured 'red' "Failed with fallback URL - aborting"
        exit 1
    fi
fi

_DEBUG "reservations = $reservations"
#
# Check which shows are on the IA and can be flagged as such. We get the work
# "queue" from the variable 'reservations' which contains lines returned from
# querying the CMS status interface.
#
#=== FUNCTION ================================================================
# NAME: _process_reservations
# DESCRIPTION: Reads the lines held in the global 'reservations'. For every
#              show in state 'MEDIA_TRANSCODED' it looks for the show on
#              the IA (skipped when FORCE is 1) and, once found, asks the
#              CMS to set the state to 'UPLOADED_TO_IA', using the fallback
#              URL if the first request fails. In dry-run mode the request
#              is only displayed. Stops after LIMIT shows have been looked
#              at.
# PARAMETERS: None. Reads the globals reservations, QUERY1, QUERY1_BAK, URL,
#             URL_BAK, RETRYING, RETRIES, FORCE, DRYRUN, LIMIT, yellow and
#             reset.
# RETURNS: Nothing. Sets the globals 'showcount' (shows examined, whatever
#          the outcome) and 'updated' (shows flagged, or which would have
#          been flagged in dry-run mode). Exits with status 1 if a state
#          change fails on both URLs.
#===============================================================================
_process_reservations () {
    local line show state
    local -i retry_count on_ia
    local update update_bak
    local -a update_cmd update_cmd_bak

    showcount=0
    updated=0

    while read -r line; do
        #
        # Only lines with at least five comma-separated fields and a
        # trailing comma are of interest; field 2 is the show number and
        # field 5 the state
        #
        [[ $line =~ ^([^,]+),([^,]+),([^,]+),([^,]+),([^,]+),.*$ ]] || continue
        show="${BASH_REMATCH[2]}"
        state="${BASH_REMATCH[5]}"

        #
        # Process shows in just one of the states
        #
        [[ $state == 'MEDIA_TRANSCODED' ]] || continue
        _DEBUG "show = $show, state = $state"

        #
        # If we're retrying (waiting for a show to be uploaded) then try
        # $RETRIES times, otherwise try just once
        #
        if (( RETRYING == 1 )); then
            retry_count=$RETRIES
        else
            retry_count=1
        fi

        #
        # Look for the show on the IA. If it's not found we wait 30 seconds
        # and look again while attempts remain. There is no wait after the
        # last attempt since nothing further is going to be tried.
        #
        on_ia=0
        while (( retry_count > 0 )); do
            if (( FORCE == 1 )) || ia metadata "hpr$show" --exists > /dev/null 2>&1; then
                on_ia=1
                break
            fi
            coloured 'red' "Show $show is not yet uploaded"
            retry_count=$(( retry_count - 1 ))
            if (( retry_count > 0 )); then
                sleep 30
            fi
        done

        if (( on_ia == 1 )); then
            update="${QUERY1}?ep_num=${show}&status=UPLOADED_TO_IA"
            update_bak="${QUERY1_BAK}?ep_num=${show}&status=UPLOADED_TO_IA"

            if (( DRYRUN == 1 )); then
                echo -e "Dry-run: would have run\n${yellow}$update${reset}"
            else
                #
                # Split the command strings into words with 'read' rather
                # than by unquoted expansion, so the '?' in the URL can
                # never be treated as a filename wildcard
                #
                read -r -a update_cmd <<< "$update"
                read -r -a update_cmd_bak <<< "$update_bak"

                coloured 'yellow' "$update"
                if ! "${update_cmd[@]}"; then
                    coloured 'red' "Problem querying $URL"
                    coloured 'yellow' "Falling back to $URL_BAK"
                    if ! "${update_cmd_bak[@]}"; then
                        coloured 'red' "Failed with fallback URL - aborting"
                        exit 1
                    fi
                fi
            fi

            #
            # Only shows which got this far count as successes
            #
            updated=$(( updated + 1 ))
        elif (( RETRYING == 1 )); then
            #
            # All the retries were used up without finding the show
            #
            coloured 'red' "Failed to update show $show; retry count reached"
            coloured 'yellow' "The command 'ia metadata hpr$show --exists' repeatedly returned \"failure\""
            coloured 'yellow' "Database updates not done"
            coloured 'yellow' "Try again later with './${SCRIPT}'"
        fi

        #
        # Stop the loop if we have reached the limiting number
        #
        showcount=$(( showcount + 1 ))
        if (( showcount == LIMIT )); then
            echo "Upload limit ($LIMIT) reached"
            break
        fi
    done <<< "$reservations"
}

_process_reservations

if [[ $DRYRUN -eq 0 ]]; then
    echo "Number of shows processed successfully: $updated"
fi
exit
# vim: syntax=sh:ts=8:sw=4:ai:et:tw=78:fo=tcrqn21:fdm=marker