hpr-tools/InternetArchive/update_state
Dave Morriss 0f1e727487 New 'reformat_html', plus some cleaning
InternetArchive/future_upload: now updates the state of shows

InternetArchive/reformat_html: new Perl script to reformat the HTML
    originally found in the HPR database in the 'notes' field to the format
    required in the 'description' field of an item on the IA. It reads
    from STDIN and writes to STDOUT.
2025-02-13 11:24:27 +00:00

342 lines
10 KiB
Bash
Executable File

#!/bin/bash -
#===============================================================================
#
# FILE: update_state
#
# USAGE: ./update_state [-h] [-D] [-d] [-F] [-l N] [-m] [-R]
#
# DESCRIPTION: A script to update the state of shows which have been sent to
# the IA. It looks at the current state of the 'reservations'
# table on the HPR database and selects all shows which are in
# the state 'MEDIA_TRANSCODED'. It checks each one to see if it is
# known to the IA and if so changes state to 'UPLOADED_TO_IA'.
#
# The IA check can be overridden using the '-F' option, but care
# should be taken not to do this unless it is known all eligible
# shows are uploaded.
#
# Note that the algorithm described here does not work for
# reserved shows like the Community News episodes since they are
# not submitted as such and have no entry in the 'reservations'
# table.
#
# OPTIONS: ---
# REQUIREMENTS: ---
# BUGS: ---
# NOTES: ---
# AUTHOR: Dave Morriss (djm), Dave.Morriss@gmail.com
# VERSION: 0.0.10
# CREATED: 2022-04-19 12:50:52
# REVISION: 2024-06-01 14:19:20
#
#===============================================================================
set -o nounset                  # Any reference to an unset variable is an error

# Name of this script with any leading directory part removed
SCRIPT="${0##*/}"
# DIR=${0%/*}

# shellcheck disable=SC2034
VERSION="0.0.10"

# Destination for the usage text. Despite the variable's name this is file
# descriptor 2, not file descriptor 1.
STDOUT="/dev/fd/2"

#
# Load library functions
#
LIB="${HOME}/bin/function_lib.sh"
if [ ! -e "$LIB" ]; then
    echo "$SCRIPT: Unable to source functions"
    exit 1
fi
# shellcheck source=/home/cendjm/bin/function_lib.sh
source "$LIB"

#
# Colour codes (presumably 'define_colours' comes from the library just
# sourced - it is not defined in this file)
#
define_colours
# {{{ ---- Functions: ---- _usage _DEBUG
#=== FUNCTION ================================================================
# NAME: _usage
# DESCRIPTION: Report usage. The text is written to the file named by the
#              global $STDOUT and the script then exits.
# PARAMETERS: $1 - exit status to finish with (optional, default 0)
# RETURNS: Nothing; does not return to the caller
#===============================================================================
_usage () {
    local -i res="${1:-0}"

    #
    # The here-document is deliberately flush with the left margin: '<<-'
    # strips leading tabs only, so the terminator must not be indented with
    # spaces.
    #
    cat >"$STDOUT" <<-endusage
Usage: ./${SCRIPT} [-h] [-D] [-d] [-F] [-l N] [-m] [-R]
Version: $VERSION
Script to update the status in the 'reservations' table after a show has been
processed.
Options:
-h Print this help
-D Enable DEBUG mode where a lot of information about the working
of the script is displayed
-d Dry-run mode. Reports what it would do but doesn't do it
-F Force the update(s) without checking the state of the show on
the IA
-l N Limit the number of shows processed to N
-m Monochrome mode - no colours
-R Normally, if a show is not in the IA, the script retries
waiting for it to be uploaded (assuming it's being worked on
by the IA servers). Including -R limits the retries to one
which is useful when uploading multiple shows one at a time.
Examples
./${SCRIPT} -h
./${SCRIPT} -m
./${SCRIPT} -d
./${SCRIPT} -dm
./${SCRIPT} -Dd
./${SCRIPT} -F
./${SCRIPT} -l1
./${SCRIPT} -m
./${SCRIPT} -R
./${SCRIPT}
endusage
    exit "$res"
}
#=== FUNCTION ================================================================
# NAME: _DEBUG
# DESCRIPTION: Writes each message on its own line, prefixed with 'D> ', but
#              only when the global $DEBUG is not 0
# PARAMETERS: List of messages
# RETURNS: Nothing
#===============================================================================
_DEBUG () {
    # Keep the loop variable private so a caller's own 'msg' is not clobbered
    local msg

    if [[ "$DEBUG" == 0 ]]; then
        return 0
    fi

    for msg in "$@"; do
        printf 'D> %s\n' "$msg"
    done
}
# }}}
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#
# Configure depending whether local or on borg
#
# Only the hosts listed here are accepted. Each one fixes BASEDIR, which then
# becomes the working directory; the curl command defined later names its
# config file relative to it (./.hpradmin_curlrc).
#
case "$HOSTNAME" in
    i7-desktop)
        # UPLOADS="$HOME/HPR/IA/uploads"
        BASEDIR="${HOME}/HPR/IA"
        ;;
    borg | marvin | hprvps)
        # UPLOADS="/data/IA/uploads"
        BASEDIR="${HOME}/IA"
        ;;
    *)
        echo "Wrong host!"
        exit 1
        ;;
esac

if ! cd "$BASEDIR"; then
    echo "Can't cd to $BASEDIR"
    exit 1
fi
#
# Tools
#
# Every request uses curl in silent mode (-s) with further options read from
# the config file ./.hpradmin_curlrc (-K). QUERY1 is the bare request; QUERY2
# additionally asks for the response on standard output (-o -).
#
URL='https://hub.hackerpublicradio.org/cms/status.php'
BASECOM='curl -K ./.hpradmin_curlrc -s'
QUERY1="$BASECOM $URL"
QUERY2="$BASECOM -o - $URL"

#
# Fallback URLs and commands
# NOTE(review): the fallback is plain http - confirm that is still wanted
#
URL_BAK='http://hub.hackerpublicradio.org/cms/status.php'
QUERY1_BAK="$BASECOM $URL_BAK"
QUERY2_BAK="$BASECOM -o - $URL_BAK"

#
# Number of retries per show
#
RETRIES=3

#
# Option defaults
#
DEFLIMIT=20                     # default and maximum for '-l N'
RETRYING=1                      # retry if a show's not on the IA
FORCE=0
DEBUG=0
DRYRUN=0                        # live mode by default
COLOUR=1                        # use colours by default
#
# Process options
#
# The leading ':' in the option string puts getopts into silent mode: a
# missing option argument is reported as ':' and an unknown option as '?',
# and in both cases OPTARG holds the offending option letter. The '?' arm is
# escaped because an unescaped '?' is a pattern matching any one character,
# which would also swallow the ':' case.
#
while getopts :hdDFl:mR opt
do
    case "${opt}" in
        h) _usage;;
        d) DRYRUN=1;;
        D) DEBUG=1;;
        F) FORCE=1;;
        l) LIMIT=$OPTARG;;
        m) COLOUR=0;;
        R) RETRYING=0;;
        :) echo "$SCRIPT: Option -$OPTARG requires an argument; aborting"; exit 1;;
        \?) echo "$SCRIPT: Invalid option -$OPTARG; aborting"; exit 1;;
    esac
done
shift $((OPTIND - 1))
#
# Cancel colours if requested
#
if [[ $COLOUR -eq 0 ]]; then
    undefine_colours
fi

#
# Settle the limit on the number of shows, defaulting to the maximum. The
# value must be plain decimal digits before it goes anywhere near a numeric
# test: '-lt' and friends evaluate their operands as arithmetic expressions,
# so anything else would be treated as a variable name or expression. The
# number of significant digits is capped to rule out overflow, and '10#'
# stops a leading zero from being read as octal.
#
LIMIT=${LIMIT:-$DEFLIMIT}
if [[ ! $LIMIT =~ ^0*[0-9]{1,9}$ ]] ||
    (( 10#$LIMIT < 1 || 10#$LIMIT > DEFLIMIT )); then
    echo "** Use '-l 1' up to '-l $DEFLIMIT' or omit the option"
    _usage 1
fi
LIMIT=$((10#$LIMIT))

#
# Tell the user about any non-default behaviour that was asked for
#
if [[ $FORCE -eq 1 ]]; then
    coloured 'yellow' "Forcing updates without checking the IA state"
fi
if [[ $RETRYING -eq 0 ]]; then
    coloured 'yellow' "Not retrying updates if the show is missing"
fi

#
# Check the argument count after any options
#
if [[ $# -ne 0 ]]; then
    coloured 'red' "** ${SCRIPT} takes no arguments"
    _usage 1
fi
#
# Collect the current table of shows requiring work. We expect something like:
# timestamp_epoc,ep_num,ep_date,key,status,email
# 1651286617,3617,2022-06-14,fda088e0e3bd5d0353ea6b7569e93b87626ca25976a0a,UPLOADED_TO_IA,lurkingprion@gmail.com
# 1651648589,3619,2022-06-16,e7d3810afa098863d81663418d8640276272284de68f1,UPLOADED_TO_IA,monochromec@gmail.com
#
# If the query command exits with a failure the fallback URL is tried once,
# and if that fails too the script gives up.
# NOTE: Problem encountered 2022-09-23 because the SSL certificate has expired
#
if ! reservations=$($QUERY2); then
    coloured 'red' "Problem querying $URL"
    coloured 'yellow' "Falling back to $URL_BAK"
    if ! reservations=$($QUERY2_BAK); then
        coloured 'red' "Failed with fallback URL - aborting"
        exit 1
    fi
fi
_DEBUG "reservations = $reservations"
#
# Check which shows are on the IA and can be flagged as such. We get the work
# "queue" from the variable 'reservations' which contains lines returned from
# querying the CMS status interface.
#
# Two counters are kept: 'processed' counts every eligible show examined and
# is what '-l N' limits; 'showcount' counts only the shows whose state was
# changed (or would have been in dry-run mode) and is what gets reported.
#
showcount=0
processed=0
while read -r line; do
    #
    # Ignore anything that doesn't start with five non-empty fields each
    # followed by a comma. Field 2 is the show number, field 5 its state.
    #
    [[ $line =~ ^([^,]+),([^,]+),([^,]+),([^,]+),([^,]+),.*$ ]] || continue
    show="${BASH_REMATCH[2]}"
    state="${BASH_REMATCH[5]}"

    #
    # Process shows in just one of the states
    #
    [[ $state == 'MEDIA_TRANSCODED' ]] || continue
    _DEBUG "show = $show, state = $state"

    #
    # If we're retrying (waiting for a show to be uploaded) then loop
    # $RETRIES times, otherwise don't retry at all
    #
    if [[ $RETRYING -eq 1 ]]; then
        retry_count=$RETRIES
    else
        retry_count=1
    fi

    updated=0
    while [[ $retry_count -gt 0 ]]; do
        #
        # Look for the show on the IA (unless forced to skip the check). If
        # it's not found we wait 30 seconds and look again, but only while
        # there are attempts left. After that we give up on this show and
        # carry on with any others.
        #
        if [[ $FORCE -eq 1 ]] || ia metadata "hpr$show" --exists > /dev/null 2>&1; then
            params="?ep_num=${show}&status=UPLOADED_TO_IA"
            command="${QUERY1}${params}"            # for display only

            #
            # In dry-run mode we count this iteration as success. In live
            # mode we try the main URL then the fallback, and abort the
            # whole script if both fail.
            #
            if [[ $DRYRUN -eq 1 ]]; then
                echo -e "Dry-run: would have run\n${yellow}$command${reset}"
            else
                coloured 'yellow' "$command"
                #
                # The command string is split into words on purpose, but the
                # query part is quoted so that its '?' can never be taken as
                # a filename pattern.
                #
                # shellcheck disable=SC2086
                ${QUERY1}"${params}" || {
                    coloured 'red' "Problem querying $URL"
                    coloured 'yellow' "Falling back to $URL_BAK"
                    # shellcheck disable=SC2086
                    ${QUERY1_BAK}"${params}" || {
                        coloured 'red' "Failed with fallback URL - aborting"
                        exit 1
                    }
                }
            fi

            #
            # Success. Stop the loop
            #
            updated=1
            break
        fi

        #
        # Failed to find the show, have another go after a wait if any
        # attempts remain
        #
        coloured 'red' "Show $show is not yet uploaded"
        ((retry_count--))
        if [[ $retry_count -gt 0 ]]; then
            sleep 30
        fi
    done

    #
    # Count a success, or explain a failure if we were retrying
    #
    if [[ $updated -eq 1 ]]; then
        ((showcount++))
    elif [[ $RETRYING -eq 1 ]]; then
        coloured 'red' "Failed to update show $show; retry count reached"
        coloured 'yellow' "The command 'ia metadata hpr$show --exists' repeatedly returned \"failure\""
        coloured 'yellow' "Database updates not done"
        coloured 'yellow' "Try again later with './${SCRIPT}'"
    fi

    #
    # Stop the loop if we have reached the limiting number
    #
    ((processed++))
    if [[ $processed -eq $LIMIT ]]; then
        echo "Upload limit ($LIMIT) reached"
        break
    fi
done <<< "$reservations"

if [[ $DRYRUN -eq 0 ]]; then
    echo "Number of shows processed successfully: $showcount"
fi
exit
# vim: syntax=sh:ts=8:sw=4:ai:et:tw=78:fo=tcrqn21:fdm=marker