InternetArchive/future_upload: now updates the state of shows InternetArchive/reformat_html: new Perl script to reformat the HTML originally found in the HPR database in the 'notes' field to the format required in the 'description' field of an item on the IA. It reads from STDIN and writes to STDOUT.
342 lines
10 KiB
Bash
Executable File
342 lines
10 KiB
Bash
Executable File
#!/bin/bash -
|
|
#===============================================================================
|
|
#
|
|
# FILE: update_state
|
|
#
|
|
# USAGE: ./update_state [-h] [-D] [-d] [-F] [-l N] [-m] [-R]
|
|
#
|
|
# DESCRIPTION: A script to update the state of shows which have been sent to
|
|
# the IA. It looks at the current state of the 'reservations'
|
|
# table on the HPR database and selects all shows which are in
|
|
# the state 'MEDIA_TRANSCODED'. It checks each one to see if it
|
|
# is known to the IA and, if so, changes state to 'UPLOADED_TO_IA'.
|
|
#
|
|
# The IA check can be overridden using the '-F' option, but care
|
|
# should be taken not to do this unless it is known all eligible
|
|
# shows are uploaded.
|
|
#
|
|
# Note that the algorithm described here does not work for
|
|
# reserved shows like the Community News episodes since they are
|
|
# not submitted as such and have no entry in the 'reservations'
|
|
# table.
|
|
#
|
|
# OPTIONS: ---
|
|
# REQUIREMENTS: ---
|
|
# BUGS: ---
|
|
# NOTES: ---
|
|
# AUTHOR: Dave Morriss (djm), Dave.Morriss@gmail.com
|
|
# VERSION: 0.0.10
|
|
# CREATED: 2022-04-19 12:50:52
|
|
# REVISION: 2024-06-01 14:19:20
|
|
#
|
|
#===============================================================================
|
|
|
|
# Abort on any reference to an unset variable
set -u

# Name of this script with any leading path removed
SCRIPT="${0##*/}"
# DIR=${0%/*}

# shellcheck disable=SC2034
VERSION="0.0.10"

# NOTE: despite its name this points at file descriptor 2 (standard error)
STDOUT="/dev/fd/2"

#
# The function library must exist before it can be sourced; give up if not
#
LIB="$HOME/bin/function_lib.sh"
if [ ! -e "$LIB" ]; then
    echo "$SCRIPT: Unable to source functions"
    exit 1
fi
# shellcheck source=/home/cendjm/bin/function_lib.sh
source "$LIB"

#
# Set up the colour codes (define_colours is presumably provided by the
# library sourced above - it is not defined in this file)
#
define_colours
|
|
|
|
# {{{ ---- Functions: ---- _usage _DEBUG
|
|
|
|
#===  FUNCTION  ================================================================
#         NAME: _usage
#  DESCRIPTION: Report usage on the stream named by $STDOUT, then exit
#   PARAMETERS: $1      (optional) exit status for the script; defaults to 0
#      RETURNS: Does not return - it always exits the script
#===============================================================================
_usage () {
    local -i res="${1:-0}"

    # The heredoc body is deliberately flush-left: '<<-' strips only leading
    # tabs, so this layout does not depend on how the file is indented.
    cat >"$STDOUT" <<-endusage
Usage: ./${SCRIPT} [-h] [-D] [-d] [-F] [-l N] [-m] [-R]

Version: $VERSION

Script to update the status in the 'reservations' table after a show has been
processed.

Options:
  -h            Print this help
  -D            Enable DEBUG mode where a lot of information about the working
                of the script is displayed
  -d            Dry-run mode. Reports what it would do but doesn't do it
  -F            Force the update(s) without checking the state of the show on
                the IA
  -l N          Limit the number of shows processed to N
  -m            Monochrome mode - no colours
  -R            Normally, if a show is not in the IA, the script retries
                waiting for it to be uploaded (assuming it's being worked on
                by the IA servers). Including -R limits the retries to one
                which is useful when uploading multiple shows one at a time.

Examples
  ./${SCRIPT} -h
  ./${SCRIPT} -m
  ./${SCRIPT} -d
  ./${SCRIPT} -dm
  ./${SCRIPT} -Dd
  ./${SCRIPT} -F
  ./${SCRIPT} -l1
  ./${SCRIPT} -m
  ./${SCRIPT} -R
  ./${SCRIPT}

endusage
    exit "$res"
}
|
|
|
|
#===  FUNCTION  ================================================================
#         NAME: _DEBUG
#  DESCRIPTION: Writes each message on its own line, prefixed with 'D> ', to
#               standard output, but only when the global DEBUG is non-zero.
#               An unset or empty DEBUG is treated as "off" so the function
#               is safe to call under 'set -o nounset' at any point.
#   PARAMETERS: List of messages
#      RETURNS: Nothing
#===============================================================================
_DEBUG () {
    # Keep the loop variable local so a caller's 'msg' is not overwritten
    local msg

    [[ "${DEBUG:-0}" == 0 ]] && return
    for msg in "$@"; do
        printf 'D> %s\n' "$msg"
    done
}
|
|
|
|
# }}}
|
|
|
|
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
|
|
#
# Pick the base directory according to the machine we are running on; any
# host not listed here is rejected
#
if [[ $HOSTNAME == hprvps || $HOSTNAME == marvin || $HOSTNAME == borg ]]; then
    # UPLOADS="/data/IA/uploads"
    BASEDIR="$HOME/IA"
elif [[ $HOSTNAME == i7-desktop ]]; then
    # UPLOADS="$HOME/HPR/IA/uploads"
    BASEDIR="$HOME/HPR/IA"
else
    echo "Wrong host!"
    exit 1
fi

#
# Everything below uses paths relative to the base directory
#
if ! cd "$BASEDIR"; then
    echo "Can't cd to $BASEDIR"
    exit 1
fi
|
|
|
|
#
# Tools: the curl command prefix and the primary and fallback status URLs.
# The curl configuration file is given relative to the current directory.
# NOTE(review): the fallback URL is plain http - confirm that is acceptable
# for whatever the curl configuration file carries.
#
BASECOM='curl -K ./.hpradmin_curlrc -s'
URL="https://hub.hackerpublicradio.org/cms/status.php"
URL_BAK="http://hub.hackerpublicradio.org/cms/status.php"

#
# Commands built from the above. The '2' variants add '-o -' to the curl
# options; each has a primary form and a fallback ('_BAK') form.
#
QUERY1="$BASECOM $URL"
QUERY2="$BASECOM -o - $URL"
QUERY1_BAK="$BASECOM $URL_BAK"
QUERY2_BAK="$BASECOM -o - $URL_BAK"

#
# Number of retries per show
#
RETRIES=3

#
# Option defaults
#
COLOUR=1        # colours are on unless switched off
DRYRUN=0        # live mode unless dry-run is requested
DEBUG=0
FORCE=0
RETRYING=1      # keep retrying when a show is not on the IA
DEFLIMIT=20
|
|
|
|
#
# Process options. The leading ':' in the option string selects silent error
# reporting: getopts then sets opt to ':' when an option's argument is
# missing and to '?' for an unknown option, with the letter in OPTARG.
#
while getopts :hdDFl:mR opt
do
    case "${opt}" in
        h) _usage;;
        d) DRYRUN=1;;
        D) DEBUG=1;;
        F) FORCE=1;;
        l) LIMIT=$OPTARG;;
        m) COLOUR=0;;
        R) RETRYING=0;;
        :) echo "$SCRIPT: Option -$OPTARG requires an argument; aborting"; exit 1;;
        *) echo "$SCRIPT: Invalid option; aborting"; exit 1;;
    esac
done
shift $((OPTIND - 1))

#
# Cancel colours if requested
#
if [[ $COLOUR -eq 0 ]]; then
    undefine_colours
fi

#
# Validate the limit. It must be checked as a string of digits before it is
# used numerically: operands of -lt/-gt inside [[ ]] are evaluated as
# arithmetic expressions, so an unchecked value could name a variable or
# carry a command substitution. The '10#' prefix forces base 10 so values
# such as '08' are not rejected as bad octal, and the stored value is
# normalised for later numeric comparisons.
#
LIMIT=${LIMIT:-$DEFLIMIT}
if [[ ! $LIMIT =~ ^[0-9]+$ ]] || (( 10#$LIMIT < 1 || 10#$LIMIT > DEFLIMIT )); then
    echo "** Use '-l 1' up to '-l $DEFLIMIT' or omit the option"
    _usage 1
fi
LIMIT=$((10#$LIMIT))

if [[ $FORCE -eq 1 ]]; then
    coloured 'yellow' "Forcing updates without checking the IA state"
fi

if [[ $RETRYING -eq 0 ]]; then
    coloured 'yellow' "Not retrying updates if the show is missing"
fi

#
# Check the argument count after any options
#
if [[ $# -ne 0 ]]; then
    coloured 'red' "** ${SCRIPT} takes no arguments"
    _usage 1
fi
|
|
|
|
#
|
|
# Collect the current table of shows requiring work. We expect something like:
|
|
# timestamp_epoc,ep_num,ep_date,key,status,email
|
|
# 1651286617,3617,2022-06-14,fda088e0e3bd5d0353ea6b7569e93b87626ca25976a0a,UPLOADED_TO_IA,lurkingprion@gmail.com
|
|
# 1651648589,3619,2022-06-16,e7d3810afa098863d81663418d8640276272284de68f1,UPLOADED_TO_IA,monochromec@gmail.com
|
|
# TODO: Check for a failure in the query?
|
|
# NOTE: Problem encountered 2022-09-23 because the SSL certificate has expired
|
|
#
|
|
#
# Fetch the reservations table. If the primary query command fails the
# fallback command is tried, and if that fails too the script stops.
# $QUERY2 and $QUERY2_BAK are expanded unquoted on purpose: each holds a
# whole command line that has to be split into words.
#
if ! reservations=$($QUERY2); then
    coloured 'red' "Problem querying $URL"
    coloured 'yellow' "Falling back to $URL_BAK"
    if ! reservations=$($QUERY2_BAK); then
        coloured 'red' "Failed with fallback URL - aborting"
        exit 1
    fi
fi
_DEBUG "reservations = $reservations"
|
|
|
|
#
# Check which shows are on the IA and can be flagged as such. We get the work
# "queue" from the variable 'reservations' which contains lines returned from
# querying the CMS status interface.
#
# The curl command prefix is split into words once so the update commands can
# be run from arrays. Running an unquoted string instead would also expose
# the '?' in the URL to pathname expansion.
#
read -r -a basecom <<< "$BASECOM"

showcount=0     # shows whose state was updated (or would be, in dry-run mode)
attempted=0     # shows examined, updated or not; this is what -l limits

#
# The queue is read on file descriptor 3 rather than standard input so that
# no command run inside the loop can consume part of it.
#
while read -r -u 3 line; do
    #
    # Skip anything that doesn't have at least five comma-separated fields
    # followed by a sixth. Field 2 is the show number, field 5 the state.
    #
    [[ $line =~ ^([^,]+),([^,]+),([^,]+),([^,]+),([^,]+),.*$ ]] || continue
    show="${BASH_REMATCH[2]}"
    state="${BASH_REMATCH[5]}"

    #
    # Process shows in just one of the states
    #
    [[ $state = 'MEDIA_TRANSCODED' ]] || continue
    _DEBUG "show = $show, state = $state"

    #
    # If we're retrying (waiting for a show to be uploaded) then loop
    # $RETRIES times, otherwise don't retry at all
    #
    if [[ $RETRYING -eq 1 ]]; then
        retry_count=$RETRIES
    else
        retry_count=1
    fi

    updated=0
    while [[ $retry_count -gt 0 ]]; do
        #
        # Look for the show on the IA unless the check is being forced
        #
        if [[ $FORCE -eq 1 ]] || ia metadata "hpr$show" --exists > /dev/null 2>&1; then
            update_cmd=("${basecom[@]}" "${URL}?ep_num=${show}&status=UPLOADED_TO_IA")
            update_cmd_bak=("${basecom[@]}" "${URL_BAK}?ep_num=${show}&status=UPLOADED_TO_IA")

            #
            # In dry-run mode we only report the command and count this as
            # success. In live mode we run it, try the fallback URL if it
            # fails, and abort the whole script if that fails as well.
            #
            if [[ $DRYRUN -eq 1 ]]; then
                echo -e "Dry-run: would have run\n${yellow}${update_cmd[*]}${reset}"
            else
                coloured 'yellow' "${update_cmd[*]}"
                if ! "${update_cmd[@]}"; then
                    coloured 'red' "Problem querying $URL"
                    coloured 'yellow' "Falling back to $URL_BAK"
                    if ! "${update_cmd_bak[@]}"; then
                        coloured 'red' "Failed with fallback URL - aborting"
                        exit 1
                    fi
                fi
            fi

            #
            # Success. Stop the loop
            #
            updated=1
            break
        fi

        #
        # Failed to find the show. Wait 30 seconds before another attempt,
        # but don't waste time sleeping once no attempts are left.
        #
        coloured 'red' "Show $show is not yet uploaded"
        retry_count=$((retry_count - 1))
        if [[ $retry_count -gt 0 ]]; then
            sleep 30
        fi
    done

    #
    # Count the show only if it was updated. If it wasn't, and retries were
    # enabled, explain that they have been used up.
    #
    if [[ $updated -eq 1 ]]; then
        showcount=$((showcount + 1))
    elif [[ $RETRYING -eq 1 ]]; then
        coloured 'red' "Failed to update show $show; retry count reached"
        coloured 'yellow' "The command 'ia metadata hpr$show --exists' repeatedly returned \"failure\""
        coloured 'yellow' "Database updates not done"
        coloured 'yellow' "Try again later with './${SCRIPT}'"
    fi

    #
    # Stop the loop if we have reached the limiting number of shows examined
    #
    attempted=$((attempted + 1))
    if [[ $attempted -eq $LIMIT ]]; then
        echo "Upload limit ($LIMIT) reached"
        break
    fi
done 3<<< "$reservations"

#
# Report the number of shows actually updated (live mode only)
#
if [[ $DRYRUN -eq 0 ]]; then
    echo "Number of shows processed successfully: $showcount"
fi

exit
|
|
|
|
# vim: syntax=sh:ts=8:sw=4:ai:et:tw=78:fo=tcrqn21:fdm=marker
|