#!/bin/bash -
#===============================================================================
#
#         FILE: future_upload
#
#        USAGE: ./future_upload
#
#  DESCRIPTION: Uploads future HPR shows based on what is in the upload area
#
#      OPTIONS: ---
# REQUIREMENTS: ---
#         BUGS: ---
#        NOTES: Contains methods from 'delete_uploaded' and 'weekly_upload' as
#               well as 'update_state'
#       AUTHOR: Dave Morriss (djm), Dave.Morriss@gmail.com
#      VERSION: 0.0.14
#      CREATED: 2021-01-07 12:11:02
#     REVISION: 2024-03-03 14:12:30
#
#===============================================================================

# Any reference to an unset variable is treated as a fatal error
set -o nounset

# Name the script was invoked as, with any leading directory removed
SCRIPT="${0##*/}"
# DIR=${0%/*}

# Where the usage text is written. Despite the name this is file descriptor 2
# (standard error).
STDOUT="/dev/fd/2"

VERSION="0.0.14"

#
# Load library functions (the script stops if the library is missing)
#
LIB="$HOME/bin/function_lib.sh"
if [ ! -e "$LIB" ]; then
    echo "Unable to source functions"
    exit 1
fi
# shellcheck disable=SC1090
source "$LIB"

#===  FUNCTION  ================================================================
#         NAME: check_uploads
#  DESCRIPTION: Determines if files exist for uploading. Every one of the
#               audio formats listed in the loop must be present for the
#               check to succeed.
#   PARAMETERS: 1 - filename prefix e.g. 'hpr9999'
#               2 - [optional] directory to look in; defaults to the global
#                   variable UPLOADS
#      RETURNS: True (0) if all files exist, false (1) as soon as one is
#               missing
#===============================================================================
check_uploads () {
    local prefix=${1:?Usage: check_uploads prefix [directory]}
    local dir=${2:-$UPLOADS}
    local suff

    #
    # Look for files called hpr1234.flac and so on. Don't bother with the
    # hpr1234_source.flac one. As soon as a file is missing return with false.
    #
    for suff in flac mp3 ogg opus spx wav; do
        if [[ ! -e $dir/$prefix.$suff ]]; then
            return 1
        fi
    done

    return 0
}

#===  FUNCTION  ================================================================
#         NAME: _usage
#  DESCRIPTION: Report usage. The text is written to the file named by the
#               global STDOUT and the script then exits. Reads the globals
#               SCRIPT, VERSION and DEFLIMIT, which must be set before the
#               call.
#   PARAMETERS: 1 [optional] exit value (default 0)
#      RETURNS: Nothing (does not return; exits with the given value)
#===============================================================================
_usage () {
    local -i res="${1:-0}"

    cat >"$STDOUT" <<endusage
${SCRIPT} - version: ${VERSION}

Usage: ./${SCRIPT} [-h] [-v] [-D] [-d {0|1}] [-F] [-r] [-l N]

Uploads HPR shows to the Internet Archive that haven't yet been uploaded. This
is as an alternative to uploading the next 5 shows each week for the coming
week.

Options:
  -h                    Print this help
  -v                    Run in verbose mode where more information is reported
  -D                    Run in debug mode where a lot more information is
                        reported
  -d 0|1                Dry run: -d 1 (the default) runs the script in dry-run
                        mode where nothing is uploaded but the actions that
                        will be taken are reported; -d 0 turns off dry-run
                        mode and the actions will be carried out.
  -F                    Force the upload of a show even if the checks for its
                        state in the 'reservations' table gives the wrong
                        state or none at all. This is a rare event which may
                        come about if an "emergency" show is being deployed or
                        in some other exceptional circumstance.
  -r                    Run in 'remote' mode, using the live database over an
                        (already established) SSH tunnel. Default is to run
                        against the local database.
  -l N                  Control the number of shows that can be uploaded at
                        once. The range is 1 to $DEFLIMIT.

Notes:

1. When running on 'borg' the method used is to run in faux 'local' mode.
   This means we have an open tunnel to the HPR server (mostly left open) and
   the default file .hpr_db.cfg points to the live database via this tunnel.
   So we do not use the -r option here. This is a bit of a hack! Sorry!

endusage
    exit "$res"
}

#===  FUNCTION  ================================================================
#         NAME: _DEBUG
#  DESCRIPTION: Writes a message if in DEBUG mode. Each argument is written
#               to standard output on its own line with a 'D> ' prefix.
#               Nothing is written when the global DEBUG is 0 or unset.
#   PARAMETERS: List of messages
#      RETURNS: Nothing
#===============================================================================
_DEBUG () {
    # Keep the loop variable out of the caller's namespace
    local msg

    # ${DEBUG-0}: an unset DEBUG counts as "off" rather than tripping nounset
    [ "${DEBUG-0}" == 0 ] && return 0

    for msg in "$@"; do
        printf 'D> %s\n' "$msg"
    done
}

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

#
# Pick the upload area and the working directory according to the machine
# the script is running on; any other host is refused.
#
case "$HOSTNAME" in
    hprvps | marvin | borg)
        UPLOADS="/data/IA/uploads"
        BASEDIR="$HOME/IA"
        ;;
    i7-desktop)
        UPLOADS="$HOME/HPR/IA/uploads"
        BASEDIR="$HOME/HPR/IA"
        ;;
    *)
        echo "Wrong host!"
        exit 1
        ;;
esac

# Everything else runs from the working directory
if ! cd "$BASEDIR"; then
    echo "Can't cd to $BASEDIR"
    exit 1
fi

#
# Logging: a single log file named after the script, in the 'logs' directory
# under the working directory
#
LOGS="${BASEDIR}/logs"
LOGFILE="${LOGS}/${SCRIPT}.log"

#
# Tools: the curl command line used to fetch the status table, and the path
# of the 'update_state' script. The curl configuration file is given relative
# to the current directory.
#
BASECOM='curl -K ./.hpradmin_curlrc -s'
URL="https://hub.hackerpublicradio.org/cms/status.php"
# QUERY1="${BASECOM} ${URL}"
QUERY2="${BASECOM} -o - ${URL}"
UPSTATE="${BASEDIR}/update_state"

#
# Fallback URL (plain HTTP variant of the same query)
#
URL_BAK="http://hub.hackerpublicradio.org/cms/status.php"
QUERY2_BAK="${BASECOM} -o - ${URL_BAK}"

#
# Prerequisites: the 'ia' command must be on the PATH and the two helper
# scripts must exist in the working directory
#
# jq=$(command -v jq)
# [ -z "$jq" ] && { echo "Needs the 'jq' JSON filter"; exit 1; }
ia=$(command -v ia)
if [ -z "$ia" ]; then
    echo "Needs the 'ia' Internet Archive script"
    exit 1
fi

if [ ! -e "$BASEDIR/make_metadata" ]; then
    echo "Needs the 'make_metadata' script"
    exit 1
fi

if [ ! -e "$UPSTATE" ]; then
    echo "Needs the 'update_state' script"
    exit 1
fi

#
# File of processed shows: a hidden file named after the script, created
# empty if it does not exist yet
#
PROCFILE="${BASEDIR}/.${SCRIPT}.dat"
if [ ! -e "$PROCFILE" ]; then
    touch "$PROCFILE"
fi

#
# Constants: retry count passed to 'ia upload', and the default (and maximum)
# number of shows uploaded in one run
#
RETRIES=5
DEFLIMIT=20

#
# Process options
#

#===  FUNCTION  ================================================================
#         NAME: _parse_options
#  DESCRIPTION: Parses command-line options and records them in the global
#               variables DRYRUN, DEBUG, FORCE, VERBOSE, REMOTE and LIMIT.
#               Only options that were given are assigned. Leaves the global
#               OPTIND pointing at the first non-option argument. Calls
#               _usage (which exits) for -h and for any invalid option.
#   PARAMETERS: The script's argument list ("$@")
#      RETURNS: Nothing
#===============================================================================
_parse_options () {
    local opt

    OPTIND=1
    #
    # Only -d and -l take an argument. -r is a plain flag, so it must not be
    # followed by a colon in the option string or it would swallow whatever
    # comes next on the command line.
    #
    while getopts ':d:FhvDrl:' opt; do
        case "${opt}" in
            d) DRYRUN=$OPTARG ;;
            D) DEBUG=1 ;;
            F) FORCE=1 ;;
            h) _usage 0 ;;
            v) VERBOSE=1 ;;
            r) REMOTE=1 ;;
            l) LIMIT=$OPTARG ;;
            *) _usage 1 ;;
        esac
    done
}

_parse_options "$@"
shift $((OPTIND - 1))

#===  FUNCTION  ================================================================
#         NAME: _is_binary_flag
#  DESCRIPTION: Tests whether a value is exactly '0' or '1'. A pattern match
#               is used so that arbitrary text is never evaluated as an
#               arithmetic expression.
#   PARAMETERS: 1 - value to test
#      RETURNS: True/false
#===============================================================================
_is_binary_flag () {
    [[ ${1-} == [01] ]]
}

#===  FUNCTION  ================================================================
#         NAME: _in_int_range
#  DESCRIPTION: Tests whether a value is an unsigned decimal integer lying
#               within an inclusive range. Numbers are forced to base 10 so a
#               leading zero (e.g. '08') is not taken as octal.
#   PARAMETERS: 1 - value to test
#               2 - lowest acceptable value
#               3 - highest acceptable value
#      RETURNS: True/false
#===============================================================================
_in_int_range () {
    [[ ${1-} =~ ^[0-9]+$ ]] || return 1
    (( 10#$1 >= 10#$2 && 10#$1 <= 10#$3 ))
}

#
# Dry-run mode is the default; only '0' or '1' are accepted
#
DRYRUN=${DRYRUN:-1}
if ! _is_binary_flag "$DRYRUN"; then
    echo "** Use '-d 0' or '-d 1'"
    _usage 1
fi
[[ $DRYRUN -eq 1 ]] && echo "Dry run mode"

FORCE=${FORCE:-0}

VERBOSE=${VERBOSE:-0}

DEBUG=${DEBUG:-0}

#
# Choose the database configuration file according to local/remote mode
#
REMOTE=${REMOTE:-0}
if [[ $REMOTE -eq 0 ]]; then
    dbconfig="$BASEDIR/.hpr_db.cfg"
    [[ $VERBOSE -eq 1 ]] && echo "Local database mode"
else
    dbconfig="$BASEDIR/.hpr_livedb.cfg"
    [[ $VERBOSE -eq 1 ]] && echo "Remote database mode"
fi

#
# The upload limit must be a whole number from 1 to the default limit. Once
# validated it is normalised to plain decimal for the later comparisons.
#
LIMIT=${LIMIT:-$DEFLIMIT}
if _in_int_range "$LIMIT" 1 "$DEFLIMIT"; then
    LIMIT=$((10#$LIMIT))
else
    echo "** Use '-l 1' up to '-l $DEFLIMIT' or omit the option"
    _usage 1
fi

#
# Should have no arguments
#
if [[ $# != 0 ]]; then
    echo "** ${SCRIPT} takes no arguments"
    _usage 1
fi

#
# Declarations
#
declare -A processed
declare -A ready
declare -A uploads
minshow=
maxshow=
lastitem=

#===  FUNCTION  ================================================================
#         NAME: _load_cache
#  DESCRIPTION: Reads a file of item names, one per line, and records each
#               one as a key of the global associative array 'processed'.
#               Blank lines are skipped because an empty string is not a
#               valid associative array subscript. A final line without
#               a trailing newline is still read.
#   PARAMETERS: 1 - path of the cache file
#      RETURNS: Nothing
#===============================================================================
_load_cache () {
    local cachefile=$1
    local entry

    while read -r entry || [[ -n $entry ]]; do
        [[ -z $entry ]] && continue
        processed+=([$entry]=1)
    done < "$cachefile"
}

#
# Load array of processed shows
#
_load_cache "$PROCFILE"
[ "$VERBOSE" -eq 1 ] && echo "Number of shows in cache: ${#processed[@]}"

#
# TODO: Create the associative array 'ready' containing the numbers of shows
# ready for upload. This is a way to ensure that we don't try and upload shows
# in transit to the upload area.
#
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Proposed code. Not sure what the actual URL will be nor what will be
# returned if nothing is ready for upload yet
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#
# json=$(curl http://hackerpublicradio.org/queue.php -s -o -)
# while read -r showno; do
#     ready+=([$showno]=1)
# done < <(echo "${json}" | jq '.READY_FOR_IA_UPLOAD[] | tonumber')
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Change of plan. Now we have a list of CSV values, so we need to do something
# like this:
#
# reservations=$($BASECOM -o - $URL)
# while read -r line; do
#     if [[ $line =~ ^([^,]+),([^,]+),([^,]+),([^,]+),([^,]+),.*$ ]]; then
#         state="${BASH_REMATCH[5]}"
#         show="${BASH_REMATCH[2]}"
#     fi
#     if [[ $state = 'MEDIA_TRANSCODED' ]]; then
#         ready+=([$show]=1)
#     fi
# done <<< $reservations
#
# At the end of this the associative array 'ready' will contain the keys of
# shows that are ready for upload (presumably) so we can look in this array to
# double check.
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

#
# Unless forcing, fetch the reservations table and record every show whose
# status is MEDIA_TRANSCODED as a key ('hprNNNN') of the 'ready' array
#
if [[ $FORCE -eq 0 ]]; then
    #
    # Collect the current table of shows requiring work. We expect something like:
    # timestamp_epoc,ep_num,ep_date,key,status,email
    # 1651286617,3617,2022-06-14,fda088e0e3bd5d0353ea6b7569e93b87626ca25976a0a,UPLOADED_TO_IA,lurkingprion@gmail.com
    # 1651648589,3619,2022-06-16,e7d3810afa098863d81663418d8640276272284de68f1,UPLOADED_TO_IA,monochromec@gmail.com
    # TODO: Check for a failure in the query?
    # NOTE: Problem encountered 2022-09-23 because the SSL certificate has expired
    #
    # QUERY2 holds a complete command line, so it is deliberately left
    # unquoted to be split into the command and its arguments.
    #
    reservations=$($QUERY2) || {
        # echo "Problem querying $URL"
        # echo "Falling back to $URL_BAK"
        # reservations=$($QUERY2_BAK) || {
        #     echo "Failed with fallback URL - aborting"
        #     exit 1
        # }
        echo "Failed to query $URL - aborting"
        exit 1
    }
    _DEBUG "reservations = $reservations"

    #
    # The query above might fail in a way that just returns an empty string,
    # so check for that
    #
    if [[ -z $reservations ]]; then
        echo "No reply from $URL - can't continue"
        exit 1
    fi

    #
    # The query returns the bare number, but we're using 'hprxxxx' as the key in
    # the 'ready' array.
    #
    # The status is only examined for lines that match the expected layout.
    # A line that does not match is ignored outright instead of being judged
    # on values left over from an earlier line (or on variables that have
    # never been set, which is fatal under 'nounset').
    #
    while read -r line; do
        if [[ $line =~ ^([^,]+),([^,]+),([^,]+),([^,]+),([^,]+),.*$ ]]; then
            show="${BASH_REMATCH[2]}"
            state="${BASH_REMATCH[5]}"
            if [[ $state = 'MEDIA_TRANSCODED' ]]; then
                ready+=([hpr$show]=1)
            fi
        fi
    done <<< "$reservations"
    _DEBUG "ready = ${!ready[*]}"
else
    [ "$VERBOSE" -eq 1 ] && {
        echo "V: Not checking reservations table; force option used"
    }
fi

#
# Walk the sorted list of files in the upload area. Many files share one
# 'hprNNNN' prefix, so work is only done when the prefix changes.
#
while read -r found; do
    #
    # Strip the upload directory from the front of the path; the first seven
    # characters of what is left are taken as the IA item name ('hpr9999'
    # format, with leading zeroes if appropriate).
    #
    relative="${found#"$UPLOADS"/}"
    item="${relative:0:7}"

    _DEBUG "Found path $found" "Relative path $relative" "Item $item"

    #
    # Same item as the previous file: nothing more to do for it
    #
    [[ $item == "$lastitem" ]] && continue

    lastitem=$item
    echo -n "$lastitem "

    #
    # Already recorded in the cache?
    #
    if [[ -v "processed[$lastitem]" ]]; then
        #[ $VERBOSE -eq 1 ] && echo "V: Already processed $lastitem"
        echo "- processed"
        continue
    fi

    #
    # Not in the cache, though it may have been processed by some other
    # means, so ask the IA.
    #
    if [ "$VERBOSE" -eq 1 ]; then
        echo "V: Not yet processed (or not cached) $lastitem"
        echo "V: Checking IA for $lastitem"
    fi

    if ia list "$lastitem" > /dev/null 2>&1; then
        #
        # We can't tell with 'ia list' whether the show is a 'reserved' one.
        # Need to look deeper. For now treat it as being on the IA already
        # and remember it.
        #
        #[ $VERBOSE -eq 1 ] && echo "V: Already on IA - $lastitem"
        echo "- cached"
        processed+=([$lastitem]=1)
        continue
    fi

    #
    # Not on the IA. Unless forcing, only carry on if the show is marked
    # ready for upload.
    #
    if [[ $FORCE -eq 0 && ! -v "ready[$lastitem]" ]]; then
        echo "- not ready"
        continue
    fi

    #
    # This one needs to be uploaded; it is recorded by its bare number
    #
    #[ $VERBOSE -eq 1 ] && echo "V: To be uploaded to IA - $lastitem"
    echo "- to upload"
    uploads+=([${lastitem:3}]=1)

    #
    # Input is sorted, so the first show chosen is the lowest number and the
    # most recent one is the highest so far
    #
    if [ -z "$minshow" ]; then
        minshow="${lastitem:3}"
    fi
    maxshow="${lastitem:3}"

    #
    # Stop the loop if we have reached the limiting number
    #
    if [[ ${#uploads[@]} -eq $LIMIT ]]; then
        echo "Upload limit ($LIMIT) reached"
        break
    fi

done < <(find "$UPLOADS" -regextype posix-extended -regex '.*hpr[0-9]{4}.*' | sort)

#===  FUNCTION  ================================================================
#         NAME: _save_cache
#  DESCRIPTION: Writes the keys of the global associative array 'processed'
#               to a file, sorted and without duplicates, one per line. The
#               file is overwritten. An empty array gives an empty file
#               (printf with no arguments would otherwise emit one blank
#               line, which cannot be used as an array key when read back).
#   PARAMETERS: 1 - path of the cache file
#      RETURNS: Nothing
#===============================================================================
_save_cache () {
    local cachefile=$1

    {
        if (( ${#processed[@]} > 0 )); then
            printf '%s\n' "${!processed[@]}" | sort -u
        fi
    } > "$cachefile"
}

#
# Write the processed array to the cache file unless in dry-run mode
#
# [ $DEBUG -eq 1 ] && { echo -n 'D> '; declare -p processed; }
_DEBUG "processed = ${!processed[*]}"
[ "$VERBOSE" -eq 1 ] && echo "Number of shows in cache: ${#processed[@]}"
if [[ $DRYRUN -ne 1 ]]; then
    _save_cache "$PROCFILE"
fi

_DEBUG "uploads = ${!uploads[*]}"
[ "$VERBOSE" -eq 1 ] && echo "Number of shows for upload: ${#uploads[@]}"

#
# If there are no uploads to do we can stop
#
# The element count is tested directly. A '-v uploads[@]' test is not
# reliable for an associative array: newer bash releases treat it as a lookup
# of the literal key '@' (see the bash compatibility notes), which would make
# the script stop here every time.
#
if [[ ${#uploads[@]} -eq 0 ]]; then
    echo "Nothing to do!"
    exit
fi

#
# Generate the list of uploads for the 'make_metadata' option '-list=1,2,3'.
# Order is unimportant because make_metadata sorts internally.
#
# Every key is written followed by a comma, then the final comma is removed.
#
printf -v list '%s,' "${!uploads[@]}"
list="${list%,}"

#
# Go through the chosen shows in sorted order, logging each one, and stop the
# whole run at the first show that does not have its full set of files.
#
while read -r show; do
    echo "$(date +%Y%m%d%H%M%S) preparing to upload hpr$show" >> "$LOGFILE"

    # All files present: move on to the next show
    check_uploads "hpr$show" && continue

    echo "Missing files for show $show. Aborted!"
    echo "$(date +%Y%m%d%H%M%S) aborting on hpr$show - missing files" >> "$LOGFILE"
    exit 1
done < <(printf '%s\n' "${!uploads[@]}" | sort)

#
# Work out the names of the metadata file and the generated script. A single
# show gives 'metadata_N.csv' / 'script_N.sh'; several shows give names with
# the range 'N-M' (make_metadata does this too).
#
metadata="metadata_${minshow}"
script="script_${minshow}"
if [[ ${#uploads[@]} -ne 1 ]]; then
    metadata+="-${maxshow}"
    script+="-${maxshow}"
fi
metadata+=".csv"
script+=".sh"

#
# Perform the uploads or report what would be done
#
# In dry-run mode the intended actions are only reported (and the fact is
# logged). Otherwise, after confirmation from the user:
#   1. make_metadata generates the metadata file (and possibly a script)
#   2. 'ia upload' uploads in spreadsheet mode
#   3. the generated script, if there is one, is run
#   4. the uploaded shows are appended to the cache file
#   5. update_state updates the HPR database (skipped in FORCE mode)
# A failure of step 1, 2 or 5 ends the script with exit status 1.
#
# All expansions are quoted so that paths or values containing spaces or glob
# characters are passed through intact.
#
if [[ $DRYRUN -eq 1 ]]; then
    echo "Dry run: Would have uploaded list '$list'"
    echo "Dry run: Would have created $metadata and $script"
    echo "Dry run: Would have uploaded $metadata and run $script"
    echo "Dry run: Would have used $dbconfig"
    echo "$BASEDIR/make_metadata -dbconf=${dbconfig} -list=\"$list\" -verb -out -script"

    echo "$(date +%Y%m%d%H%M%S) no uploads done - dry-run mode" >> "$LOGFILE"
else
    echo "Uploading the list '$list'"
    if yes_no "OK to continue? %s " "N"; then
        #
        # Make the metadata
        #
        "$BASEDIR/make_metadata" -dbconf="${dbconfig}" -list="$list" -verb -out -script
        RES=$?
        if [[ $RES -ne 0 ]]; then
            echo "Upload aborted due to errors"
            echo "$(date +%Y%m%d%H%M%S) uploads aborted due to errors" >> "$LOGFILE"
            exit 1
        fi

        #
        # Upload in spreadsheet mode and run the generated script
        #
        ia upload --retries="$RETRIES" --spreadsheet="${metadata}" \
            -n -H x-archive-keep-old-version:0
        RES=$?
        if [[ $RES -ne 0 ]]; then
            echo "Failed to upload to IA; aborting"
            echo "$(date +%Y%m%d%H%M%S) IA uploads aborted due to errors" >> "$LOGFILE"
            exit 1
        fi

        #
        # Upload worked. Run the script if there is one
        #
        [[ -e $script ]] && "./${script}"

        #
        # Append the sorted show details to the cache
        #
        echo "$list" |\
            sed -e 's/\([0-9]\{4\}\)/hpr\1/g; s/,/\n/g' | sort >> "$PROCFILE"

        echo "Uploaded ${#uploads[@]} shows"
        echo "$(date +%Y%m%d%H%M%S) ${#uploads[@]} uploads completed" >> "$LOGFILE"

        #
        # Update the state in the HPR database, unless we're using
        # FORCE. Pass the limit used here to this script so it can
        # stop looking for work unnecessarily
        #
        if [[ $FORCE -eq 0 ]]; then
            "$UPSTATE" -l"$LIMIT"
            RES=$?
            if [[ $RES -ne 0 ]]; then
                echo "Problem updating database state"
                exit 1
            fi
        else
            echo "Not updating the database, FORCE mode is on"
        fi
    else
        echo "Not uploaded, as requested"
        echo "$(date +%Y%m%d%H%M%S) uploads aborted by user" >> "$LOGFILE"
    fi
fi

exit

# vim: syntax=sh:ts=8:sw=4:ai:et:tw=78:fo=tcrqn21