hpr-tools/InternetArchive/past_upload

480 lines
14 KiB
Plaintext
Raw Normal View History

#!/bin/bash -
#===============================================================================
#
# FILE: past_upload
#
# USAGE: ./past_upload [-h] [-F] [-m] [-r] [-v] [-Y] [-d {0|1}] start [count]
#
# DESCRIPTION: Run the commands necessary to upload a batch of older HPR
# shows to archive.org
#
# OPTIONS: ---
# REQUIREMENTS: ---
# BUGS: ---
# NOTES: ---
# AUTHOR: Dave Morriss (djm), Dave.Morriss@gmail.com
# VERSION: 0.0.12
# CREATED: 2021-04-17 22:14:16
# REVISION: 2022-07-07 16:17:41
#
#===============================================================================
# Any reference to a variable that has never been set aborts the script
set -o nounset

#
# Release string, the script's name with leading directories removed, and the
# device the usage text is written to (file descriptor 2, despite the name)
#
VERSION="0.0.12"
SCRIPT=${0##*/}
STDOUT="/dev/fd/2"
# DIR=${0%/*}

#
# The working tree and the upload area live in different places on each known
# host; refuse to run anywhere else
#
case "$(hostname)" in
    borg)
        BASEDIR="$HOME/IA"
        UPLOAD="/data/IA/uploads"
        ;;
    i7-desktop)
        BASEDIR="$HOME/HPR/InternetArchive"
        UPLOAD="$BASEDIR/uploads"
        ;;
    *)
        echo "Wrong host!"
        exit 1
        ;;
esac

# Everything below runs relative to the working tree (e.g. ./transcode)
if ! cd "$BASEDIR"; then
    exit 1
fi
#
# Load library functions
#
# NOTE(review): 'cleanup_temp' and 'yes_no' are called by this script but not
# defined in it, so they presumably come from this library - confirm.
#
LIB="$HOME/bin/function_lib.sh"
if [[ ! -e $LIB ]]; then
    echo "Unable to source functions"
    # A bare 'exit' would return the status of the 'echo' above (0), hiding
    # the failure from whatever invoked the script
    exit 1
fi
# shellcheck disable=SC1090
source "$LIB"
#
# Log file: one log per script, kept under the working directory
#
LOGS="$BASEDIR/logs"
LOGFILE="$LOGS/$SCRIPT.log"
#
# Make temporary files and set traps to delete them. The trap command is
# single-quoted so that $TMP1 is expanded when the trap fires; it is also
# double-quoted inside so the path reaches 'cleanup_temp' as a single argument
# even if the temporary directory name contains whitespace.
#
# NOTE(review): unless 'cleanup_temp' itself exits, a trapped signal runs the
# handler and then execution resumes - confirm this is what is wanted.
#
TMP1=$(mktemp) || { echo "$SCRIPT: creation of temporary file failed!"; exit 1; }
trap 'cleanup_temp "$TMP1"' SIGHUP SIGINT SIGPIPE SIGTERM EXIT
#=== FUNCTION ================================================================
# NAME: _verbose
# DESCRIPTION: Writes a message to standard output, but only when the global
#              VERBOSE is 1. An unset VERBOSE is treated as 0 so the function
#              is safe to call under 'set -o nounset'.
# PARAMETERS: $1 message (defaults to an empty string)
# RETURNS: Always 0, whether or not anything was printed, so that callers
#          can use it in '&&' chains without a misleading failure status
#===============================================================================
_verbose () {
    local msg=${1:-}
    if [[ ${VERBOSE:-0} -eq 1 ]]; then
        # printf rather than echo so a message such as '-n' is printed as is
        printf '%s\n' "$msg"
    fi
    return 0
}
#=== FUNCTION ================================================================
# NAME: _usage
# DESCRIPTION: Reports usage; always exits the script after doing so. The
#              text is written to the file named by the global STDOUT and
#              uses the globals SCRIPT and VERSION. The synopsis line lists
#              every option that is described under 'Options'.
# PARAMETERS: 1 - the integer to pass to the 'exit' command (default 0)
# RETURNS: Nothing (does not return; the script exits)
#===============================================================================
_usage () {
    local -i result=${1:-0}
    # '<<-' strips leading tabs only, and the body is an unquoted here-document
    # so ${SCRIPT} and ${VERSION} are expanded
    cat >"$STDOUT" <<-endusage
${SCRIPT} - version: ${VERSION}
Usage: ./${SCRIPT} [-h] [-F] [-m] [-r] [-v] [-Y] [-d {0|1}] start [count]
Generates the necessary metadata and script and uses them to upload HPR audio
and other show-related files held on the VPS to the Internet Archive. This
script is similar to 'weekly_upload' but it's for dealing with older shows
where we only have the MP3 audio.
Options:
-h Print this help
-v Run in verbose mode where more information is reported
-d 0|1 Dry run: -d 1 (the default) runs the script in dry-run
mode where nothing is changed but the actions that
will be taken are reported; -d 0 turns off dry-run
mode and the actions will be carried out.
-F Force an upload even if the items are already on the
IA. Use with *GREAT* caution!
-m Update the item's metadata from the file generated
for (re-)uploads. This ensures that any changes to the
notes, summary, tags, etc are propagated. This does
not happen by default, but shows with assets are
always updated this way.
-r Run in 'remote' mode, using the live database over an
(already established) SSH tunnel. Default is to run
against the local database.
-Y Answer 'Y' to the confirmation question (really don't
ask at all)
Arguments:
start the starting show number to be uploaded
count (optional, default 1) the number of shows to be
uploaded; not allowed to exceed 20
Notes:
1. When running on 'borg' the method used is to run in faux 'local' mode.
This means we have an open tunnel to the HPR server (mostly left open) and
the default file .hpr_db.cfg points to the live database via this tunnel.
So we do not use the -r option here. This is a bit of a hack! Sorry!
TODO: Needs fix!
2. There are potential problems when a show has no tags which haven't been
fully resolved. The make_metadata script fails in default mode when it
finds such a show, but this (weekly_upload) script can continue on and run
the generated script which uploads the source audio files. This can mean
the IA items end up as books! In this mode the description is not stored
and so there are no show notes.
endusage
    exit "$result"
}
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#
# Prerequisites: each external command must be found by 'command -v'; the
# resolved name is kept in a variable named after the command
#
jq=$(command -v jq)
if [[ -z $jq ]]; then
    echo "Needs the 'jq' JSON filter"
    exit 1
fi

ia=$(command -v ia)
if [[ -z $ia ]]; then
    echo "Needs the 'ia' Internet Archive script"
    exit 1
fi

transfer_tags=$(command -v transfer_tags)
if [[ -z $transfer_tags ]]; then
    echo "Needs the 'transfer_tags' script"
    exit 1
fi

tunnel_is_open=$(command -v tunnel_is_open)
if [[ -z $tunnel_is_open ]]; then
    echo "Needs the 'tunnel_is_open' script"
    exit 1
fi

#
# These two helper scripts are expected inside the working directory rather
# than on the PATH
#
for needed in transcode make_metadata; do
    if [[ ! -e "$BASEDIR/$needed" ]]; then
        echo "Needs the '$needed' script"
        exit 1
    fi
done

#
# Constant: passed to 'ia upload' as its --retries value
#
RETRIES=5

#
# Nothing can proceed unless 'tunnel_is_open' reports success
#
tunnel_is_open || {
    echo "Open the tunnel before running this script (open_tunnel)"
    exit 1
}
#-------------------------------------------------------------------------------
# Process options
#
# The leading ':' in the option string puts getopts in silent mode, so both
# an unknown option and a missing argument to -d end up in the '*' branch.
#-------------------------------------------------------------------------------
while getopts :d:FhmrvY opt
do
    case "${opt}" in
        d) DRYRUN=$OPTARG;;
        F) FORCE=1;;
        h) _usage 0;;   # help was asked for explicitly, so this is not an error
        m) METADATA=1;;
        r) REMOTE=1;;
        v) VERBOSE=1;;
        Y) YES=1;;
        *) _usage 1;;
    esac
done
shift $((OPTIND - 1))

#
# Check choices and set defaults. Dry-run is the default. The -d value is
# matched as text: a numeric comparison would evaluate a non-numeric argument
# as an arithmetic expression instead of rejecting it.
#
DRYRUN=${DRYRUN:-1}
if [[ ! $DRYRUN =~ ^[01]$ ]]; then
    echo "** Use '-d 0' or '-d 1'"
    _usage 1
fi

FORCE=${FORCE:-0}
METADATA=${METADATA:-0}
YES=${YES:-0}

VERBOSE=${VERBOSE:-0}

#
# Choose the database configuration file according to the -r option
#
REMOTE=${REMOTE:-0}
if [[ $REMOTE -eq 0 ]]; then
    dbconfig="$BASEDIR/.hpr_db.cfg"
    _verbose "Local database mode"
else
    dbconfig="$BASEDIR/.hpr_livedb.cfg"
    _verbose "Remote database mode"
fi
#
# Check argument count: 'start' is required, 'count' is optional
#
if (( $# < 1 || $# > 2 )); then
    echo "Wrong number of arguments"
    _usage 1
fi

#
# Validate arguments: each must consist of one to four decimal digits
#
for arg; do
    if [[ ! $arg =~ ^[0-9]{1,4}$ ]]; then
        echo "Invalid number: $arg"
        echo "Use a plain number"
        exit 1
    fi
done

#
# Set variables for the range of shows. The '10#' prefix forces base 10:
# without it a zero-padded number such as 0123 is read as octal by shell
# arithmetic (and by printf '%d'), and 0089 is rejected outright.
#
start=$((10#$1))
count=$((10#${2:-1}))

# A count of zero would make 'end' smaller than 'start'
if [[ $count -lt 1 ]]; then
    echo "Can't process fewer than 1 show"
    exit 1
fi

if [[ $count -gt 20 ]]; then
    echo "Can't process more than 20 shows at a time"
    exit 1
fi

end=$((start + count - 1))

[[ $DRYRUN -eq 1 ]] && _verbose "Dry run mode"

#
# Announce the range: a sentence in verbose mode, otherwise just 'start..end'
#
if [[ $VERBOSE -eq 1 ]]; then
    echo "Processing $count $(ngettext show shows "$count") from $start"
else
    echo "${start}..${end}"
fi
#
# Log the start of this run (only when not in dry-run mode)
#
[[ $DRYRUN -eq 0 ]] && \
    echo "$(date +%Y%m%d%H%M%S) Processing ${start}..${end} (v$VERSION)" >> "$LOGFILE"

#
# Store the zero-padded, four-digit show numbers in an array. A counting loop
# is used rather than 'eval' with a brace expansion: it needs no 'eval', and
# it yields an empty list when 'end' is below 'start', whereas '{5..4}' counts
# downwards and produces shows nobody asked for. '10#' stops a zero-padded
# 'start' from being read as octal.
#
declare -a shows=()
for ((n = 10#$start; n <= 10#$end; n++)); do
    printf -v padded '%04d' "$n"
    shows+=("$padded")
done

#
# Walk the array and delete elements that are already on the IA
#
if [[ $FORCE -eq 1 ]]; then
    _verbose 'Not checking for shows on archive.org; forcing!'
    [[ $DRYRUN -eq 0 ]] && echo "$(date +%Y%m%d%H%M%S) Forcing an update (-F)" >> "$LOGFILE"
else
    _verbose 'Checking for shows on archive.org'
    [[ $DRYRUN -eq 0 ]] && echo "$(date +%Y%m%d%H%M%S) Checking archive.org" >> "$LOGFILE"

    # The index list is expanded once, before the loop starts, so unsetting
    # elements while walking it is safe.
    # NOTE(review): any failure of 'ia list' (not only "no such item") leaves
    # the show in the list - confirm that is acceptable.
    for idx in "${!shows[@]}"; do
        if ia list "hpr${shows[idx]}" > /dev/null 2>&1; then
            _verbose "Found hpr${shows[idx]} on archive.org"
            unset "shows[$idx]"
        fi
    done
fi
#
# Carry on only if at least one show survived the archive.org check;
# otherwise say so (logging it when not in dry-run mode) and exit with
# status 1
#
if [[ ${#shows[@]} -gt 0 ]]; then
    _verbose "There $(ngettext 'is 1 show' "are ${#shows[@]} shows" "${#shows[@]}") to process"
else
    echo "Nothing to do; nominated show(s) are currently on archive.org"
    if [[ $DRYRUN -eq 0 ]]; then
        echo "$(date +%Y%m%d%H%M%S) Nothing to do" >> "$LOGFILE"
    fi
    exit 1
fi
#
# Find which audio needs to be downloaded and go get it
#
# 'wget -O' creates its output file even when the transfer fails, so each
# download goes to a '.part' name first and is renamed only after wget has
# succeeded and produced a non-empty file. A failed download stops the run;
# otherwise an empty or truncated MP3 would be treated as "already exists"
# and be transcoded and uploaded.
#
_verbose "Downloading missing audio..."
if [[ $DRYRUN -eq 1 ]]; then
    echo "Would have attempted to download ${#shows[@]} $(ngettext show shows "${#shows[@]}") (dry run)"
else
    for item in "${shows[@]}"; do
        target="$UPLOAD/hpr$item.mp3"

        if [[ -e $target ]]; then
            _verbose "$target already exists"
            continue
        fi

        echo "Downloading hpr$item.mp3"
        if wget -q "http://hackerpublicradio.org/local/hpr$item.mp3" \
            -O "$target.part" && [[ -s "$target.part" ]] && \
            mv -- "$target.part" "$target"; then
            _verbose "Downloaded $target"
        else
            # Leave nothing behind that a later run could mistake for audio
            rm -f -- "$target.part"
            echo "Download of hpr$item.mp3 failed"
            echo "$(date +%Y%m%d%H%M%S) Download of hpr$item.mp3 failed" >> "$LOGFILE"
            exit 1
        fi
    done
fi
#
# Run ./transcode on the MP3 of every show left in the list, passing -v on
# when this script is itself verbose. In dry-run mode only report what would
# have happened.
#
_verbose "Transcoding missing audio..."
if [[ $DRYRUN -ne 1 ]]; then
    if [[ $DRYRUN -eq 0 ]]; then
        echo "$(date +%Y%m%d%H%M%S) Transcoding ${#shows[@]} $(ngettext show shows "${#shows[@]}")" >> "$LOGFILE"
    fi

    # The verbosity setting cannot change during the loop, so test it once
    if [[ $VERBOSE -eq 1 ]]; then
        for item in "${shows[@]}"; do
            ./transcode -v "$UPLOAD/hpr$item.mp3"
        done
    else
        for item in "${shows[@]}"; do
            ./transcode "$UPLOAD/hpr$item.mp3"
        done
    fi
else
    echo "Would have transcoded ${#shows[@]} $(ngettext show shows "${#shows[@]}") (dry run)"
fi
#
# The remaining shows are ready for uploading
#
_verbose "Uploading $(ngettext show shows "${#shows[@]}")..."

#
# Names of the CSV metadata file and of the shell script, both written with
# four-digit show numbers. A range is named 'first-last'; a single show is
# named by its own number alone, which simply looks nicer than '1-1'.
#
if [[ $start -ne $end ]]; then
    printf -v metadata 'metadata_%04d-%04d.csv' "$start" "$end"
    printf -v script 'script_%04d-%04d.sh' "$start" "$end"
else
    printf -v metadata 'metadata_%04d.csv' "$start"
    printf -v script 'script_%04d.sh' "$start"
fi
#
# Check on the dry-run choice
#
if [[ $DRYRUN -eq 1 ]]; then
    #
    # Dry run: report what would have been done and change nothing
    #
    echo "Dry run: Would have uploaded $count $(ngettext show shows "$count") from $start"
    echo "Dry run: Would have created $metadata and $script"
    echo "Dry run: Would have uploaded $metadata and run $script"
    echo "Dry run: Would have used $dbconfig"
    echo -n "Dry run: Would have done metadata updates for "
    if [[ $METADATA -eq 0 ]]; then
        echo "shows with assets"
    else
        echo "all shows"
    fi
else
    #
    # Really do the upload
    #
    if [[ $start -eq $end ]]; then
        echo "Uploading $start"
    else
        echo "Uploading $start to $end inclusive"
    fi

    #
    # Implement the -Y (override) option; without it ask for confirmation,
    # with 'N' passed to 'yes_no' as its second argument
    #
    if [[ $YES -eq 1 ]]; then
        confirmed=1
    else
        echo "$(date +%Y%m%d%H%M%S) Waiting for confirmation" >> "$LOGFILE"
        if yes_no "OK to continue? %s " "N"; then
            confirmed=1
        else
            confirmed=0
        fi
    fi

    #---------------------------------------------------------------------------
    # Do the work
    #---------------------------------------------------------------------------
    if [[ $confirmed -eq 1 ]]; then
        #
        # Make the metadata. The file named by -a_count ($TMP1) is read back
        # below to find the shows that have assets.
        #
        _verbose "Running make_metadata"
        "$BASEDIR/make_metadata" "-dbconf=${dbconfig}" \
            "-from=$start" "-count=$count" \
            -verb -out -script "-a_count=$TMP1"
        RES=$?

        #
        # If it all went OK perform the uploads, otherwise report the
        # problem(s)
        #
        if [[ $RES -ne 0 ]]; then
            echo "Upload aborted due to errors"
            echo "$(date +%Y%m%d%H%M%S) Upload failed due to errors" >> "$LOGFILE"
            exit 1
        fi

        _verbose "Uploading audio and any assets"

        #
        # Success is logged only when 'ia upload' succeeded and, if the
        # generated script exists, that script succeeded as well. Logging
        # unconditionally would record "Uploaded shows" after a failure.
        #
        uploaded=0
        if ia upload --retries="$RETRIES" --spreadsheet="${metadata}" \
            -H x-archive-keep-old-version:0; then
            if [[ ! -e $script ]] || "./${script}"; then
                uploaded=1
            fi
        fi

        if [[ $uploaded -eq 1 ]]; then
            echo "$(date +%Y%m%d%H%M%S) Uploaded shows" >> "$LOGFILE"
        else
            echo "Upload failed"
            echo "$(date +%Y%m%d%H%M%S) Upload failed due to errors" >> "$LOGFILE"
            exit 1
        fi

        #
        # Update metadata for all shows if requested
        #
        if [[ $METADATA -eq 1 ]]; then
            _verbose "Uploading changed metadata"
            if ia metadata --spreadsheet="${metadata}"; then
                echo "$(date +%Y%m%d%H%M%S) Metadata uploaded for all shows" >> "$LOGFILE"
            else
                echo "Metadata update aborted due to errors"
                echo "$(date +%Y%m%d%H%M%S) Metadata upload failed due to errors" >> "$LOGFILE"
                exit 1
            fi
        elif [[ -s $TMP1 ]]; then
            #
            # We aren't updating metadata for all, but if any shows had
            # assets we need to do metadata updates. The show details are
            # in the temporary file $TMP1: the first space-separated field
            # of each line is the show name, and its 'hpr' prefix is removed
            # to leave the number.
            #
            _verbose "Refreshing metadata for shows with assets"

            declare -a mshows
            mapfile -t mshows < <(cut -f1 -d' ' "$TMP1" | sed -e 's/^hpr//' | sort)

            # Comma-separated list for -list. Joining with IFS separates
            # every element; '${mlist/ /,}' replaced only the first space.
            mlist=$(IFS=,; printf '%s' "${mshows[*]}")

            # '10#' forces base 10: the numbers are zero-padded, and printf
            # '%d' would otherwise read 0123 as octal and reject 0089.
            if [[ ${#mshows[@]} -eq 1 ]]; then
                printf -v metadata 'meta_metadata_%04d.csv' "$((10#${mshows[0]}))"
            else
                printf -v metadata 'meta_metadata_%04d-%04d.csv' \
                    "$((10#${mshows[0]}))" "$((10#${mshows[-1]}))"
            fi

            _verbose "Regenerating metadata"
            "$BASEDIR/make_metadata" "-dbconf=${dbconfig}" "-list=${mlist}" \
                "-out=${metadata}" -meta -noassets -verb
            RES=$?

            if [[ $RES -eq 0 ]]; then
                _verbose "Uploading new metadata"
                if ia metadata --spreadsheet="${metadata}"; then
                    echo "$(date +%Y%m%d%H%M%S) Metadata uploaded for eligible shows" >> "$LOGFILE"
                else
                    echo "Metadata update aborted due to errors"
                    echo "$(date +%Y%m%d%H%M%S) Metadata upload failed due to errors" >> "$LOGFILE"
                    exit 1
                fi
            else
                echo "Metadata update aborted due to errors"
                echo "$(date +%Y%m%d%H%M%S) Metadata upload failed due to errors" >> "$LOGFILE"
                exit 1
            fi
        fi
    else
        echo "Not uploaded"
        echo "$(date +%Y%m%d%H%M%S) Upload aborted" >> "$LOGFILE"
    fi
fi
# vim: syntax=sh:ts=8:sw=4:ai:et:tw=78:fo=tcrqn21