#!/bin/bash -
#===============================================================================
#
#         FILE: future_upload
#
#        USAGE: ./future_upload [-h] [-v] [-D] [-d {0|1}] [-F] [-r] [-l N]
#
#  DESCRIPTION: Uploads future HPR shows based on what is in the upload area
#
#      OPTIONS: ---
# REQUIREMENTS: ---
#         BUGS: ---
#        NOTES: Contains methods from 'delete_uploaded' and 'weekly_upload' as
#               well as 'update_state'
#       AUTHOR: Dave Morriss (djm), Dave.Morriss@gmail.com
#      VERSION: 0.0.16
#      CREATED: 2021-01-07 12:11:02
#     REVISION: 2025-01-01 11:48:40
#
#===============================================================================

set -o nounset                              # Treat unset variables as an error

SCRIPT=${0##*/}
# DIR=${0%/*}

#
# Destination for the usage message. Despite the name this is file
# descriptor 2.
#
STDOUT="/dev/fd/2"

VERSION="0.0.16"

#
# Load library functions
#
LIB="$HOME/bin/function_lib.sh"
[ -e "$LIB" ] || { echo "Unable to source functions"; exit 1; }
# shellcheck disable=SC1090
source "$LIB"

# {{{ -- Functions -- check_uploads, _log, _usage

#===  FUNCTION  ================================================================
#         NAME: check_uploads
#  DESCRIPTION: Determines if files exist for uploading. Looks in the
#               directory named by the global $UPLOADS.
#   PARAMETERS: 1 - filename prefix e.g. 'hpr9999'
#      RETURNS: True if every expected file exists, otherwise false
#===============================================================================
check_uploads () {
    local prefix=${1:?Usage: check_uploads prefix}
    local suff

    #
    # Look for files called hpr1234.flac and so on. Don't bother with the
    # hpr1234_source.flac one. As soon as a file is missing return with false.
    # 2025-01-01: Dropped 'spx' from the list
    #
    for suff in flac mp3 ogg opus wav; do
        if [[ ! -e "$UPLOADS/$prefix.$suff" ]]; then
            return 1
        fi
    done

    #
    # Transcripts are (currently) in a sub-directory with the same name as the
    # IA item. We only cater for two types as of 2025.
    #
    for suff in txt srt; do
        if [[ ! -e "$UPLOADS/$prefix/$prefix.$suff" ]]; then
            return 1
        fi
    done

    return 0
}

#===  FUNCTION  ================================================================
#         NAME: _log
#  DESCRIPTION: Writes a log record to the predefined $LOGFILE in this script
#               using the predefined $LOGREC, a template for 'printf'. If the
#               latter is not defined the function will use a default.
#               For some reason 'shellcheck' objects to this function. The
#               first argument to 'printf' needs to be -1 to make the
#               '%(fmt)T' use today's date and time.
#   PARAMETERS: 1 - the message to write
#      RETURNS: Nothing (exits the script if $LOGFILE is not defined)
#===============================================================================
# shellcheck disable=SC2317 disable=SC2059
_log () {
    local msg="$1"

    # echo "D> $LOGFILE $LOGREC"
    [ -v LOGFILE ] || { echo "${FUNCNAME[0]}: \$LOGFILE is not defined"; exit 1; }
    [ -v LOGREC ] || { local LOGREC='%(%F %T)T %s\n'; }

    # echo "D> $LOGFILE $LOGREC"
    printf "$LOGREC" -1 "$msg" >> "$LOGFILE"

    return
}

#===  FUNCTION  ================================================================
#         NAME: _usage
#  DESCRIPTION: Report usage. The text is written to $STDOUT and the script
#               then exits. The text refers to $DEFLIMIT, so that variable
#               must have been set before this function is called.
#   PARAMETERS: 1 [optional] exit value (default 0)
#      RETURNS: Nothing (does not return; calls 'exit')
#===============================================================================
_usage () {
    local -i res="${1:-0}"

    cat >"$STDOUT" <<-endusage
${SCRIPT} - version: ${VERSION}

Usage: ./${SCRIPT} [-h] [-v] [-D] [-d {0|1}] [-F] [-r] [-l N]

Uploads HPR shows to the Internet Archive that haven't yet been uploaded. This
is as an alternative to uploading the next 5 shows each week for the coming
week.

Options:
  -h                    Print this help
  -v                    Run in verbose mode where more information is reported
  -D                    Run in debug mode where a lot more information is
                        reported
  -d 0|1                Dry run: -d 1 (the default) runs the script in dry-run
                        mode where nothing is uploaded but the actions that
                        will be taken are reported; -d 0 turns off dry-run
                        mode and the actions will be carried out.
  -F                    Force the upload of a show even if the checks for its
                        state in the 'reservations' table gives the wrong
                        state or none at all. This is a rare event which may
                        come about if an "emergency" show is being deployed or
                        in some other exceptional circumstance.
  -r                    Run in 'remote' mode, using the live database over an
                        (already established) SSH tunnel. Default is to run
                        against the local database.
  -l N                  Control the number of shows that can be uploaded at
                        once. The range is 1 to $DEFLIMIT.

Notes:

1. When running on 'borg' the method used is to run in faux 'local' mode.
   This means we have an open tunnel to the HPR server (mostly left open) and
   the default file .hpr_db.cfg points to the live database via this tunnel.
   So we do not use the -r option here. This is a bit of a hack! Sorry!

endusage
    exit "$res"
}

# }}}

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

#
# Configure depending whether local or on borg
#
case $HOSTNAME in
    hprvps|marvin|borg)
        UPLOADS="/data/IA/uploads"
        BASEDIR="$HOME/IA" ;;
    i7-desktop)
        UPLOADS="$HOME/HPR/IA/uploads"
        BASEDIR="$HOME/HPR/IA" ;;
    *)
        echo "Wrong host!"; exit 1 ;;
esac

cd "$BASEDIR" || { echo "Can't cd to $BASEDIR"; exit 1; }

#
# Prepare for logging
#
LOGS="$BASEDIR/logs"
LOGFILE="$LOGS/$SCRIPT.log"
LOGREC='%(%F %T)T %s\n'

#
# Tools. The command strings are expanded unquoted later so that they split
# into words; the curl configuration file is relative to $BASEDIR (the
# current directory after the 'cd' above).
#
BASECOM='curl -K ./.hpradmin_curlrc -s'
URL="https://hub.hackerpublicradio.org/cms/status.php"
# QUERY1="${BASECOM} ${URL}"
QUERY2="${BASECOM} -o - ${URL}"
UPSTATE="$BASEDIR/update_state"

#
# Fallback URL
#
# URL_BAK="http://hub.hackerpublicradio.org/cms/status.php"
# QUERY2_BAK="${BASECOM} -o - ${URL_BAK}"

#
# Prerequisites. The two helper scripts are run directly later on, so they
# must be executable, not merely present.
#
# jq=$(command -v jq)
# [ -z "$jq" ] && { echo "Needs the 'jq' JSON filter"; exit 1; }
ia=$(command -v ia)
[ -z "$ia" ] && { echo "Needs the 'ia' Internet Archive script"; exit 1; }
[ -x "$BASEDIR/make_metadata" ] || {
    echo "Needs the 'make_metadata' script"
    exit 1
}
[ -x "$UPSTATE" ] || {
    echo "Needs the 'update_state' script"
    exit 1
}

#
# File of processed shows
#
PROCFILE="$BASEDIR/.${SCRIPT}.dat"
[ -e "$PROCFILE" ] || touch "$PROCFILE"

#
# Constants
#
RETRIES=5
DEFLIMIT=20

#
# Process options. Only -d and -l take an argument; -r is a plain flag (as
# documented in the usage text) so it must not be followed by ':' in the
# option string.
#
while getopts :d:FhvDrl: opt
do
    case "${opt}" in
        d) DRYRUN=$OPTARG;;
        D) DEBUG=1;;
        F) FORCE=1;;
        h) _usage 0;;
        v) VERBOSE=1;;
        r) REMOTE=1;;
        l) LIMIT=$OPTARG;;
        *) _usage 1;;
    esac
done
shift $((OPTIND - 1))

#
# Validate the dry-run setting as a string first. A non-numeric value in an
# arithmetic test would be treated as a variable name and abort the script
# under 'nounset' before the usage message could be shown.
#
DRYRUN=${DRYRUN:-1}
if [[ ! $DRYRUN =~ ^[01]$ ]]; then
    echo "** Use '-d 0' or '-d 1'"
    _usage 1
fi
[[ $DRYRUN -eq 1 ]] && echo "Dry run mode"

FORCE=${FORCE:-0}

VERBOSE=${VERBOSE:-0}

DEBUG=${DEBUG:-0}

#
# Choose the database configuration file according to the mode
#
REMOTE=${REMOTE:-0}
if [[ $REMOTE -eq 0 ]]; then
    dbconfig="$BASEDIR/.hpr_db.cfg"
    [[ $VERBOSE -eq 1 ]] && echo "Local database mode"
else
    dbconfig="$BASEDIR/.hpr_livedb.cfg"
    [[ $VERBOSE -eq 1 ]] && echo "Remote database mode"
fi

#
# The limit must be a decimal number in the range 1..$DEFLIMIT. Force base
# 10 so that a value with a leading zero (e.g. '08') is not rejected as bad
# octal, and store the normalised number for later comparisons.
#
LIMIT=${LIMIT:-$DEFLIMIT}
if [[ ! $LIMIT =~ ^[0-9]+$ ]] || (( 10#$LIMIT < 1 || 10#$LIMIT > DEFLIMIT )); then
    echo "** Use '-l 1' up to '-l $DEFLIMIT' or omit the option"
    _usage 1
fi
LIMIT=$((10#$LIMIT))

#
# Should have no arguments
#
if [[ $# != 0 ]]; then
    echo "** ${SCRIPT} takes no arguments"
    _usage 1
fi

#
# Declarations
#
declare -A processed
declare -A ready
declare -A uploads
minshow=
maxshow=
lastitem=

#
# Load array of processed shows. Blank lines are skipped because an empty
# string is not a valid associative array subscript.
#
while read -r item; do
    [[ -n $item ]] || continue
    processed+=([$item]=1)
done < "$PROCFILE"
[ "$VERBOSE" -eq 1 ] && echo "Number of shows in cache: ${#processed[@]}"

#
# TODO: Create the associative array 'ready' containing the numbers of shows
# ready for upload. This is a way to ensure that we don't try and upload shows
# in transit to the upload area.
#
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Proposed code.
# Not sure what the actual URL will be nor what will be
# returned if nothing is ready for upload yet
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#
# json=$(curl http://hackerpublicradio.org/queue.php -s -o -)
# while read -r showno; do
#     ready+=([$showno]=1)
# done < <(echo "${json}" | jq '.READY_FOR_IA_UPLOAD[] | tonumber')
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Change of plan. Now we have a list of CSV values, so we need to do something
# like this:
#
# reservations=$($BASECOM -o - $URL)
# while read -r line; do
#     if [[ $line =~ ^([^,]+),([^,]+),([^,]+),([^,]+),([^,]+),.*$ ]]; then
#         state="${BASH_REMATCH[5]}"
#         show="${BASH_REMATCH[2]}"
#     fi
#     if [[ $state = 'MEDIA_TRANSCODED' ]]; then
#         ready+=([$show]=1)
#     fi
# done <<< $reservations
#
# At the end of this the associative array 'ready' will contain the keys of
# shows that are ready for upload (presumably) so we can look in this array to
# double check.
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

if [[ $FORCE -eq 0 ]]; then
    #
    # Collect the current table of shows requiring work. We expect something like:
    # timestamp_epoc,ep_num,ep_date,key,status,email
    # 1651286617,3617,2022-06-14,fda088e0e3bd5d0353ea6b7569e93b87626ca25976a0a,UPLOADED_TO_IA,lurkingprion@gmail.com
    # 1651648589,3619,2022-06-16,e7d3810afa098863d81663418d8640276272284de68f1,UPLOADED_TO_IA,monochromec@gmail.com
    # TODO: Check for a failure in the query?
    # NOTE: Problem encountered 2022-09-23 because the SSL certificate has expired
    #
    # $QUERY2 holds a whole command line in one string, so it is expanded
    # unquoted on purpose to split it into words.
    #
    # shellcheck disable=SC2086
    reservations=$($QUERY2) || {
        # echo "Problem querying $URL"
        # echo "Falling back to $URL_BAK"
        # reservations=$($QUERY2_BAK) || {
        #     echo "Failed with fallback URL - aborting"
        #     exit 1
        # }
        echo "Failed to query $URL - aborting"
        exit 1
    }
    _DEBUG "reservations = $reservations"

    #
    # The query above might fail in a way that just returns an empty string,
    # so check for that
    #
    if [[ -z $reservations ]]; then
        echo "No reply from $URL - can't continue"
        exit 1
    fi

    #
    # The query returns the bare number, but we're using 'hprxxxx' as the key in
    # the 'ready' array.
    #
    # The state is tested only when the line matched. Otherwise a line that
    # does not match would reuse the values from the previous line (or, if it
    # is the first line, reference variables that were never set, which is
    # fatal under 'nounset').
    #
    while read -r line; do
        if [[ $line =~ ^([^,]+),([^,]+),([^,]+),([^,]+),([^,]+),.*$ ]]; then
            show="${BASH_REMATCH[2]}"
            state="${BASH_REMATCH[5]}"
            if [[ $state = 'MEDIA_TRANSCODED' ]]; then
                ready+=([hpr$show]=1)
            fi
        fi
    done <<< "$reservations"
    _DEBUG "ready = ${!ready[*]}"
else
    [ "$VERBOSE" -eq 1 ] && {
        echo "V: Not checking reservations table; force option used"
    }
fi

#
# Process files. There will be several with the same prefix so look for
# a change of prefix
#
while IFS= read -r path; do
    #
    # Extract the path relative to $UPLOADS and the IA item name from the
    # returned path. Assume names are 'hpr9999' format (with leading zeroes if
    # appropriate).
    #
    relpath="${path#"$UPLOADS"/}"
    item="${relpath:0:7}"

    _DEBUG "Found path $path"
    _DEBUG "Relative path $relpath"
    _DEBUG "Item $item"

    #
    # Detect that the item prefix has changed. If it hasn't this path belongs
    # to the IA identifier we have already dealt with, so skip it.
    #
    if [[ $item == "$lastitem" ]]; then
        continue
    fi

    lastitem=$item
    echo -n "$lastitem "

    #
    # Have we already processed it? It might just not be in the cache even
    # though processed by some other means.
    #
    if [[ -v "processed[$lastitem]" ]]; then
        #
        # Recorded as having been seen
        #
        #[ $VERBOSE -eq 1 ] && echo "V: Already processed $lastitem"
        echo "- processed"
        continue
    fi

    [ "$VERBOSE" -eq 1 ] && {
        echo "V: Not yet processed (or not cached) $lastitem"
        echo "V: Checking IA for $lastitem"
    }

    #
    # Ask the IA. Standard input is redirected from /dev/null so the command
    # cannot consume the list of paths this loop is reading.
    #
    if ia list "$lastitem" < /dev/null > /dev/null 2>&1; then
        #
        # We can't tell with 'ia list' whether the show is
        # a 'reserved' one. Need to look deeper.
        #
        #
        # It's on the IA already, save in the array
        #
        #[ $VERBOSE -eq 1 ] && echo "V: Already on IA - $lastitem"
        echo "- cached"
        processed+=([$lastitem]=1)
        continue
    fi

    #
    # Is the show ready for upload? (Not checked in FORCE mode.)
    #
    if [[ $FORCE -eq 0 && ! -v "ready[$lastitem]" ]]; then
        echo "- not ready"
        continue
    fi

    #
    # Need to upload this one. The key is the show number without 'hpr'.
    #
    #[ $VERBOSE -eq 1 ] && echo "V: To be uploaded to IA - $lastitem"
    echo "- to upload"
    uploads+=([${lastitem:3}]=1)

    #
    # First show we find not on the IA should be the lowest number
    #
    [ -z "$minshow" ] && minshow="${lastitem:3}"

    #
    # Last show number should be the highest
    #
    maxshow="${lastitem:3}"

    #
    # Stop the loop if we have reached the limiting number
    #
    if [[ ${#uploads[@]} -eq $LIMIT ]]; then
        echo "Upload limit ($LIMIT) reached"
        break
    fi

done < <(find "$UPLOADS" -regextype posix-extended -regex '.*hpr[0-9]{4}.*' | sort)

#
# Write the processed array to the cache file unless in dry-run mode. With an
# empty array 'printf' would still emit one blank line, so in that case the
# file is just truncated instead.
#
# [ $DEBUG -eq 1 ] && { echo -n 'D> '; declare -p processed; }
_DEBUG "processed = ${!processed[*]}"
[ "$VERBOSE" -eq 1 ] && echo "Number of shows in cache: ${#processed[@]}"
if [[ $DRYRUN -ne 1 ]]; then
    if (( ${#processed[@]} > 0 )); then
        printf '%s\n' "${!processed[@]}" | sort -u > "$PROCFILE"
    else
        : > "$PROCFILE"
    fi
fi

#
# Generate the list of uploads for the 'make_metadata' option '-list=1,2,3'.
# Order is unimportant because make_metadata sorts internally.
#
_DEBUG "uploads = ${!uploads[*]}"
[ "$VERBOSE" -eq 1 ] && echo "Number of shows for upload: ${#uploads[@]}"
printf -v list '%s,' "${!uploads[@]}"
list="${list:0:-1}"

#
# If there are no uploads to do we can stop. Count the elements rather than
# using '-v uploads[@]': for an associative array bash 5.2 and later treat
# that as a test for a key named '@', which would always report nothing to do.
#
if (( ${#uploads[@]} == 0 )); then
    echo "Nothing to do!"
    exit
fi

#
# Check that the shows being uploaded have all their files and log what is
# happening.
#
while read -r show; do
    echo "$(date +%Y%m%d%H%M%S) preparing to upload hpr$show" >> "$LOGFILE"

    if ! check_uploads "hpr$show"; then
        echo "Missing files for show $show. Aborted!"
        echo "$(date +%Y%m%d%H%M%S) aborting on hpr$show - missing files" >> "$LOGFILE"
        exit 1
    fi
done < <(printf '%s\n' "${!uploads[@]}" | sort)

#
# Define output files. If the list contains one element then it's a different
# name from the multi-element case (make_metadata does this too).
#
if [[ ${#uploads[@]} -eq 1 ]]; then
    metadata="metadata_${minshow}.csv"
    script="script_${minshow}.sh"
else
    metadata="metadata_${minshow}-${maxshow}.csv"
    script="script_${minshow}-${maxshow}.sh"
fi

#
# Perform the uploads or report what would be done
#
if [[ $DRYRUN -eq 1 ]]; then
    echo "Dry run: Would have uploaded list '$list'"
    echo "Dry run: Would have created $metadata and $script"
    echo "Dry run: Would have uploaded $metadata and run $script"
    echo "Dry run: Would have used $dbconfig"

    echo "$BASEDIR/make_metadata -dbconf=${dbconfig} -list=\"$list\" -verb -out -script"

    echo "$(date +%Y%m%d%H%M%S) no uploads done - dry-run mode" >> "$LOGFILE"
else
    echo "Uploading the list '$list'"
    if yes_no "OK to continue? %s " "N"; then
        #
        # Make the metadata
        #
        if ! "$BASEDIR/make_metadata" -dbconf="${dbconfig}" -list="$list" \
                -verb -out -script; then
            echo "Upload aborted due to errors"
            echo "$(date +%Y%m%d%H%M%S) uploads aborted due to errors" >> "$LOGFILE"
            exit 1
        fi

        #
        # Upload in spreadsheet mode
        #
        if ! ia upload --retries="$RETRIES" --spreadsheet="${metadata}" \
                -n -H x-archive-keep-old-version:0; then
            echo "Failed to upload to IA; aborting"
            echo "$(date +%Y%m%d%H%M%S) IA uploads aborted due to errors" >> "$LOGFILE"
            exit 1
        fi

        #
        # Upload worked. Run the script if there is one
        #
        [[ -e $script ]] && "./${script}"

        #
        # Append the sorted show details to the cache. The comma-separated
        # list of numbers is turned into one 'hprNNNN' name per line.
        #
        echo "$list" |\
            sed -e 's/\([0-9]\{4\}\)/hpr\1/g; s/,/\n/g' | sort >> "$PROCFILE"

        echo "Uploaded ${#uploads[@]} shows"
        echo "$(date +%Y%m%d%H%M%S) ${#uploads[@]} uploads completed" >> "$LOGFILE"

        #
        # Update the state in the HPR database, unless we're using
        # FORCE. Pass the limit used here to this script so it can
        # stop looking for work unnecessarily
        #
        if [[ $FORCE -eq 0 ]]; then
            if ! "$UPSTATE" "-l$LIMIT"; then
                echo "Problem updating database state"
                exit 1
            fi
        else
            echo "Not updating the database, FORCE mode is on"
        fi
    else
        echo "Not uploaded, as requested"
        echo "$(date +%Y%m%d%H%M%S) uploads aborted by user" >> "$LOGFILE"
    fi
fi

exit

# vim: syntax=sh:ts=8:sw=4:ai:et:tw=78:fo=tcrqn21:fdm=marker