#!/bin/bash -
#===============================================================================
#
#         FILE: repair_assets
#
#        USAGE: ./repair_assets [-h] [-v] [-d {0|1}] [-D] showid
#
#  DESCRIPTION: Given a show where there was a directory of asset files on the
#               old HPR server which got lost in the migration, rebuild it
#               and fill it with assets from the IA. Modify the show notes to
#               point to these recovered assets.
#
#      OPTIONS: ---
# REQUIREMENTS: ---
#         BUGS: ---
#        NOTES: ---
#       AUTHOR: Dave Morriss (djm), Dave.Morriss@gmail.com
#      VERSION: 0.0.10
#      CREATED: 2024-05-10 21:26:31
#     REVISION: 2024-10-02 17:34:47
#
#===============================================================================

set -o nounset                              # Treat unset variables as an error

VERSION="0.0.10"

SCRIPT=${0##*/}
# DIR=${0%/*}

#
# NOTE: despite its name this is file descriptor 2 (stderr); the usage
# function sends its help text here.
#
STDOUT="/dev/fd/2"

#
# Select the appropriate working directory for the host
#
case $(hostname) in
    i7-desktop)
        BASEDIR="$HOME/HPR/InternetArchive"
        ;;
    borg)
        BASEDIR="$HOME/IA"
        ;;
    *)
        echo "Wrong host!"
        exit 1
        ;;
esac

cd "$BASEDIR" || { echo "Failed to cd to $BASEDIR"; exit 1; }

#
# Load library functions. A missing library is fatal, so exit with a failure
# status (a bare 'exit' would return the status of the preceding 'echo', i.e.
# success).
#
LIB="$HOME/HPR/function_lib.sh"
[ -e "$LIB" ] || { echo "Unable to source functions"; exit 1; }
# shellcheck disable=SC1090
source "$LIB"

#
# Enable coloured messages ('define_colours' is not defined in this file; it
# is expected to come from the library sourced above)
#
define_colours

#
# Sanity checks. Calling 'command' on a non-existent script/program will
# return an empty string. This works for built-in stuff and things on the
# PATH. These are all 'true' tests with the stuff in braces being run if they
# are not true.
#
IA=$(command -v ia)
[ -n "$IA" ] || { echo "Program 'ia' was not found"; exit 1; }

Q2T=$(command -v query2tt2)
[ -n "$Q2T" ] || { echo "Program 'query2tt2' was not found"; exit 1; }

FIXAL="$BASEDIR/fix_asset_links"
[ -e "$FIXAL" ] || { echo "Program '$FIXAL' was not found"; exit 1; }

TUNNEL_IS_OPEN=$(command -v tunnel_is_open)
[ -n "$TUNNEL_IS_OPEN" ] || { echo "Program 'tunnel_is_open' was not found"; exit 1; }

OPEN_TUNNEL=$(command -v open_tunnel)
[ -n "$OPEN_TUNNEL" ] || { echo "Program 'open_tunnel' was not found"; exit 1; }

#
# Make temporary files and set traps to delete them. 'cleanup_temp' is not
# defined in this file; it is expected to come from the sourced library.
#
TMP1=$(mktemp) || { echo "$SCRIPT: creation of temporary file failed!"; exit 1; }
TMP2=$(mktemp) || { echo "$SCRIPT: creation of temporary file failed!"; exit 1; }
trap 'cleanup_temp "$TMP1" "$TMP2"' SIGHUP SIGINT SIGPIPE SIGTERM EXIT

# {{{ -- Functions -- _verbose, _usage, _log, find_missing, make_dir

#===  FUNCTION  ================================================================
#         NAME: find_missing
#  DESCRIPTION: Given two arrays containing IA assets and HPR assets,
#               determine which IA assets are missing from the HPR list.
#               Paths are compared in full (not by base name), so files with
#               the same name in different directories are told apart. The
#               first path element of each HPR path is removed before the
#               comparison, for parity with the IA paths. Missing paths are
#               appended in the order they occur in the IA list, each one
#               once only.
#   PARAMETERS: $1 (nameref)    IA list
#               $2 (nameref)    HPR list
#               $3              Name of array to receive list of missing
#                               assets (IA full paths are appended to it)
#      RETURNS: Nothing
#===============================================================================
find_missing () {
    # NOTE: the local nameref 'IA' shadows the global of the same name for
    # the duration of the function.
    local -n IA="${1}"
    local -n HPR="${2}"
    # A nameref replaces the former 'eval', which broke on file names
    # containing single quotes.
    local -n _fm_missing="${3}"

    local -A hHPR=() seen=()
    local path key

    #
    # Make a hash keyed by the HPR file paths, minus their first element. We
    # are going to copy the IA paths, not these, so we never need the full
    # HPR paths again here.
    #
    for path in "${HPR[@]}"; do
        key="${path#*/}"
        [[ -n $key ]] || continue       # an empty subscript is an error
        hHPR["$key"]=1
    done

    #
    # Walk the IA list in order and collect every path not seen on the HPR
    # side, skipping repeats.
    #
    for path in "${IA[@]}"; do
        [[ -n $path ]] || continue
        [[ -n ${seen[$path]+x} ]] && continue
        seen["$path"]=1
        if [[ -z ${hHPR[$path]+x} ]]; then
            _fm_missing+=("$path")
        fi
    done
}

#===  FUNCTION  ================================================================
#         NAME: make_dir
#  DESCRIPTION: Make a directory if it doesn't exist, failing gracefully on
#               errors.
#   PARAMETERS: $1      directory path
#      RETURNS: True if success, otherwise exits the caller script
#===============================================================================
make_dir () {
    local dir="${1}"

    if [[ ! -d $dir ]]; then
        mkdir -p -- "$dir" || {
            coloured 'red' "Failed to create $dir"
            exit 1
        }
    fi
}

#===  FUNCTION  ================================================================
#         NAME: _verbose
#  DESCRIPTION: Writes a message in verbose mode (global VERBOSE non-zero)
#   PARAMETERS: * message strings to write, one per output line
#      RETURNS: Nothing
#===============================================================================
_verbose () {
    local msg                   # keep the loop variable out of global scope

    [ "$VERBOSE" -eq 0 ] && return

    for msg; do
        printf '%s\n' "$msg"
    done
}

#===  FUNCTION  ================================================================
#         NAME: _log
#  DESCRIPTION: Appends a record to the file "$LOGFILE", prefixed with the
#               current date and time
#   PARAMETERS: $1      Message to write
#      RETURNS: Nothing
#===============================================================================
_log () {
    local message="${1}"

    printf '%s %s\n' "$(date '+%F %T')" "$message" >> "$LOGFILE"
}

#===  FUNCTION  ================================================================
#         NAME: _usage
#  DESCRIPTION: Reports usage; always exits the script after doing so
#   PARAMETERS: 1 - the integer to pass to the 'exit' command
#      RETURNS: Nothing
#===============================================================================
_usage () {
    local -i result=${1:-0}

    cat >"$STDOUT" <<-endusage
${SCRIPT} - version: ${VERSION}

Usage: ./${SCRIPT} [-h] [-v] [-d {0|1}] [-D] showid

Attempts to repair an show where the directory of assets was not transferred
from the old HPR server.

Options:
  -h                    Print this help
  -v                    Run in verbose mode where more information is
                        reported. Default is off. If -v is repeated it
                        increases the verbosity level (levels 1 and 2 only).
  -d 0|1                Dry run: -d 1 (the default) runs the script in dry-run
                        mode where nothing is changed but the actions that
                        will be taken are reported; -d 0 turns off dry-run
                        mode and the actions will be carried out.
  -D                    Run in debug mode where a lot more information is
                        reported

Arguments:
    showid              The show id in the form 'hpr1234'

endusage
    exit "$result"
}

# }}}

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

#-------------------------------------------------------------------------------
# Directories and files
#-------------------------------------------------------------------------------
LOGS="$BASEDIR/logs"
make_dir "${LOGS}"

LOGFILE="$LOGS/$SCRIPT.log"

#-------------------------------------------------------------------------------
# Options
#-------------------------------------------------------------------------------
# Default settings
#
VERBOSE=0

#
# Process options. The leading ':' in the option string selects silent error
# reporting, so a missing option argument arrives as ':' and an unknown
# option as '?'.
#
while getopts :d:Dhv opt
do
    case "${opt}" in
        D) DEBUG=1;;
        d) DRYRUN=$OPTARG;;
        h) _usage 0;;
        v) ((VERBOSE++));;
        :) echo "** Option -$OPTARG requires an argument"
            _usage 1;;
        *) echo "** Unknown option"
            _usage 1;;
    esac
done
shift $((OPTIND - 1))

#
# Set option defaults and check their values. The dry-run setting is checked
# with a pattern match, not an arithmetic test, so that arbitrary text given
# to -d is never evaluated as an expression.
#
DRYRUN=${DRYRUN:-1}
if [[ $DRYRUN != [01] ]]; then
    coloured 'red' "** Use '-d 0' or '-d 1'"
    _usage 1
fi
[[ $VERBOSE -gt 0 && $DRYRUN -eq 1 ]] && echo "Dry run mode"

DEBUG=${DEBUG:-0}
[[ $DEBUG -eq 1 ]] && coloured 'yellow' "Debug mode"

#-------------------------------------------------------------------------------
# Argument check
#-------------------------------------------------------------------------------
# Should have one argument
#
if [[ $# != 1 ]]; then
    coloured 'red' "Missing argument"
    _usage 1
fi
show="${1,,}"

#
# Ensure show id is correctly formatted. We want it to be 'hpr1234' but we
# allow the 'hpr' bit to be omitted, as well as any leading zeroes. We need to
# handle the weirdness of "leading zero means octal" though, but we always
# store it as 'hpr1234' once processed.
#
# The pattern is anchored at both ends so that stray text around the digits
# (e.g. 'abc123xyz') is rejected rather than silently accepted.
#
show_re='^(hpr)?([0-9]+)$'
if [[ $show =~ $show_re ]]; then
    # '10#' forces base 10 so that leading zeroes are not read as octal
    printf -v show 'hpr%04d' "$((10#${BASH_REMATCH[2]}))"
else
    coloured 'red' "Incorrect show specification: $show"
    coloured 'yellow' "Use 'hpr9999' or '9999' format"
    exit 1
fi
_DEBUG "Parsed item: $show"

echo "Processing show $show"
_log "Processing show $show; dry-run: $([ "$DRYRUN" -eq 1 ] && echo "on" || echo "off")"

#-------------------------------------------------------------------------------
# Declarations and constants
#-------------------------------------------------------------------------------
declare -a iacache

#
# SHOWURL is where the show will be on the webserver
#
printf -v SHOWURL 'https://hackerpublicradio.org/eps/%s/index.html' "$show"

#
# CACHEDIR is where we store asset details and files
#
CACHEDIR="$BASEDIR/assets"
[ ! -d "$CACHEDIR" ] && {
    coloured 'red' "Creating cache directory"
    make_dir "$CACHEDIR"
}

#
# Pointers into the cache:
# LOCAL_ASSETDIR   - where the cache for this show lives
# LOCAL_FILEDIR    - where the IA files have been placed
# LOCAL_PARENTDIR  - the equivalent directory to the top show dir
#
LOCAL_ASSETDIR="$CACHEDIR/${show}"
LOCAL_FILEDIR="$LOCAL_ASSETDIR/files"
LOCAL_PARENTDIR="$LOCAL_FILEDIR/${show}"

#
# Pointers to the HPR server directories:
# REMOTE_ASSETDIR  - where the assets are to go
# REMOTE_PARENTDIR - the remote parent directory
#
REMOTE_ASSETDIR="public_html/eps/${show}/${show}"
REMOTE_PARENTDIR="public_html/eps/${show}"

#
# CMDTPL is a 'printf' template for running a command on the HPR server
# MANIFEST and DBNOTES are files kept in this show's part of the cache
#
CMDTPL='ssh hpr@hackerpublicradio.org %s'

MANIFEST="$CACHEDIR/$show/manifest"
DBNOTES="$CACHEDIR/$show/notes.html"

#-------------------------------------------------------------------------------
# Check the show exists in the database (or is visible on the website).
#-------------------------------------------------------------------------------
_verbose "Checking the show exists on the HPR server"

#
# A HEAD request is enough; only the HTTP status code is captured. If 'curl'
# itself fails (no network, DNS failure, ...) no meaningful status comes back,
# so that case is reported instead of being mistaken for "show is present".
#
if ! result=$(curl --head --silent --write-out "%{http_code}" --output /dev/null "$SHOWURL"); then
    coloured 'red' "Unable to contact the HPR server to check show '$show'"
    _log "Unable to contact the HPR server to check show '$show'"
    exit 1
fi

if [[ $result == 404 ]]; then
    coloured 'red' "Could not detect show '$show' on the HPR server"
    _log "Show '$show' not on the HPR server"
    exit 1
fi

#-------------------------------------------------------------------------------
# Check the show exists on the IA
#-------------------------------------------------------------------------------
_verbose "Checking the show exists on the IA server"

if ! ia metadata "$show" --exists > /dev/null 2>&1; then
    coloured 'red' "Could not detect show '$show' on the IA server"
    coloured 'yellow' "Check that archive.org is available"
    coloured 'yellow' "Try https://downfor.io/internet-archive"
    _log "Show '$show' not on the IA server"
    exit 1
fi

#-------------------------------------------------------------------------------
# Check IA, collect contents, classify them
#-------------------------------------------------------------------------------
# Interrogate the IA for the required item contents. If it returns True we can
# collect its contents, otherwise we can't proceed. The file 'TMP1' contains
# just a simple list of the files on the IA relating to this item.
#
_verbose "Collecting filenames from the IA server"
if ia list "$show" > "$TMP1"; then
    # 'mapfile -t' stores every line unchanged: leading/trailing blanks are
    # kept and a final line with no newline is not lost.
    mapfile -t iacache < "$TMP1"
else
    coloured 'red' "Item $show can't be found on the IA"
    coloured 'red' "Can't continue"
    _log "Files for show '$show' not on the IA server"
    exit 1
fi
_DEBUG "IA cache" "${iacache[@]}"

#
# Determine which files are assets. The patterns are tried in the order
# audio, metadata, transcript, asset; the first match wins and a file
# matching none of them is ignored.
#
_verbose "Categorising files held on the IA"
declare -a audio=() ia_transcript=() ia_asset=()
audio_re="^${show}\.(flac|mp3|ogg|opus|spx|wav)\$"
# transcript_re="^${show}/${show}/${show}\.(json|srt|tsv|txt|vtt)\$"
transcript_re="^${show}/${show}\.(json|srt|tsv|txt|vtt)\$"
asset_re="^${show}/(${show}/)?.*\$"
metadata_re="^(__ia_thumb.jpg|${show}[^/]+\.(afpk|torrent|gz|xml|sqlite|png))\$"

for file in "${iacache[@]}"; do
    if [[ $file =~ $audio_re ]]; then
        audio+=("$file")
    elif [[ $file =~ $metadata_re ]]; then
        _verbose "Skipping $file"
        continue
    elif [[ $file =~ $transcript_re ]]; then
        ia_transcript+=("$file")
    elif [[ $file =~ $asset_re ]]; then
        ia_asset+=("$file")
    fi
done

#
# Report what was collected at verbosity level 2
#
if [[ $VERBOSE -gt 1 ]]; then
    coloured 'cyan' "** audio (${#audio[@]}):"
    printf '%s\n' "${audio[@]}"
    coloured 'cyan' "** transcript (${#ia_transcript[@]}):"
    printf '%s\n' "${ia_transcript[@]}"
    coloured 'cyan' "** asset (${#ia_asset[@]}):"
    printf '%s\n' "${ia_asset[@]}"
fi

#
# Always record the asset count; the log should not depend on how verbose the
# run was.
#
_log "IA asset count for show '$show' = ${#ia_asset[@]}"

#
# No assets, no need to proceed!
#
if [[ ${#ia_asset[@]} -eq 0 ]]; then
    coloured 'green' "No IA assets found for show $show; nothing to do"
    _log "Nothing to do for show $show"
    exit 0
fi

#-------------------------------------------------------------------------------
# Check what's on the HPR server
#-------------------------------------------------------------------------------
#
# 'rc' is the remote command template. The 'find' it builds prints each file
# path relative to the show directory, prefixed with the show id and a slash.
#
printf -v rc 'find public_html/eps/%s -type f -printf "%s/%%P\\n"' "$show" "$show"

#
# 'command' is the local command we'll run to run a remote command on the HPR
# server. It is executed with 'eval' below; the only variable part is the show
# id, which has been reduced to the form 'hpr1234' by this point.
#
# shellcheck disable=SC2059 disable=SC2089
printf -v command "$CMDTPL" "'$rc'"
if [[ $VERBOSE -gt 1 ]]; then
    echo "Command: $command"
fi

declare -a hpr_asset=()

#
# Only the show's own top-level page is ignored. The pattern is anchored and
# the dot escaped so that an asset which merely ends in 'index.html' (for
# example inside the asset directory) is still counted.
#
ignore_re="^${show}/index\.html\$"

#
# Run the command and save the output. Save the asset names returned in an
# array. TODO: Handle errors from the command
#
#
# NOTE: We also want to interrogate the HPR state in dry-run mode
#
# if [[ $DRYRUN -eq 0 ]]; then
# else
#     coloured 'yellow' "Would have searched for assets on the HPR server"
# fi
eval "$command" > "$TMP2"
RES=$?
if [[ $RES -eq 0 ]]; then
    _verbose "$(coloured 'green' "Remote command successful")"
    # 'IFS=' keeps leading and trailing blanks in file names
    while IFS= read -r hprfile; do
        if [[ ! $hprfile =~ $ignore_re ]]; then
            hpr_asset+=("${hprfile}")
        fi
    done < "$TMP2"

    _verbose "$(coloured 'green' "Assets found on HPR server = ${#hpr_asset[@]}")"
    _verbose "$(printf '%s\n' "${hpr_asset[@]}")"
    _log "Assets found on HPR server = ${#hpr_asset[@]}"
else
    coloured 'red' "Remote command failed"
    _log "Failed while searching for HPR assets"
    exit 1
fi

#-------------------------------------------------------------------------------
# Compare the two asset lists and return what's missing on the HPR server
#-------------------------------------------------------------------------------
# TODO: The algorithm in find_missing does not handle the instance where there
# are pictures in one directory and a lower directory containing thumbnails,
# AND THE FILE NAMES ARE THE SAME!
#
declare -a missing=()
if [[ ${#hpr_asset[@]} -eq 0 ]]; then
    # Nothing at all on the HPR server, so every IA asset is missing
    missing=( "${ia_asset[@]}" )
else
    find_missing ia_asset hpr_asset missing
fi

_verbose "$(coloured 'cyan' "** missing (${#missing[@]}):")"
_verbose "$(printf '%s\n' "${missing[@]}")"

if [[ ${#missing[@]} -eq 0 ]]; then
    coloured 'green' "No missing assets detected; nothing to do"
    _log "No missing assets detected; nothing to do"
    exit 0
else
    coloured 'yellow' \
        "Found ${#missing[@]} $(ngettext file files "${#missing[@]}") missing on the HPR server"
fi

#-------------------------------------------------------------------------------
# Prepare to copy the missing files
#-------------------------------------------------------------------------------
make_dir "$LOCAL_FILEDIR"

declare -a downloads=()

#
# Check whether files are already downloaded
#
for file in "${missing[@]}"; do
    if [[ ! -e "$LOCAL_FILEDIR/$show/$file" ]]; then
        downloads+=("$file")
    fi
done

_verbose "$(coloured 'cyan' "** downloads (${#downloads[@]}):")"
_verbose "$(printf '%s\n' "${downloads[@]}")"

#
# If we have files to download get them now. A failed download is fatal:
# carrying on would upload an incomplete set of assets and log the show as
# repaired.
#
if [[ ${#downloads[@]} -gt 0 ]]; then
    if [[ $DRYRUN -eq 1 ]]; then
        coloured 'yellow' "Would have downloaded missing files from the IA"
    else
        ia download "$show" --destdir="$LOCAL_FILEDIR" "${downloads[@]}"
        RES=$?
        if [[ $RES -eq 0 ]]; then
            coloured 'green' "Downloads complete"
            _log "Downloaded IA assets for show $show"
        else
            coloured 'red' "Download of missing files from the IA failed"
            _log "Failed to download IA assets for show $show"
            exit 1
        fi
    fi
else
    coloured 'yellow' "IA files are already downloaded"
fi

# shellcheck disable=SC2089
RSYNCTPL="rsync -a -e 'ssh' %s hpr@hpr:%s"

#-------------------------------------------------------------------------------
# Build the 'ssh' command to make a directory
#-------------------------------------------------------------------------------
#
# Prepare to make the remote directory if necessary.
#
# - $rc is the remote command we'll run on the server
# - $command is the full 'ssh' command including $rc
#
printf -v rc 'if [ ! -e "%s" ]; then mkdir -p "%s"; fi' \
    "$REMOTE_ASSETDIR" "$REMOTE_ASSETDIR"

# shellcheck disable=SC2059 disable=SC2089
printf -v command "$CMDTPL" "'$rc'"

#-------------------------------------------------------------------------------
# Run or report the command that would be run
#-------------------------------------------------------------------------------
if [[ $DRYRUN -eq 0 ]]; then
    eval "$command"
    RES=$?
    if [[ $RES -eq 0 ]]; then
        coloured 'green' "Remote directory creation successful"
    else
        # Stop here rather than attempt an upload into a missing directory
        coloured 'red' "Remote directory creation failed"
        _log "Remote directory creation failed for show $show"
        exit 1
    fi
else
    coloured 'yellow' "Would have created the remote directory"
    echo "$command"
fi

#-------------------------------------------------------------------------------
# Synchronise assets to the directory
#-------------------------------------------------------------------------------
# We perform an 'rsync' over 'ssh' to synchronise files from
# ~/HPR/InternetArchive/assets/hprXXXX/files/hprXXXX to
# public_html/eps/hprXXXX (on the HPR server)
#
# shellcheck disable=SC2059 disable=SC2089
printf -v command "$RSYNCTPL" "$LOCAL_PARENTDIR/" "$REMOTE_PARENTDIR/"

if [[ $DRYRUN -eq 0 ]]; then
    eval "$command"
    RES=$?
    if [[ $RES -eq 0 ]]; then
        coloured 'green' "Remote upload successful"
        _log "Uploaded assets for show $show"
    else
        coloured 'red' "Remote upload failed"
        exit 1
    fi
else
    coloured 'yellow' "Would have synchronised local assets with the remote directory"
    echo "$command"
fi

#-------------------------------------------------------------------------------
# Make a 'manifest' file if necessary
#-------------------------------------------------------------------------------
if [[ $DRYRUN -eq 0 ]]; then
    if [[ ! -e $MANIFEST ]]; then
        find "$LOCAL_PARENTDIR" -type f -printf '%P\n' > "$MANIFEST"
        _verbose "$(coloured 'green' "Created manifest file")"
        _log "Created manifest file $MANIFEST"
    fi
fi

#-------------------------------------------------------------------------------
# Save the notes from the database if necessary
#-------------------------------------------------------------------------------
if [[ $DRYRUN -eq 0 ]]; then
    if [[ ! -e $DBNOTES ]]; then
        if ! "$TUNNEL_IS_OPEN"; then
            "$OPEN_TUNNEL"
        fi

        # '${show:3}' drops the leading 'hpr' to leave the numeric show id
        if "$Q2T" -config="$BASEDIR/.hpr_livedb.cfg" \
            -temp="$BASEDIR/query2tt2_nokey.tpl" \
            -out="$DBNOTES" \
            -dbarg="${show:3}" \
            'select notes from eps where id = ?'
        then
            _verbose "$(coloured 'green' "Created notes file")"
            _log "Created notes file $DBNOTES"
        else
            # A failure is reported whatever the verbosity level
            coloured 'red' "Creation of notes file failed"
            _log "Creation of notes file $DBNOTES failed"
        fi
    fi
fi

#-------------------------------------------------------------------------------
# Adjust the notes with 'fix_asset_links' (exists but not fully implemented
# yet :-)
#-------------------------------------------------------------------------------
if [[ $DRYRUN -eq 0 ]]; then
    echo "$FIXAL"
    # $FIXAL
fi

#
# All done
#
if [[ $DRYRUN -eq 0 ]]; then
    _log "Repaired show $show"
fi

#-------------------------------------------------------------------------------
# √ Make a place to hold the files on this machine
# √ Download them from the IA
# √ Make a directory on the HPR server
# √ Copy the assets to the HPR server
# √ Modify the notes to point to the assets on the server
#-------------------------------------------------------------------------------

# vim: syntax=sh:ts=8:sw=4:ai:et:tw=78:fo=tcrqn21:fdm=marker