#!/bin/bash -
#===============================================================================
#
#         FILE: recover_transcripts
#
#        USAGE: ./recover_transcripts [-h] [-d 0|1] [-D] [-F] [-v] item
#
#  DESCRIPTION: Intended to be run on `borg`; collects assets from the
#               locally-mounted backup disk and places them in a local
#               directory (organised to be compatible with the IA), then
#               uploads anything that is missing on the IA.
#
#               Version 0.1.* looks for assets in the 'eps/' directory and
#               copies them to the cache. Also moves the IA copies so all is
#               aligned. Many shows earlier than mid 2019 are likely to need
#               this addition.
#
#      OPTIONS: ---
# REQUIREMENTS: ---
#         BUGS: ---
#        NOTES: ---
#       AUTHOR: Dave Morriss (djm), Dave.Morriss@gmail.com
#      VERSION: 0.1.4
#      CREATED: 2024-07-14 13:22:58
#     REVISION: 2024-08-20 17:38:19
#
#===============================================================================
#

set -o nounset                              # Treat unset variables as an error

VERSION="0.1.4"

SCRIPT=${0##*/}
# DIR=${0%/*}

# NOTE: despite its name this is file descriptor 2 (standard error); it is
# where the usage message is written.
STDOUT="/dev/fd/2"

#
# Select the appropriate working directory for the host
#
case "$(hostname)" in
    i7-desktop)
        echo "To be run only on 'borg'"
        exit 1
        ;;
    borg)
        BASEDIR="$HOME/IA"
        REPAIRS="$BASEDIR/repairs"
        BACKUP="/mnt/backup_disk/HPR/HPR-MIRROR"
        ;;
    *)
        echo "Wrong host!"
        exit 1
        ;;
esac

cd "$BASEDIR" || { echo "Failed to cd to $BASEDIR"; exit 1; }

#
# Load library functions. Exit with a failure status if the library is
# missing (a bare 'exit' would have returned the status of the 'echo').
#
LIB="$HOME/bin/function_lib.sh"
[ -e "$LIB" ] || { echo "Unable to source functions"; exit 1; }
# shellcheck disable=SC1090
source "$LIB"

#
# Enable coloured messages
#
define_colours

#
# Sanity checks
#
JQ=$(command -v jq)
[ -n "$JQ" ] || { echo "Program 'jq' was not found"; exit 1; }

IA=$(command -v ia)
[ -n "$IA" ] || { echo "Program 'ia' was not found"; exit 1; }

REPIT="$BASEDIR/repair_item"
[ -e "$REPIT" ] || { echo "Program '$REPIT' was not found"; exit 1; }

IADB="$BASEDIR/ia.db"
[ -e "$IADB" ] || { echo "Database '$IADB' was not found"; exit 1; }

# {{{ -- Functions -- _IA_move, queued_tasks, make_dir, _ifbool, _log, _verbose, _usage

#===  FUNCTION  ================================================================
#         NAME: _IA_move
#  DESCRIPTION: Performs a file move on the IA, with retries if it fails.
#               Assumes the existence of functions 'coloured', '_log',
#               '_verbose' and '_DEBUG'
#   PARAMETERS: $1      The path to move from
#               $2      The path to move to
#      RETURNS: False if the number of retries is exceeded, otherwise true.
#===============================================================================
_IA_move () {
    local from="${1:?Usage: _IA_move from to}"
    local to="${2:?Usage: _IA_move from to}"

    local -i retry_threshold=5
    local -i retries=0
    local sleeptime=20

    _DEBUG "ia move \"$from\" \"$to\" --no-derive --no-backup > /dev/null 2>&1"

    # coloured 'blue' "Moving $from to $to"

    #
    # Run the 'ia move' command directly (no 'eval', so file names containing
    # quotes, '$' or backticks are passed through untouched). If it succeeds
    # we leave the loop. If it fails, report the problem and count the
    # attempt so we don't loop forever. Once the limit is reached treat it as
    # a failure and return an error; otherwise sleep for a while and retry.
    # The intention is to catch the case when a request times out. The 'ia'
    # command is performing its own retries when the system is overloaded,
    # but these are non-fatal.
    #
    until ia move "$from" "$to" --no-derive --no-backup > /dev/null 2>&1; do
        coloured 'red' "Failure when moving $from to $to"
        ((retries++))

        _log "$(printf 'Failed to move %s to %s [%d]' "$from" "$to" "$retries")"

        if [[ $retries -ge $retry_threshold ]]; then
            _verbose \
                "$(coloured 'red' "Retry limit reached; abandoning this move")"
            return 1
        fi

        _verbose "$(coloured 'blue' "Pausing for $sleeptime seconds and retrying")"
        sleep "$sleeptime"
    done

    coloured 'green' "Moved $from to $to on the IA"
    _log "Moved $from to $to on the IA"

    # Explicit success: a logging failure must not look like a failed move
    return 0
}

#===  FUNCTION  ================================================================
#         NAME: queued_tasks
#  DESCRIPTION: Queries the IA for the tasks of an item and counts those in
#               the "catalog" category. Writes the number to standard output
#               so it can be captured.
#   PARAMETERS: $1      IA item (like hpr1192)
#      RETURNS: Nothing
#===============================================================================
queued_tasks () {
    local item="${1:?Usage: queued_tasks item}"
    local -i count=0

    count="$(ia tasks "$item" |\
        jq -s '[.[] | if .category == "catalog" then .status else empty end] | length')"

    echo "$count"

    return
}

#===  FUNCTION  ================================================================
#         NAME: make_dir
#  DESCRIPTION: Make a directory if it doesn't exist, failing gracefully on
#               errors.
#   PARAMETERS: $1      directory path
#      RETURNS: True if success, otherwise exits the caller script
#===============================================================================
make_dir () {
    local dir="${1}"

    if [[ ! -d $dir ]]; then
        mkdir -p "$dir" || {
            coloured 'red' "Failed to create $dir"
            exit 1
        }
    fi
}

#===  FUNCTION  ================================================================
#         NAME: _ifbool
#  DESCRIPTION: Simplifies conditional expressions when they need to return
#               one of two strings. Use as:
#               echo "Hello $(_ifbool 1 'World' 'Everyone')" → "Hello World"
#   PARAMETERS: $1      Integer being tested. If 1 then it's true, otherwise
#                       it's false. Non-numeric is treated as 0/false.
#               $2      String returned for True
#               $3      String returned for False
#      RETURNS: Nothing
#===============================================================================
_ifbool () {
    local -i _bool="${1:-0}"
    local _t="${2:-true}"
    local _f="${3:-false}"

    if [ "$_bool" -eq 1 ]; then
        echo "$_t"
    else
        echo "$_f"
    fi

    return
}

#===  FUNCTION  ================================================================
#         NAME: _log
#  DESCRIPTION: Appends a timestamped record to the file "$LOGFILE"
#   PARAMETERS: $1      Message to write
#      RETURNS: Nothing
#===============================================================================
_log () {
    local message="${1}"

    echo "$(date +%F\ %T) $message" >> "$LOGFILE"
}

#===  FUNCTION  ================================================================
#         NAME: _verbose
#  DESCRIPTION: Writes a message in verbose mode (when $VERBOSE is non-zero)
#   PARAMETERS: *       message strings to write, one per line
#      RETURNS: Nothing
#===============================================================================
_verbose () {
    [ "$VERBOSE" -eq 0 ] && return

    for msg; do
        printf '%s\n' "$msg"
    done
}

#===  FUNCTION  ================================================================
#         NAME: _usage
#  DESCRIPTION: Reports usage; always exits the script after doing so
#   PARAMETERS: 1 - the integer to pass to the 'exit' command
#      RETURNS: Nothing
#===============================================================================
_usage () {
    local -i result=${1:-0}

    cat >"$STDOUT" <<-endusage
${SCRIPT} - version: ${VERSION}

Usage: ./${SCRIPT} [-h] [-d 0|1] [-D] [-F] [-v] item

Attempts to repair an IA item where the upload has failed for some reason.

Options:
  -h            Print this help.
  -d 0|1        Dry run: -d 1 (the default) runs the script in dry-run
                mode where nothing is changed but the actions that
                will be taken are reported; -d 0 turns off dry-run
                mode and the actions will be carried out.
  -D            Run in debug mode where a lot more information is
                reported.
  -F            Ignore (some) interlocks that will cause failure, such as
                the existence of the local cache directory for the
                item being processed.
  -v            Run in verbose mode where more information is
                reported. Default is off.

Arguments:
    item        The item in the form 'hpr1234'

endusage
    exit "$result"
}

# }}}

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

#-------------------------------------------------------------------------------
# Directories and files
#-------------------------------------------------------------------------------
LOGS="$BASEDIR/logs"
make_dir "${LOGS}"
LOGFILE="$LOGS/$SCRIPT.log"

#
# Option processing
#
while getopts :d:DFhv opt
do
    case "${opt}" in
        D) DEBUG=1;;
        d) DRYRUN=$OPTARG;;
        F) FORCE=1;;
        h) _usage 0;;
        v) VERBOSE=1;;
        *) echo "** Unknown option"
            _usage 1;;
    esac
done
shift $((OPTIND - 1))

#
# Set option defaults and check their values. The dry-run value is checked as
# a string so that a non-numeric argument is rejected cleanly rather than
# being evaluated arithmetically (which fails under 'nounset').
#
DRYRUN=${DRYRUN:-1}
if [[ ! $DRYRUN =~ ^[01]$ ]]; then
    echo "** Use '-d 0' or '-d 1'"
    _usage 1
fi
[[ $DRYRUN -eq 1 ]] && echo "Dry run mode"

DEBUG=${DEBUG:-0}
[[ $DEBUG -eq 1 ]] && coloured 'yellow' "Debug mode"

FORCE=${FORCE:-0}

VERBOSE=${VERBOSE:-0}

#
# Should have one argument
#
if [[ $# != 1 ]]; then
    coloured 'red' "Missing argument"
    _usage 1
fi
item="${1}"

#
# Ensure item spec is correctly formatted; normalise to 'hpr' plus a
# zero-padded four digit number (10# forces base 10 so leading zeroes are not
# read as octal).
#
if [[ $item =~ hpr([0-9]+) ]]; then
    printf -v item 'hpr%04d' "$((10#${BASH_REMATCH[1]}))"
else
    coloured 'red' "Incorrect show specification: $item"
    coloured 'yellow' "Use 'hpr9999' format"
    exit 1
fi
_DEBUG "Parsed item: $item"

_log "$SCRIPT $VERSION ($(_ifbool "$DRYRUN" 'dry-run' 'live'))"

#
# Having an entry for the show in 'ia.db' is important, so check there is one
#
SQL="select 1 from episodes where id = ${item:3}"
if [[ $(sqlite3 -list "$IADB" "$SQL" 2>/dev/null) -ne 1 ]]; then
    coloured 'red' "Unable to find show $item in the local IA database"
    coloured 'yellow' "Can't continue"
    exit 1
fi
_verbose "$(coloured 'yellow' "Show $item is in the local IA database")"
_log "Show $item is in the local IA database"

#
# It's possible that the show upload failed before anything was uploaded, even
# the metadata. It's never been seen, but it seems wise to cater for it.
#
if ! ia metadata "$item" --exists > /dev/null 2>&1; then
    coloured 'red' "This item is not apparently on the IA; can't continue"
    exit 1
fi
_verbose "$(coloured 'yellow' "Show $item is on the IA")"
_log "Show $item is on the IA"

#
# Directory paths
#
FROMPARENTDIR="$BACKUP/public_html/eps"
FROMDIR="$FROMPARENTDIR/$item"
TOPARENTDIR="$REPAIRS/$item"
TOASSETDIR="$TOPARENTDIR/$item"

#
# RE to ignore certain files using 'grep -v -E ...'
#
IGNORE="($item\.(flac|mp3|ogg|opus|spx|wav)$)"

#-------------------------------------------------------------------------------
# Check there are asset files on the backup disk before proceeding. At least
# we need the transcripts. If no files at all we can't continue.
#-------------------------------------------------------------------------------
declare -a BACKUPFILES
mapfile -t BACKUPFILES < \
    <(find "$FROMPARENTDIR" -type f -name "$item*" | grep -v -E "${IGNORE}")

_DEBUG "$(coloured 'purple' "Backup files")" "${BACKUPFILES[@]}"

if [[ ! -d $FROMDIR || ${#BACKUPFILES[@]} -eq 0 ]]; then
    coloured 'red' "No files found in $FROMDIR"
    coloured 'red' "Can't continue!"
    exit 1
fi
_log "Files found on backup disk ${#BACKUPFILES[*]}"

#-------------------------------------------------------------------------------
# Make the needed local cache directory for later
#-------------------------------------------------------------------------------
if [[ $FORCE -ne 1 && -e $TOPARENTDIR ]]; then
    coloured 'red' "Directory $TOPARENTDIR already exists; can't continue."
    coloured 'yellow' 'This implies that all files have been copied already.'
    coloured 'yellow' "If you're sure, consider running: '$REPIT -X -d0 $item'"
    coloured 'yellow' 'Otherwise, consider running again with option -F.'
    exit 1
else
    if [[ $DRYRUN -eq 1 ]]; then
        coloured 'yellow' "Would have created directory $TOPARENTDIR"
    else
        # 'make_dir' exits the script if the directory can't be created
        make_dir "$TOASSETDIR"
        _verbose "$(coloured 'yellow' "Created directory $TOASSETDIR")"
        _log "Created directory $TOASSETDIR"
    fi
fi

#-------------------------------------------------------------------------------
# Collect asset data from the database
#-------------------------------------------------------------------------------
SQL="select filename from assets where episode_id = ${item:3}"
declare -a IADBASSETS
mapfile -t IADBASSETS < <(sqlite3 -list "$IADB" "$SQL" 2>/dev/null)

_DEBUG "$(coloured 'purple' "SQLite IA DB files")" "${IADBASSETS[@]}"
_log "Files found in ia.db ${#IADBASSETS[*]}"

#-------------------------------------------------------------------------------
# Collect IA data, only original files generated by HPR. We exclude audio
# files from this set.
#-------------------------------------------------------------------------------
JQPROG='.files[] | select(.source == "original" and .format != "Metadata" and '
JQPROG+='.format != "Item Tile") | (.name) | @text'

declare -a IAFILES
mapfile -t IAFILES < \
    <(ia metadata "$item" | "$JQ" -r "$JQPROG" | grep -v -E "${IGNORE}")

_DEBUG "$(coloured 'purple' "IA files (originals)")" "${IAFILES[@]}"
_log "Files found on IA (originals) ${#IAFILES[*]}"

#-------------------------------------------------------------------------------
# Work out whether to copy assets from the backup disk, or whether to move
# files on the IA. Whatever we decide we also need to copy transcripts from
# the backup disk and upload to the IA
#-------------------------------------------------------------------------------
#
# Check each asset from the $IADB database to see if it's on the IA. We'll get back
# a path if it's where we want it, otherwise just a filename.
#
declare -a MOVES=()

coloured 'purple' "Checking IA files for moves"

#
# If we find an asset by looking for its basename in the list of files we got
# from the IA and if they are the same we need to move such files to the
# sub-directory.
#
for asset in "${IAFILES[@]}"; do
    #
    # Skip IA files with directories
    #
    if [[ $asset =~ / ]]; then
        continue
    fi

    #
    # Look the name up as a fixed string ('-F') so characters such as '.' in
    # the file name are not interpreted as regular expression operators.
    #
    IA_match=$( grep -F -- "${asset}" <(printf '%s\n' "${IADBASSETS[@]}") )
    if [[ $IA_match = "$asset" ]]; then
        MOVES+=("$IA_match")
    fi
done

#
# Count of completed IA moves. Initialised here, outside the branch below,
# because it is also consulted afterwards when deciding whether to wait for
# IA tasks; under 'nounset' an unset variable would abort the script.
#
mcount=0

#
# If we found any moves then we can move them in the IA item now and copy the
# files from the backup disk to the cache in case we need them. They will
# eventually get deleted by 'cron'.
#
if [[ ${#MOVES[@]} -gt 0 ]]; then
    _DEBUG "$(coloured 'purple' "Files to be moved")" "${MOVES[@]}" "----"

    for asset in "${MOVES[@]}"; do
        # source & destination for IA moves
        iafrom="$item/$asset"
        iato="$item/$item/$asset"

        _DEBUG "\$iafrom: $iafrom" "\$iato:   $iato" ""

        #
        # If IA source and destination are the same no moves are needed. For
        # the local cache the later 'rsync' will be enough.
        #
        if [[ $iafrom != "$iato" ]]; then
            if [[ $DRYRUN -eq 1 ]]; then
                coloured 'yellow' "ia move $iafrom $iato --no-derive --no-backup"
                coloured 'yellow' "cp $FROMPARENTDIR/$asset $TOASSETDIR/"
            else
                #
                # Perform the move. If the retries are exceeded things get
                # complicated, so just abort so we can try again later.
                #
                _verbose "$(coloured 'blue' "Moving $iafrom → $iato on IA")"
                if _IA_move "$iafrom" "$iato"; then
                    #
                    # Update the cache (but only if the move occurred)
                    #
                    _verbose "$(coloured 'blue' "Copying from backup disk to cache")"
                    cp "$FROMPARENTDIR/$asset" "$TOASSETDIR/"
                    ((mcount++))
                else
                    coloured 'red' "Retries exhausted. Aborting recovery"
                    exit 1
                fi
            fi
        fi
    done

    #
    # Report what was done
    #
    coloured 'green' "Moved $mcount $(ngettext file files "$mcount")"
    _log "Moved $mcount $(ngettext file files "$mcount")"
else
    coloured 'yellow' "No moves needed"
    _log "No moves needed"
fi

#
# Wait for the IA moves to finish
#
if [[ $DRYRUN -eq 0 ]]; then
    if [[ $mcount -gt 0 ]]; then
        until [[ $(queued_tasks "$item") -eq 0 ]]; do
            coloured 'yellow' "Waiting for IA tasks to complete"
            sleep 1m
        done
    fi
else
    if [[ $mcount -gt 0 ]]; then
        coloured 'yellow' "Would have waited for any IA tasks to complete"
    fi
fi

#-------------------------------------------------------------------------------
# Copy files from the backup disk to the cache
#-------------------------------------------------------------------------------
if [[ $DRYRUN -eq 1 ]]; then
    coloured 'yellow' "Would have copied files from backup disk → cache"
    rsync -n -vaP --exclude=index.html "$FROMDIR" "$TOPARENTDIR"
else
    rsync -vaP --exclude=index.html "$FROMDIR" "$TOPARENTDIR"
    _verbose "$(coloured 'yellow' "Copied files from $FROMDIR")"
    _log "Copied files from $FROMDIR"
fi

# TODO: Is this needed?
# #
# # Put any source audio in the right place.
# #
# if [[ $DRYRUN -eq 1 ]]; then
#     coloured 'yellow' "Would have moved source files if found"
# else
#     #
#     # Turn on 'nullglob' to get an empty result if the glob expression doesn't
#     # match.
#     #
#     NG=$(shopt -p nullglob)
#     shopt -s nullglob
#
#     #
#     # Any source files should be in repairs/hpr1234/ and should go to the IA
#     # in the comparable place. We will not put it on the HPR server though.
#     #
#     # TODO: Is this right?
#     movecount=0
#     for file in "$TOPARENTDIR"/*_source.*; do
#         if mv "$file" "$TOPARENTDIR"; then
#             ((movecount++))
#         fi
#     done
#
#     eval "$NG"
#
#     #
#     # Show the directories after any move
#     #
#     if [[ $movecount -gt 0 ]]; then
#         _verbose "$(coloured 'yellow' "Moved source file(s)")"
#         ls -lR "$REPAIRS/$item/"
#     fi
#
# fi

#-------------------------------------------------------------------------------
# Using the cache as the reference upload whatever is missing to the IA
#-------------------------------------------------------------------------------
if [[ $DRYRUN -eq 1 ]]; then
    coloured 'yellow' "Would have found and repaired missing files"
else
    _verbose "$(coloured 'yellow' "Finding and repairing missing files")"
    _log "Finding and repairing missing files (with $REPIT)"
    "$REPIT" -X -d0 "$item"
fi

# vim: syntax=sh:ts=8:sw=4:ai:et:tw=78:fo=tcrqn21:fdm=marker