#!/bin/bash -
#===============================================================================
#
#         FILE: recover_transcripts
#
#        USAGE: ./recover_transcripts [-h] [-d 0|1] [-D] [-F] [-v] item
#
#  DESCRIPTION: Intended to be run on `borg`; collects assets from the
#               locally-mounted backup disk and places them in a local
#               directory (organised to be compatible with the IA), then
#               uploads anything that is missing on the IA.
#
#               Version 0.1.* looks for assets in the 'eps/' directory and
#               copies them to the cache. Also moves the IA copies so all is
#               aligned. Many shows earlier than mid 2019 are likely to need
#               this addition.
#
#      OPTIONS: ---
# REQUIREMENTS: ---
#         BUGS: ---
#        NOTES: ---
#       AUTHOR: Dave Morriss (djm), Dave.Morriss@gmail.com
#      VERSION: 0.1.4
#      CREATED: 2024-07-14 13:22:58
#     REVISION: 2024-08-20 17:38:19
#
#===============================================================================

# set -o nounset                              # Treat unset variables as an error

#
# Script identity: the release number and the name the script was invoked
# under (with any leading path removed)
#
VERSION='0.1.4'

SCRIPT="${0##*/}"
# DIR=${0%/*}

#
# File that '_usage' writes its text to. Note that, despite the variable's
# name, this is file descriptor 2.
#
STDOUT='/dev/fd/2'

#
# Set the host-dependent locations. Only 'borg' is acceptable; any other host
# gets a message and the script stops.
#
case "$(hostname)" in
    borg)
        BASEDIR="${HOME}/IA"
        REPAIRS="${BASEDIR}/repairs"
        BACKUP='/mnt/backup_disk/HPR/HPR-MIRROR'
        ;;
    i7-desktop)
        echo "To be run only on 'borg'"
        exit 1
        ;;
    *)
        echo "Wrong host!"
        exit 1
        ;;
esac

#
# Everything else is done relative to the base directory
#
if ! cd "$BASEDIR"; then
    echo "Failed to cd to $BASEDIR"
    exit 1
fi

#
# Load library functions. The library is mandatory, so fail with a non-zero
# status if it's missing (a bare 'exit' here would pass on the status of the
# preceding 'echo', which is 0).
#
LIB="$HOME/bin/function_lib.sh"
[ -e "$LIB" ] || { echo "Unable to source functions"; exit 1; }
# shellcheck disable=SC1090
source "$LIB"

#
# Enable coloured messages ('define_colours' is not defined in this file;
# presumably it comes from the library just loaded)
#
define_colours

#
# Sanity checks: the external programs, the repair script and the database
# used further on must all be present before anything is attempted.
#
JQ=$(command -v jq)
[ -n "$JQ" ] || { echo "Program 'jq' was not found"; exit 1; }
IA=$(command -v ia)
[ -n "$IA" ] || { echo "Program 'ia' was not found"; exit 1; }

#
# 'sqlite3' and 'rsync' are called directly later. Without 'sqlite3' the
# database lookup would fail silently (its errors are discarded) and be
# reported as a missing show, so check for both explicitly.
#
command -v sqlite3 > /dev/null 2>&1 ||
    { echo "Program 'sqlite3' was not found"; exit 1; }
command -v rsync > /dev/null 2>&1 ||
    { echo "Program 'rsync' was not found"; exit 1; }

REPIT="$BASEDIR/repair_item"
[ -e "$REPIT" ] || { echo "Program '$REPIT' was not found"; exit 1; }
IADB="$BASEDIR/ia.db"
[ -e "$IADB" ] || { echo "Database '$IADB' was not found"; exit 1; }

# {{{ -- Functions -- _IA_move, queued_tasks, make_dir, _ifbool, _log, _verbose, _usage


#===  FUNCTION  ================================================================
#         NAME: _IA_move
#  DESCRIPTION: Performs a file move on the IA, with retries if it fails.
#               Assumes the existence of functions 'coloured', '_log',
#               '_verbose' and '_DEBUG'
#   PARAMETERS: $1      The path to move from
#               $2      The path to move to
#      RETURNS: False if the number of retries is exceeded, otherwise true.
#===============================================================================
_IA_move () {
    local from="${1:?Usage: _IA_move from to}"
    local to="${2:?Usage: _IA_move from to}"

    local -r retry_threshold=5
    local -r sleeptime=20
    local -i retries=0

    #
    # Hold the command as an array and run it directly. Each path then reaches
    # 'ia' as exactly one argument whatever characters it contains; nothing is
    # re-parsed by the shell as it would be with 'eval' on a string.
    #
    local -a command=(ia move "$from" "$to" --no-derive --no-backup)
    _DEBUG "ia move \"$from\" \"$to\" --no-derive --no-backup > /dev/null 2>&1"

    # coloured 'blue' "Moving $from to $to"

    #
    # Run the command. If it succeeds the loop is skipped. If it fails, report
    # and log the problem and count the attempt so we don't loop forever.
    # Once the count reaches the limit treat this as a failure and return
    # false; otherwise sleep for a while and try again. The intention is to
    # catch the case when an operation times out.
    #
    until "${command[@]}" > /dev/null 2>&1; do
        coloured 'red' "Failure when moving $from to $to"
        ((retries++))

        _log "$(printf 'Failed to move %s to %s [%d]' "$from" "$to" "$retries")"

        if [[ $retries -ge $retry_threshold ]]; then
            _verbose \
                "$(coloured 'red' "Retry limit reached; abandoning this move")"
            return 1
        fi

        _verbose "$(coloured 'blue' "Pausing for $sleeptime seconds and retrying")"
        sleep "$sleeptime"
    done

    coloured 'green' "Moved $from to $to on the IA"
    _log "Moved $from to $to on the IA"

    return 0
}

#===  FUNCTION  ================================================================
#         NAME: queued_tasks
#  DESCRIPTION: Asks the IA (through 'ia tasks') for the tasks belonging to an
#               item and counts those whose category is "catalog"
#               (presumably the queued or running ones). The count is written
#               to STDOUT so that the caller can capture it.
#   PARAMETERS: $1      IA item (like hpr1192)
#      RETURNS: The status of the final 'echo'
#===============================================================================
queued_tasks () {
    local ia_item="${1:?Usage: queued_tasks item}"
    local jq_filter='[.[] | if .category == "catalog" then .status else empty end] | length'

    # Integer attribute: an empty result from the pipeline is stored as 0
    local -i ntasks=0

    ntasks="$(ia tasks "$ia_item" | jq -s "$jq_filter")"

    echo "$ntasks"

    return
}

#===  FUNCTION  ================================================================
#         NAME: make_dir
#  DESCRIPTION: Ensures that a directory exists, creating it (and any missing
#               parents) when necessary. A failure to create it is reported
#               through 'coloured' and ends the whole script.
#   PARAMETERS: $1      directory path
#      RETURNS: True if success, otherwise exits the caller script
#===============================================================================
make_dir () {
    local target="${1}"

    # Nothing to do when the directory is already there
    [[ -d $target ]] && return

    if ! mkdir -p "$target"; then
        coloured 'red' "Failed to create $target"
        exit 1
    fi
}

#===  FUNCTION  ================================================================
#         NAME: _ifbool
#  DESCRIPTION: Simplifies conditional expressions when they need to return
#               one of two strings. Use as:
#               echo "Hello $(_ifbool 1 'World' 'Everyone')" → "Hello World"
#   PARAMETERS: $1      Integer being tested (default 0). The value is held
#                       in an integer variable; 1 selects the 'true' string
#                       and anything else the 'false' one.
#               $2      String written for True (default 'true')
#               $3      String written for False (default 'false')
#      RETURNS: The status of the 'echo' that writes the chosen string
#===============================================================================
_ifbool () {
    local -i flag="${1:-0}"
    local when_true="${2:-true}"
    local when_false="${3:-false}"

    case "$flag" in
        1) echo "$when_true" ;;
        *) echo "$when_false" ;;
    esac

    return
}

#===  FUNCTION  ================================================================
#         NAME: _log
#  DESCRIPTION: Appends one record to the file "$LOGFILE". The record is the
#               current date and time (YYYY-MM-DD HH:MM:SS), a space, then
#               the message.
#   PARAMETERS: $1      Message to write
#      RETURNS: The status of the write to "$LOGFILE"
#===============================================================================
_log () {
    local text="${1}"
    local stamp

    stamp="$(date +%F\ %T)"

    printf '%s %s\n' "$stamp" "$text" >> "$LOGFILE"
}

#===  FUNCTION  ================================================================
#         NAME: _verbose
#  DESCRIPTION: Writes its arguments, one per line, but only when the global
#               'VERBOSE' is not 0
#   PARAMETERS: *       message strings to write
#      RETURNS: Nothing
#===============================================================================
_verbose () {
    #
    # Quiet mode: say nothing
    #
    if [ "$VERBOSE" -eq 0 ]; then
        return
    fi

    for msg in "$@"; do
        printf '%s\n' "$msg"
    done
}

#===  FUNCTION  ================================================================
#         NAME: _usage
#  DESCRIPTION: Reports usage; always exits the script after doing so. The
#               text is written to the file named by the global 'STDOUT' and
#               uses the globals 'SCRIPT' and 'VERSION'.
#   PARAMETERS: 1 - the integer to pass to the 'exit' command (default 0)
#      RETURNS: Nothing
#===============================================================================
_usage () {
    local -i result=${1:-0}

    #
    # The redirection target is quoted so that a path containing white space
    # is still a single word. The synopsis lists every option that the
    # 'getopts' loop in the main script accepts, including '-d'.
    #
    cat >"$STDOUT" <<-endusage
${SCRIPT} - version: ${VERSION}

Usage: ./${SCRIPT} [-h] [-d 0|1] [-D] [-F] [-v] item

Attempts to repair an IA item where the upload has failed for some reason.

Options:
  -h                    Print this help.
  -d 0|1                Dry run: -d 1 (the default) runs the script in dry-run
                        mode where nothing is changed but the actions that
                        will be taken are reported; -d 0 turns off dry-run
                        mode and the actions will be carried out.
  -D                    Run in debug mode where a lot more information is
                        reported.
  -F                    Ignore (some) interlocks that will cause failure, such
                        as the existence of the local cache directory for the
                        item being processed.
  -v                    Run in verbose mode where more information is
                        reported. Default is off.

Arguments:
    item                The item in the form 'hpr1234'

endusage
    exit "$result"
}

# }}}

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

#-------------------------------------------------------------------------------
# Directories and files
#-------------------------------------------------------------------------------
LOGS="$BASEDIR/logs"
make_dir "${LOGS}"
LOGFILE="$LOGS/$SCRIPT.log"

#
# Process the options. The leading ':' in the option string makes 'getopts'
# stay silent about errors and leave the reporting to us: it then sets 'opt'
# to ':' when an option's argument is missing, to '?' when the option is not
# recognised, and puts the offending option letter in OPTARG in both cases.
#
while getopts :d:DFhv opt
do
    case "${opt}" in
        D) DEBUG=1;;
        d) DRYRUN=$OPTARG;;
        F) FORCE=1;;
        h) _usage 0;;
        v) VERBOSE=1;;
        :) echo "** Option -$OPTARG requires an argument"
           _usage 1;;
        *) echo "** Unknown option -$OPTARG"
           _usage 1;;
    esac
done
shift $((OPTIND - 1))

#
# Set option defaults and check their values
#
DRYRUN=${DRYRUN:-1}

#
# The value must be literally '0' or '1'. This is a string match on purpose:
# the operands of '-ne'/'-eq' inside '[[ ]]' are evaluated as arithmetic
# expressions, so a value such as 'foo' would be taken as an (unset) variable
# name, evaluate to 0 and silently turn dry-run mode off.
#
if [[ ! $DRYRUN =~ ^[01]$ ]]; then
    echo "** Use '-d 0' or '-d 1'"
    _usage 1
fi
[[ $DRYRUN -eq 1 ]] && echo "Dry run mode"

DEBUG=${DEBUG:-0}
[[ $DEBUG -eq 1 ]] && coloured 'yellow' "Debug mode"

FORCE=${FORCE:-0}

VERBOSE=${VERBOSE:-0}

#
# Should have exactly one argument
#
if [[ $# -eq 0 ]]; then
    coloured 'red' "Missing argument"
    _usage 1
elif [[ $# -gt 1 ]]; then
    coloured 'red' "Too many arguments"
    _usage 1
fi
item="${1}"

#
# Ensure item spec is correctly formatted. The expression is anchored at both
# ends so that the whole argument has to be 'hpr' followed by digits; text
# before or after is rejected rather than being quietly dropped. The number is
# forced to base 10 (so leading zeroes aren't read as octal) and padded to
# four digits.
#
if [[ $item =~ ^hpr([0-9]+)$ ]]; then
    printf -v item 'hpr%04d' "$((10#${BASH_REMATCH[1]}))"
else
    coloured 'red' "Incorrect show specification: $item"
    coloured 'yellow' "Use 'hpr9999' format"
    exit 1
fi
_DEBUG "Parsed item: $item"

#
# Note the start of the run in the log, with the mode we're running in
#
_log "$SCRIPT $VERSION ($(_ifbool "$DRYRUN" 'dry-run' 'live'))"

#
# The show has to have a row in the 'episodes' table of 'ia.db'. The id used
# in the query is the item name without its first three characters ('hpr').
# Any error output from 'sqlite3' is discarded.
#
SQL="select 1 from episodes where id = ${item:3}"
if ! [[ $(sqlite3 -list "$IADB" "$SQL" 2>/dev/null) -ne 1 ]]; then
    _verbose "$(coloured 'yellow' "Show $item is in the local IA database")"
    _log "Show $item is in the local IA database"
else
    coloured 'red' "Unable to find show $item in the local IA database"
    coloured 'yellow' "Can't continue"
    exit 1
fi

#
# The item also has to exist on the IA. An upload could in principle fail
# before anything, even the metadata, has been sent; that has never been seen
# but it's catered for. (This check is active and always performed.)
#
if ia metadata "$item" --exists > /dev/null 2>&1; then
    _verbose "$(coloured 'yellow' "Show $item is on the IA")"
    _log "Show $item is on the IA"
else
    coloured 'red' "This item is not apparently on the IA; can't continue"
    exit 1
fi

#
# Directory paths: where the files come from on the backup disk, and where
# they go in the local cache
#
FROMPARENTDIR="${BACKUP}/public_html/eps"
FROMDIR="${FROMPARENTDIR}/${item}"
TOPARENTDIR="${REPAIRS}/${item}"
TOASSETDIR="${TOPARENTDIR}/${item}"

#
# Extended RE for use with 'grep -v -E': matches names ending with the item
# name and one of the audio extensions, so those files get left out
#
IGNORE="(${item}\.(flac|mp3|ogg|opus|spx|wav)$)"

#-------------------------------------------------------------------------------
# Look on the backup disk for files whose names begin with the item name,
# leaving out the audio. The item's own directory must exist and at least one
# file must have been found, otherwise there's nothing to work with.
#-------------------------------------------------------------------------------
declare -a BACKUPFILES
readarray -t BACKUPFILES < <(
    find "$FROMPARENTDIR" -type f -name "${item}*" | grep -Ev "$IGNORE"
)
_DEBUG "$(coloured 'purple' "Backup files")" "${BACKUPFILES[@]}"

if ! [[ -d $FROMDIR && ${#BACKUPFILES[@]} -ne 0 ]]; then
    coloured 'red' "No files found in $FROMDIR"
    coloured 'red' "Can't continue!"
    exit 1
fi

_log "Files found on backup disk ${#BACKUPFILES[*]}"

#-------------------------------------------------------------------------------
# Make the needed local cache directory for later. Unless -F was given, an
# existing cache directory for the item stops the script.
#-------------------------------------------------------------------------------
if [[ $FORCE -ne 1 && -e $TOPARENTDIR ]]; then
    coloured 'red' "Directory $TOPARENTDIR already exists; can't continue".
    coloured 'yellow' 'This implies that all files have been copied already.'
    coloured 'yellow' "If you're sure, consider running: '$REPIT -X -d0 $item'"
    coloured 'yellow' 'Otherwise, consider running again with option -F.'
    exit 1
fi

if [[ $DRYRUN -eq 1 ]]; then
    coloured 'yellow' "Would have created directory $TOPARENTDIR"
else
    #
    # 'make_dir' reports a failure and stops the script, so we never carry on
    # without somewhere to put the files
    #
    make_dir "$TOASSETDIR"
    _verbose "$(coloured 'yellow' "Created directory $TOASSETDIR")"
    _log "Created directory $TOASSETDIR"
fi

#-------------------------------------------------------------------------------
# Get the filenames recorded for this show in the 'assets' table of the
# database. Any error output from 'sqlite3' is discarded.
#-------------------------------------------------------------------------------
declare -a IADBASSETS
SQL="select filename from assets where episode_id = ${item:3}"
readarray -t IADBASSETS < <(sqlite3 -list "$IADB" "$SQL" 2>/dev/null)
_DEBUG "$(coloured 'purple' "SQLite IA DB files")" "${IADBASSETS[@]}"
_log "Files found in ia.db ${#IADBASSETS[*]}"

#-------------------------------------------------------------------------------
# Get the names of the files in the IA item. The 'jq' program keeps files
# whose source is "original" and whose format is neither "Metadata" nor
# "Item Tile"; the audio files are then removed with the ignore RE.
#-------------------------------------------------------------------------------
declare -a IAFILES
JQPROG='.files[] | select(.source == "original" and '
JQPROG+='.format != "Metadata" and .format != "Item Tile") | '
JQPROG+='(.name) | @text'
readarray -t IAFILES < <(
    ia metadata "$item" | $JQ -r "$JQPROG" | grep -Ev "$IGNORE"
)
_DEBUG "$(coloured 'purple' "IA files (originals)")" "${IAFILES[@]}"
_log "Files found on IA (originals) ${#IAFILES[*]}"

#-------------------------------------------------------------------------------
# Work out which files need to be moved on the IA and move them. Transcripts
# and other files from the backup disk are copied and uploaded by the later
# parts of the script.
#-------------------------------------------------------------------------------
declare -a MOVES=()

#
# Number of moves made or, in dry-run mode, the number that would have been
# made. Set here because it's tested after the moves even when there were
# none.
#
mcount=0

coloured 'purple' "Checking IA files for moves"

#
# A file needs to be moved into the sub-directory if it's at the top level of
# the IA item (its name has no directory part) and exactly the same name is
# listed for the show in the database. The comparison is of whole lines as
# fixed strings: the name is not a regular expression, and a longer name that
# merely contains this one must not count (nor get in the way of the match).
#
for asset in "${IAFILES[@]}"; do
    #
    # Skip IA files with directories
    #
    if [[ $asset == */* ]]; then
        continue
    fi

    if grep -q -F -x -- "$asset" <(printf '%s\n' "${IADBASSETS[@]}"); then
        MOVES+=("$asset")
    fi
done

#
# If we found any moves then we can move them in the IA item now and copy the
# files from the backup disk to the cache in case we need them. They will
# eventually get deleted by 'cron'.
#
if [[ ${#MOVES[@]} -gt 0 ]]; then
    _DEBUG "$(coloured 'purple' "Files to be moved")" "${MOVES[@]}" "----"

    for asset in "${MOVES[@]}"; do
        # source & destination for IA moves
        iafrom="$item/$asset"
        iato="$item/$item/$asset"
        _DEBUG "\$iafrom: $iafrom" "\$iato:   $iato" ""

        #
        # In dry-run mode just show the commands and count the move
        #
        if [[ $DRYRUN -eq 1 ]]; then
            coloured 'yellow' "ia move $iafrom $iato --no-derive --no-backup"
            coloured 'yellow' "cp $FROMPARENTDIR/$asset $TOASSETDIR/"
            ((mcount++))
            continue
        fi

        #
        # Perform the move. If the retries are exceeded things get
        # complicated, so just abort so we can try again later.
        #
        _verbose "$(coloured 'blue' "Moving $iafrom → $iato on IA")"
        if ! _IA_move "$iafrom" "$iato"; then
            coloured 'red' "Retries exhausted. Aborting recovery"
            exit 1
        fi

        #
        # Update the cache (but only if the move occurred). A failed copy is
        # reported but isn't fatal.
        #
        _verbose "$(coloured 'blue' "Copying from backup disk to cache")"
        if ! cp "$FROMPARENTDIR/$asset" "$TOASSETDIR/"; then
            coloured 'yellow' "Failed to copy $FROMPARENTDIR/$asset to $TOASSETDIR/"
            _log "Failed to copy $FROMPARENTDIR/$asset to $TOASSETDIR/"
        fi

        ((mcount++))
    done

    #
    # Report what was done (or what would have been done)
    #
    if [[ $DRYRUN -eq 1 ]]; then
        coloured 'yellow' \
            "Would have moved $mcount $(ngettext file files "$mcount")"
        _log "Would have moved $mcount $(ngettext file files "$mcount")"
    else
        coloured 'green' "Moved $mcount $(ngettext file files "$mcount")"
        _log "Moved $mcount $(ngettext file files "$mcount")"
    fi

else
    coloured 'yellow' "No moves needed"
    _log "No moves needed"
fi

#
# Wait for the IA moves to finish: ask once a minute until 'queued_tasks'
# reports nothing outstanding for the item
#
if [[ $mcount -gt 0 ]]; then
    if [[ $DRYRUN -eq 1 ]]; then
        coloured 'yellow' "Would have waited for any IA tasks to complete"
    else
        until [[ $(queued_tasks "$item") -eq 0 ]]; do
            coloured 'yellow' "Waiting for IA tasks to complete"
            sleep 1m
        done
    fi
fi

#-------------------------------------------------------------------------------
# Copy files from the backup disk to the cache. The source is given without a
# trailing '/', so 'rsync' copies the item's directory itself into the
# destination. 'index.html' is left out.
#-------------------------------------------------------------------------------
if [[ $DRYRUN -eq 1 ]]; then
    coloured 'yellow' "Would have copied files from backup disk → cache"
    rsync -n -vaP --exclude=index.html "$FROMDIR" "$TOPARENTDIR"
elif rsync -vaP --exclude=index.html "$FROMDIR" "$TOPARENTDIR"; then
    _verbose "$(coloured 'yellow' "Copied files from $FROMDIR")"
    _log "Copied files from $FROMDIR"
else
    #
    # The cache is the reference for the repair that follows, so don't
    # carry on (or claim success in the log) if the copy failed
    #
    coloured 'red' "Failed to copy files from $FROMDIR to $TOPARENTDIR"
    _log "Failed to copy files from $FROMDIR"
    exit 1
fi

# TODO: Is this needed?
#
# Put any source audio in the right place.
#
# if [[ $DRYRUN -eq 1 ]]; then
#     coloured 'yellow' "Would have moved source files if found"
# else
#     #
#     # Turn on 'nullglob' to get an empty result if the glob expression doesn't
#     # match.
#     #
#     NG=$(shopt -p nullglob)
#     shopt -s nullglob
#
#     #
#     # Any source files should be in repairs/hpr1234/ and should go to the IA
#     # in the comparable place. We will not put it on the HPR server though.
#     #
#     # TODO: Is this right?
#     movecount=0
#     for file in "$TOPARENTDIR"/*_source.*; do
#         if mv "$file" "$TOPARENTDIR"; then
#             ((movecount++))
#         fi
#     done
#
#     eval "$NG"
#
#     #
#     # Show the directories after any move
#     #
#     if [[ $movecount -gt 0 ]]; then
#         _verbose "$(coloured 'yellow' "Moved source file(s)")"
#         ls -lR "$REPAIRS/$item/"
#     fi
#
# fi

#-------------------------------------------------------------------------------
# Finally hand over to the repair script, which is run with '-X -d0' for this
# item (the intention being that whatever is in the cache but missing from the
# IA gets uploaded). In dry-run mode we only say that this would happen.
#-------------------------------------------------------------------------------
if ! [[ $DRYRUN -eq 1 ]]; then
    _verbose "$(coloured 'yellow' "Finding and repairing missing files")"
    _log "Finding and repairing missing files (with $REPIT)"
    "$REPIT" -X -d0 "$item"
else
    coloured 'yellow' "Would have found and repaired missing files"
fi

# vim: syntax=sh:ts=8:sw=4:ai:et:tw=78:fo=tcrqn21:fdm=marker