#!/bin/bash -
#===============================================================================
#
#         FILE: repair_assets
#
#        USAGE: ./repair_assets [-h] [-v] [-d {0|1}] [-D] showid
#
#  DESCRIPTION: Given a show where there was a directory of asset files on the
#               old HPR server which got lost in the migration, rebuild it
#               and fill it with assets from the IA. Modify the show notes to
#               point to these recovered assets.
#
#      OPTIONS: ---
# REQUIREMENTS: ---
#         BUGS: ---
#        NOTES: ---
#       AUTHOR: Dave Morriss (djm), Dave.Morriss@gmail.com
#      VERSION: 0.0.10
#      CREATED: 2024-05-10 21:26:31
#     REVISION: 2024-10-02 17:34:47
#
#===============================================================================

# set -o nounset                              # Treat unset variables as an error

#
# Script identity: the version string and the script's own base name
#
VERSION='0.0.10'
SCRIPT="${0##*/}"
# DIR=${0%/*}

#
# Destination used by the usage message. Note that, despite the variable's
# name, this is file descriptor 2.
#
STDOUT='/dev/fd/2'

#
# Choose the working directory according to the machine we are running on;
# any host not listed here is rejected.
#
case "$(hostname)" in
    borg)
        BASEDIR="$HOME/IA"
        ;;
    i7-desktop)
        BASEDIR="$HOME/HPR/InternetArchive"
        ;;
    *)
        echo "Wrong host!"
        exit 1
        ;;
esac

#
# Move to the working directory, giving up if that is not possible
#
if ! cd "$BASEDIR"; then
    echo "Failed to cd to $BASEDIR"
    exit 1
fi

#
# Load library functions
#
LIB="$HOME/HPR/function_lib.sh"
#
# The script calls functions that are not defined in this file, so a missing
# library is fatal. Exit with an explicit failure status: a bare 'exit' would
# return the status of the preceding 'echo', which is 0.
#
[ -e "$LIB" ] || { echo "Unable to source functions"; exit 1; }
# shellcheck disable=SC1090
source "$LIB"

#
# Enable coloured messages
#
define_colours

#
# Sanity checks. Calling 'command' on a non-existent script/program will
# return an empty string. This works for built-in stuff and things on the
# PATH. These are all 'true' tests with the stuff in braces being run if they
# are not true.
#
#
# 'ia' must be on the PATH
#
IA=$(command -v ia)
if [ -z "$IA" ]; then
    echo "Program 'ia' was not found"
    exit 1
fi

#
# 'query2tt2' must be on the PATH
#
Q2T=$(command -v query2tt2)
if [ -z "$Q2T" ]; then
    echo "Program 'query2tt2' was not found"
    exit 1
fi

#
# 'fix_asset_links' is looked for in the working directory, not on the PATH
#
FIXAL="$BASEDIR/fix_asset_links"
if [ ! -e "$FIXAL" ]; then
    echo "Program '$FIXAL' was not found"
    exit 1
fi

#
# The two tunnel helper programs must be on the PATH
#
TUNNEL_IS_OPEN=$(command -v tunnel_is_open)
if [ -z "$TUNNEL_IS_OPEN" ]; then
    echo "Program 'tunnel_is_open' was not found"
    exit 1
fi

OPEN_TUNNEL=$(command -v open_tunnel)
if [ -z "$OPEN_TUNNEL" ]; then
    echo "Program 'open_tunnel' was not found"
    exit 1
fi

#
# Make temporary files and set traps to delete them
#
if ! TMP1=$(mktemp); then
    echo "$SCRIPT: creation of temporary file failed!"
    exit 1
fi

if ! TMP2=$(mktemp); then
    echo "$SCRIPT: creation of temporary file failed!"
    exit 1
fi

#
# Hand both files to 'cleanup_temp' when the script exits or is signalled
#
trap 'cleanup_temp $TMP1 $TMP2' SIGHUP SIGINT SIGPIPE SIGTERM EXIT

# {{{ -- Functions -- _verbose, _usage, _log, find_missing, make_dir

#===  FUNCTION  ================================================================
#         NAME: find_missing
#  DESCRIPTION: Given two arrays containing IA assets and HPR assets,
#               determine which IA assets are missing from the HPR list.
#   PARAMETERS: $1      (nameref) IA list
#               $2      (nameref) HPR list
#               $3      Name of array to receive list of missing assets
#      RETURNS: Nothing
#===============================================================================
# find_missing () {
#     local -n IA="${1}"
#     local -n HPR="${2}"
#     local output="${3}"
#
#     local -A hIA hHPR
#     local i key
#
#     #
#     # Make a hash keyed by the IA file base names from an indexed array
#     #
#     for (( i=0; i<${#IA[@]}; i++ )); do
#         hIA+=([${IA[$i]##*/}]=${IA[$i]})
#     done
#
#     #
#     # Make a hash keyed by the HPR file base names from an indexed array
#     #
#     for (( i=0; i<${#HPR[@]}; i++ )); do
#         hHPR+=([${HPR[$i]##*/}]=${HPR[$i]})
#     done
#
#     #
#     # Use the basename keys to check what's missing, but return the full path
#     # names.
#     #
#     for key in "${!hIA[@]}"; do
#         if ! exists_in hHPR "$key"; then
#             eval "$output+=('${hIA[$key]}')"
#         fi
#     done
# }
#===  FUNCTION  ================================================================
#         NAME: find_missing
#  DESCRIPTION: Given two arrays containing IA asset paths and HPR asset
#               paths, determine which IA paths are missing from the HPR
#               list. HPR paths have their first path element removed before
#               comparison so that they line up with the IA paths. The
#               missing IA paths are appended to the output array in the
#               order they appear in the IA list, each one at most once.
#   PARAMETERS: $1      Name of the array holding the IA paths
#               $2      Name of the array holding the HPR paths
#               $3      Name of the array to receive the missing IA paths
#      RETURNS: Nothing
#===============================================================================
find_missing () {
    #
    # All three arguments are array names and are accessed through namerefs.
    # Every local carries an '_fm_' prefix so that a caller's array name
    # cannot collide with (and be hidden by) one of our own variables.
    #
    local -n _fm_ia="${1}"
    local -n _fm_hpr="${2}"
    local -n _fm_out="${3}"

    local -A _fm_on_hpr=() _fm_seen=()
    local _fm_path _fm_key

    #
    # Make a hash keyed by the HPR file paths, but remove the first element
    # for parity with the IA paths. An empty key is skipped because it is not
    # a valid associative array subscript.
    #
    for _fm_path in "${_fm_hpr[@]}"; do
        _fm_key="${_fm_path#*/}"
        [[ -n $_fm_key ]] && _fm_on_hpr["$_fm_key"]=1
    done

    #
    # Walk the IA paths in their original order and keep the ones the HPR
    # server does not have. Appending through the nameref (rather than via
    # 'eval') means paths containing quotes or other shell metacharacters
    # are stored verbatim and never interpreted.
    #
    for _fm_path in "${_fm_ia[@]}"; do
        [[ -n $_fm_path ]] || continue
        [[ -n ${_fm_on_hpr["$_fm_path"]+x} ]] && continue
        [[ -n ${_fm_seen["$_fm_path"]+x} ]] && continue
        _fm_seen["$_fm_path"]=1
        _fm_out+=("$_fm_path")
    done
}

#===  FUNCTION  ================================================================
#         NAME: make_dir
#  DESCRIPTION: Make a directory if it doesn't exist, failing gracefully on
#               errors.
#   PARAMETERS: $1      directory path
#      RETURNS: True if success, otherwise exits the caller script
#===============================================================================
make_dir () {
    local target="${1}"

    #
    # Nothing to do when the directory is already there
    #
    [[ -d $target ]] && return 0

    #
    # Create it (and any missing parents); a failure ends the whole script
    #
    if ! mkdir -p "$target"; then
        coloured 'red' "Failed to create $target"
        exit 1
    fi
}

#===  FUNCTION  ================================================================
#         NAME: _verbose
#  DESCRIPTION: Writes a message in verbose mode
#   PARAMETERS: *       message strings to write
#      RETURNS: Nothing
#===============================================================================
_verbose () {
    #
    # Keep the loop variable out of the global namespace
    #
    local msg

    #
    # Stay silent unless verbose mode is on. An unset or empty VERBOSE is
    # treated as 0 rather than causing a test error.
    #
    [[ ${VERBOSE:-0} -eq 0 ]] && return 0

    #
    # One message per line
    #
    for msg in "$@"; do
        printf '%s\n' "$msg"
    done
}

#===  FUNCTION  ================================================================
#         NAME: _log
#  DESCRIPTION: Appends a record to the file "$LOGFILE"
#   PARAMETERS: $1      Message to write
#      RETURNS: Nothing
#===============================================================================
_log () {
    local text="${1}"
    local stamp

    #
    # Records look like 'YYYY-MM-DD HH:MM:SS message'
    #
    stamp=$(date '+%F %T')
    printf '%s %s\n' "$stamp" "$text" >> "$LOGFILE"
}

#===  FUNCTION  ================================================================
#         NAME: _usage
#  DESCRIPTION: Reports usage; always exits the script after doing so
#   PARAMETERS: 1 - the integer to pass to the 'exit' command
#      RETURNS: Nothing
#===============================================================================
_usage () {
    local -i exit_code="${1:-0}"

    #
    # The text is written to whatever $STDOUT names; the variables in the
    # here-document are expanded when it is printed.
    #
    cat <<-endusage >"$STDOUT"
${SCRIPT} - version: ${VERSION}

Usage: ./${SCRIPT} [-h] [-v] [-d {0|1}] [-D] showid

Attempts to repair an show where the directory of assets was not transferred
from the old HPR server.

Options:
  -h                    Print this help
  -v                    Run in verbose mode where more information is
                        reported. Default is off. If -v is repeated it
                        increases the verbosity level (levels 1 and 2 only).
  -d 0|1                Dry run: -d 1 (the default) runs the script in dry-run
                        mode where nothing is changed but the actions that
                        will be taken are reported; -d 0 turns off dry-run
                        mode and the actions will be carried out.
  -D                    Run in debug mode where a lot more information is
                        reported

Arguments:
    showid              The show id in the form 'hpr1234'

endusage

    #
    # Never returns to the caller
    #
    exit "$exit_code"
}

# }}}

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

#-------------------------------------------------------------------------------
# Directories and files
#-------------------------------------------------------------------------------
#
# The log file is named after the script and lives in 'logs' below the
# working directory, which is created here if it does not exist yet.
#
LOGS="${BASEDIR}/logs"
LOGFILE="${LOGS}/${SCRIPT}.log"
make_dir "$LOGS"

#-------------------------------------------------------------------------------
# Options
#-------------------------------------------------------------------------------
# Default settings
#
VERBOSE=0

#
# Process options. The leading ':' in the option string selects getopts'
# silent error reporting: a missing option argument arrives as ':' (with the
# option letter in OPTARG) and an unknown option as '?'.
#
while getopts :d:Dhv opt
do
    case "${opt}" in
        D) DEBUG=1;;
        d) DRYRUN=$OPTARG;;
        h) _usage 0;;
        v) ((VERBOSE++));;
        :) echo "** Option -$OPTARG requires an argument"
           _usage 1;;
        *) echo "** Unknown option"
           _usage 1;;
    esac
done
shift $((OPTIND - 1))

#
# Set option defaults and check their values.
#
# The dry-run setting is compared as a string. A numeric test ('-ne') would
# evaluate the operand arithmetically, so a value such as 'yes' would be
# taken as an unset variable, i.e. 0, and would quietly switch dry-run mode
# OFF. Only the literal values '0' and '1' are accepted.
#
DRYRUN=${DRYRUN:-1}
if [[ $DRYRUN != 0 && $DRYRUN != 1 ]]; then
    coloured 'red' "** Use '-d 0' or '-d 1'"
    _usage 1
fi
[[ $VERBOSE -gt 0 && $DRYRUN -eq 1 ]] && echo "Dry run mode"

DEBUG=${DEBUG:-0}
[[ $DEBUG -eq 1 ]] && coloured 'yellow' "Debug mode"

#-------------------------------------------------------------------------------
# Argument check
#-------------------------------------------------------------------------------
# Should have one argument
#
if [[ $# != 1 ]]; then
    coloured 'red' "Missing argument"
    _usage 1
fi
show="${1,,}"

#
# Ensure show id is correctly formatted. We want it to be 'hpr1234' but we
# allow the 'hpr' bit to be omitted, as well as any leading zeroes. The
# expression is anchored at both ends so that the whole argument has to match;
# without the anchors something like 'foo12bar' would be accepted as show 12.
#
# The '10#' prefix forces the number to be read as decimal, since a leading
# zero would otherwise mean octal. The result is always stored as 'hpr1234'.
#
if [[ $show =~ ^(hpr)?([0-9]+)$ ]]; then
    printf -v show 'hpr%04d' "$((10#${BASH_REMATCH[2]}))"
else
    coloured 'red' "Incorrect show specification: $show"
    coloured 'yellow' "Use 'hpr9999' or '9999' format"
    exit 1
fi
_DEBUG "Parsed item: $show"
echo "Processing show $show"
_log "Processing show $show; dry-run: $([ "$DRYRUN" -eq 1 ] && echo "on" || echo "off")"

#-------------------------------------------------------------------------------
# Declarations and constants
#-------------------------------------------------------------------------------
declare -a iacache

#
# SHOWURL: the address of the show's page on the webserver
#
SHOWURL="https://hackerpublicradio.org/eps/${show}/index.html"

#
# CACHEDIR: the local store for asset details and files, made on first use
#
CACHEDIR="${BASEDIR}/assets"
if [[ ! -d $CACHEDIR ]]; then
    coloured 'red' "Creating cache directory"
    make_dir "$CACHEDIR"
fi

#
# Places inside the cache:
# LOCAL_ASSETDIR  - the cache belonging to this show
# LOCAL_FILEDIR   - the directory the IA files are put in
# LOCAL_PARENTDIR - the local counterpart of the top show directory
#
LOCAL_ASSETDIR="${CACHEDIR}/${show}"
LOCAL_FILEDIR="${LOCAL_ASSETDIR}/files"
LOCAL_PARENTDIR="${LOCAL_FILEDIR}/${show}"

#
# Places on the HPR server:
# REMOTE_PARENTDIR - the remote parent directory
# REMOTE_ASSETDIR  - the directory the assets are to go in
#
REMOTE_PARENTDIR="public_html/eps/${show}"
REMOTE_ASSETDIR="${REMOTE_PARENTDIR}/${show}"

#
# Template for running a command on the HPR server; '%s' takes the command
#
CMDTPL='ssh hpr@hackerpublicradio.org %s'

#
# Files kept in this show's cache
#
MANIFEST="${LOCAL_ASSETDIR}/manifest"
DBNOTES="${LOCAL_ASSETDIR}/notes.html"

#-------------------------------------------------------------------------------
# Check the show exists in the database (or is visible on the website).
#-------------------------------------------------------------------------------
_verbose "Checking the show exists on the HPR server"

#
# Only the HTTP status of a HEAD request is collected; a 404 is the one
# answer that stops the script here.
#
result=$(curl --head --silent --output /dev/null --write-out "%{http_code}" "$SHOWURL")
[[ $result -ne 404 ]] || {
    coloured 'red' "Could not detect show '$show' on the HPR server"
    _log "Show '$show' not on the HPR server"
    exit 1
}

#-------------------------------------------------------------------------------
# Check the show exists on the IA
#-------------------------------------------------------------------------------
_verbose "Checking the show exists on the IA server"

#
# The output of 'ia' is discarded; only its exit status is of interest
#
ia metadata "$show" --exists > /dev/null 2>&1 || {
    coloured 'red' "Could not detect show '$show' on the IA server"
    coloured 'yellow' "Check that archive.org is available"
    coloured 'yellow' "Try https://downfor.io/internet-archive"
    _log "Show '$show' not on the IA server"
    exit 1
}

#-------------------------------------------------------------------------------
# Check IA, collect contents, classify them
#-------------------------------------------------------------------------------
# Interrogate the IA for the required item contents. If it returns True we can
# collect its contents, otherwise we can't proceed. The file 'TMP1' contains
# just a simple list of the files on the IA relating to this item.
#
_verbose "Collecting filenames from the IA server"

#
# Write the file list to 'TMP1'; if the listing fails we can't go on
#
if ! ia list "$show" > "$TMP1"; then
    coloured 'red' "Item $show can't be found on the IA"
    coloured 'red' "Can't continue"
    _log "Files for show '$show' not on the IA server"
    exit 1
fi

#
# Load the list, one file name per line, into the 'iacache' array
#
while read -r iafile; do
    iacache+=("$iafile")
done < "$TMP1"

_DEBUG "IA cache" "${iacache[@]}"

#
# Determine which files are assets
#
_verbose "Categorising files held on the IA"

declare -a audio ia_transcript ia_asset

#
# Patterns used to sort the IA file names into categories:
# audio_re      - the show's audio files at the top level
# transcript_re - files named after the show inside the show directory
# asset_re      - anything else inside the show directory
# metadata_re   - the thumbnail and top-level files named after the show
#                 with one of the listed extensions; these are skipped
#
audio_re="^${show}\.(flac|mp3|ogg|opus|spx|wav)\$"
# transcript_re="^${show}/${show}/${show}\.(json|srt|tsv|txt|vtt)\$"
transcript_re="^${show}/${show}\.(json|srt|tsv|txt|vtt)\$"
asset_re="^${show}/(${show}/)?.*\$"
metadata_re="^(__ia_thumb.jpg|${show}[^/]+\.(afpk|torrent|gz|xml|sqlite|png))\$"

#
# The tests are made in a fixed order and the first match wins; a file that
# matches none of the patterns is ignored.
#
for file in "${iacache[@]}"; do
    if [[ $file =~ $audio_re ]]; then
        audio+=("$file")
        continue
    fi

    if [[ $file =~ $metadata_re ]]; then
        _verbose "Skipping $file"
        continue
    fi

    if [[ $file =~ $transcript_re ]]; then
        ia_transcript+=("$file")
        continue
    fi

    if [[ $file =~ $asset_re ]]; then
        ia_asset+=("$file")
    fi
done

#
# At verbosity level 2 show each category and log the number of assets
#
if (( VERBOSE > 1 )); then
    coloured 'cyan' "** audio (${#audio[@]}):"
    printf '%s\n' "${audio[@]}"

    coloured 'cyan' "** transcript (${#ia_transcript[@]}):"
    printf '%s\n' "${ia_transcript[@]}"

    coloured 'cyan' "** asset (${#ia_asset[@]}):"
    printf '%s\n' "${ia_asset[@]}"

    _log "IA asset count for show '$show' = ${#ia_asset[@]}"
fi

#
# Without any assets on the IA there is nothing to repair
#
if (( ${#ia_asset[@]} == 0 )); then
    coloured 'green' "No IA assets found for show $show; nothing to do"
    _log "Nothing to do for show $show"
    exit
fi

#-------------------------------------------------------------------------------
# Check what's on the HPR server
#-------------------------------------------------------------------------------
#
# 'rc' is the remote command template
#
printf -v rc 'find public_html/eps/%s -type f -printf "%s/%%P\\n"' "$show" "$show"

#
# Wrap the remote command in the 'ssh' template to give the local command
# line, which is later handed to 'eval'
#
# shellcheck disable=SC2059 disable=SC2089
printf -v command "$CMDTPL" "'$rc'"

if (( VERBOSE > 1 )); then
    echo "Command: $command"
fi

declare -a hpr_asset
ignore_re="index.html$"

#
# The HPR server is interrogated in dry-run mode as well as in live mode.
# The file names found are written to 'TMP2'; a failing command ends the
# script.
#
eval "$command" > "$TMP2"
RES=$?
if [[ $RES -ne 0 ]]; then
    coloured 'red' "Remote command failed"
    _log "Failed while searching for HPR assets"
    exit 1
fi

_verbose "$(coloured 'green' "Remote command successful")"

#
# Keep every name returned apart from those matching 'ignore_re'
#
while read -r hprfile; do
    [[ $hprfile =~ $ignore_re ]] && continue
    hpr_asset+=("${hprfile}")
done < "$TMP2"

_verbose "$(coloured 'green' "Assets found on HPR server = ${#hpr_asset[@]}")"
_verbose "$(printf '%s\n' "${hpr_asset[@]}")"
_log "Assets found on HPR server = ${#hpr_asset[@]}"

#-------------------------------------------------------------------------------
# Compare the two asset lists and return what's missing on the HPR server
#-------------------------------------------------------------------------------
# TODO: The algorithm in find_missing does not handle the instance where there
# are pictures in one directory and a lower directory containing thumbnails,
# AND THE FILE NAMES ARE THE SAME!
#
declare -a missing

#
# With nothing at all on the HPR server every IA asset is missing; otherwise
# let 'find_missing' work out the difference
#
if [[ ${#hpr_asset[@]} -gt 0 ]]; then
    find_missing ia_asset hpr_asset missing
else
    missing=( "${ia_asset[@]}" )
fi

_verbose "$(coloured 'cyan' "** missing (${#missing[@]}):")"
_verbose "$(printf '%s\n' "${missing[@]}")"

#
# Report the number of missing files, or stop if there are none
#
if [[ ${#missing[@]} -gt 0 ]]; then
    coloured 'yellow' \
        "Found ${#missing[@]} $(ngettext file files ${#missing[@]}) missing on the HPR server"
else
    coloured 'green' "No missing assets detected; nothing to do"
    _log "No missing assets detected; nothing to do"
    exit
fi

#-------------------------------------------------------------------------------
# Prepare to copy the missing files
#-------------------------------------------------------------------------------
make_dir "$LOCAL_FILEDIR"

declare -a downloads

#
# Check whether files are already downloaded. The test looks for each missing
# file below "$LOCAL_FILEDIR/$show", so only files not yet present there are
# queued for download.
#
for file in "${missing[@]}"; do
    if [[ ! -e "$LOCAL_FILEDIR/$show/$file" ]]; then
        downloads+=("$file")
    fi
done

_verbose "$(coloured 'cyan' "** downloads (${#downloads[@]}):")"
_verbose "$(printf '%s\n' "${downloads[@]}")"

#
# If we have files to download get them now. In dry-run mode we only report
# what would have happened.
#
if [[ ${#downloads[@]} -gt 0 ]]; then
    if [[ $DRYRUN -eq 1 ]]; then
        coloured 'yellow' "Would have downloaded missing files from the IA"
    else
        ia download "$show" --destdir="$LOCAL_FILEDIR" "${downloads[@]}"
        RES=$?
        if [[ $RES -eq 0 ]]; then
            coloured 'green' "Downloads complete"
            _log "Downloaded IA assets for show $show"
        else
            #
            # Stop here rather than carrying on to upload an incomplete set
            # of files to the HPR server
            #
            coloured 'red' "Download of missing files from the IA failed"
            _log "Failed to download IA assets for show $show"
            exit 1
        fi
    fi
else
    coloured 'yellow' "IA files are already downloaded"
fi

#
# Template for the 'rsync' command line: source first, then destination
#
# shellcheck disable=SC2089
RSYNCTPL="rsync -a -e 'ssh' %s hpr@hpr:%s"

#-------------------------------------------------------------------------------
# Build the 'ssh' command to make a directory
#-------------------------------------------------------------------------------
#
# $rc is what will run on the server: it makes the remote asset directory
# only if nothing of that name exists. $command is the complete 'ssh'
# command line with $rc inside it.
#
printf -v rc 'if [ ! -e "%s" ]; then mkdir -p "%s"; fi' \
    "$REMOTE_ASSETDIR" "$REMOTE_ASSETDIR"

# shellcheck disable=SC2059 disable=SC2089
printf -v command "$CMDTPL" "'$rc'"

#-------------------------------------------------------------------------------
# Report the command in dry-run mode, otherwise run it
#-------------------------------------------------------------------------------
if [[ $DRYRUN -ne 0 ]]; then
    coloured 'yellow' "Would have created the remote directory"
    echo "$command"
else
    if eval "$command"; then
        RES=0
        coloured 'green' "Remote directory creation successful"
    else
        RES=$?
        coloured 'red' "Remote directory creation failed"
    fi
fi

#-------------------------------------------------------------------------------
# Synchronise assets to the directory
#-------------------------------------------------------------------------------
# We perform an 'rsync' over 'ssh' to synchronise files from
# ~/HPR/InternetArchive/assets/hprXXXX/files/hprXXXX to
# public_html/eps/hprXXXX (on the HPR server)
#
#
# Fill in the 'rsync' template with the local source and remote destination
#
# shellcheck disable=SC2059 disable=SC2089
printf -v command "$RSYNCTPL" "$LOCAL_PARENTDIR/" "$REMOTE_PARENTDIR/"

#
# Show the command in dry-run mode; otherwise run it and stop the script if
# the upload fails
#
if [[ $DRYRUN -ne 0 ]]; then
    coloured 'yellow' "Would have synchronised local assets with the remote directory"
    echo "$command"
elif eval "$command"; then
    RES=0
    coloured 'green' "Remote upload successful"
    _log "Uploaded assets for show $show"
else
    RES=$?
    coloured 'red' "Remote upload failed"
    exit 1
fi

#-------------------------------------------------------------------------------
# Make a 'manifest' file if necessary
#-------------------------------------------------------------------------------
#
# Only when not in dry-run mode, and only if there is no manifest yet: list
# every file below the local parent directory, with paths relative to it.
#
if [[ $DRYRUN -eq 0 && ! -e $MANIFEST ]]; then
    find "$LOCAL_PARENTDIR" -type f -printf '%P\n' > "$MANIFEST"
    _verbose "$(coloured 'green' "Created manifest file")"
    _log "Created manifest file $MANIFEST"
fi

#-------------------------------------------------------------------------------
# Save the notes from the database if necessary
#-------------------------------------------------------------------------------
#
# Only when not in dry-run mode, and only if the notes have not been saved
# before. The tunnel is opened first if 'tunnel_is_open' reports that it is
# not open. The query selects the 'notes' column for this show; the show
# number with the 'hpr' prefix removed is passed as the query argument.
#
if [[ $DRYRUN -eq 0 ]]; then
    if [[ ! -e $DBNOTES ]]; then
        if ! "$TUNNEL_IS_OPEN"; then
            "$OPEN_TUNNEL"
        fi
        if "$Q2T" -config="$BASEDIR/.hpr_livedb.cfg" \
                -temp="$BASEDIR/query2tt2_nokey.tpl" \
                -out="$DBNOTES" \
                -dbarg="${show:3}" \
                'select notes from eps where id = ?'
        then
            _verbose "$(coloured 'green' "Created notes file")"
            _log "Created notes file $DBNOTES"
        else
            #
            # Like the other failures in this script this is reported
            # whether or not verbose mode is on
            #
            coloured 'red' "Creation of notes file failed"
            _log "Creation of notes file $DBNOTES failed"
        fi
    fi
fi

#-------------------------------------------------------------------------------
# Adjust the notes with 'fix_asset_links' (exists but not fully implemented
# yet :-)
#-------------------------------------------------------------------------------
#
# Outside dry-run mode: show the path of 'fix_asset_links' (the program is
# not actually run yet) and then log that the repair is complete.
#
if [[ $DRYRUN -eq 0 ]]; then
    echo "$FIXAL"
    # $FIXAL

    #
    # All done
    #
    _log "Repaired show $show"
fi

#-------------------------------------------------------------------------------
# √ Make a place to hold the files on this machine
# √ Download them from the IA
# √ Make a directory on the HPR server
# √ Copy the assets to the HPR server
# √ Modify the notes to point to the assets on the server
#-------------------------------------------------------------------------------

# vim: syntax=sh:ts=8:sw=4:ai:et:tw=78:fo=tcrqn21:fdm=marker