diff --git a/Database/query2tt2 b/Database/query2tt2 index 75e3704..9847e47 100755 --- a/Database/query2tt2 +++ b/Database/query2tt2 @@ -26,9 +26,9 @@ # BUGS: --- # NOTES: Had to revert to MySQL because of a problem with DBD::MariaDB # AUTHOR: Dave Morriss (djm), Dave.Morriss@gmail.com -# VERSION: 0.0.4 +# VERSION: 0.0.5 # CREATED: 2021-06-18 13:24:49 -# REVISION: 2024-01-19 17:15:45 +# REVISION: 2024-06-29 18:42:49 # #=============================================================================== @@ -59,7 +59,7 @@ use Data::Dumper; # # Version number (manually incremented) # -our $VERSION = '0.0.4'; +our $VERSION = '0.0.5'; # # Script and directory names @@ -113,7 +113,7 @@ Options( \%options ); # # Default help # -pod2usage( -msg => "Version $VERSION\n", -exitval => 1 ) +pod2usage( -msg => "Version $VERSION\n", -exitval => 1, -verbose => 0 ) if ( $options{'help'} ); # @@ -234,7 +234,7 @@ catch ($e) { print STDERR "Failed to execute query.\n"; print STDERR "Placeholder/Argument mismatch: $pcount/$acount\n"; exit; -}; +} # # Grab everything from the query as an arrayref of hashrefs @@ -325,7 +325,7 @@ sub _dbargs { #=== FUNCTION ================================================================ # NAME: _define -# PURPOSE: Handles multiple instances of the same option '-define x=42' +# PURPOSE: Handles multiple instances of the option '-define x=42' # PARAMETERS: $opts hash reference holding the options # RETURNS: A hash containing all of the named items (e.g. 
{ 'x' => 42 }) # DESCRIPTION: If there are -define options they will be a hashref in the hash @@ -386,12 +386,13 @@ query2tt2 - A script for formatting a report from database query using a templat =head1 VERSION -This documentation refers to query2tt2 version 0.0.4 +This documentation refers to query2tt2 version 0.0.5 =head1 USAGE - query2tt2 [-help] [-debug=N] [-config=FILE] [-query=FILE] - [-template=FILE] [QUERY] + query2tt2 [-help] [-doc] [-debug=N] [-config=FILE] [-query=FILE] + [-template=FILE] [-dbarg=ARG1 [-dbarg=ARG2] ...] + [-define KEY1=VALUE1 [-define KEY2=VALUE2] ...] [QUERY] query2tt2 -help @@ -462,10 +463,10 @@ If neither method is used the script aborts with an error message. =item B<-dbarg=ARG> [ B<-dbarg=ARG> ... ] -The query can have place holders ('?') in it and the corresponding values can -be passed to the script through the B<-dbarg=ARG> option. The option can be -repeated as many times as required and the order of B values is -preserved. +The query can have placeholders ('?') in it and the corresponding values for +these placeholders can be passed to the script through the B<-dbarg=ARG> +option. The option can be repeated as many times as required and the order of +B values is preserved. =item B<-template=FILE> @@ -484,8 +485,10 @@ Output from the template is written to STDOUT. The Template Toolkit (TT2) template may receive values from the command line using this option. The argument to the B<-define> option is a B -pair. Keys should be unique otherwise they will overwrite one another. The -keys will become TT2 variables and the values will be assigned to them. +pair. Keys should be unique otherwise they will overwrite one another. They +should also not be 'names' or 'result' because these keys are used internally +(for the data from the database). See below for more details. The keys will +become TT2 variables and the values will be assigned to them. =back @@ -548,12 +551,7 @@ The nominated template file could not be found.
An error has occurred while performing a database operation. -=item B - -There is a mismatch between the number of placeholders in the query ('?' -characters) and the number of arguments provided through the B<-dbargs=ARG> -option. The script will attempt to analyse whether there are too many or too -few arguments +=item B There is a mismatch between the number of placeholders in the query ('?' characters) and the number of arguments provided through the B<-dbargs=ARG> diff --git a/InternetArchive/.make_metadata.cfg b/InternetArchive/.make_metadata.cfg index dd4c135..ffacb5f 100644 --- a/InternetArchive/.make_metadata.cfg +++ b/InternetArchive/.make_metadata.cfg @@ -1,16 +1,62 @@ # Version for i7-desktop -# .make_metadata.cfg 2023-07-06 11:54:49 +# .make_metadata.cfg 2024-07-08 13:55:23 +# + +# +# A sanity check value in case an episode number given is too big # max_epno = 9000 + +# +# This is where the script will look for the audio files for upload (if there +# are other "assets" it finds them itself) +# #uploads = "/var/IA/uploads" # on the VPS and marvin uploads = "/home/cendjm/HPR/IA/uploads" + +# +# How a "standard" audio file name is made up +# filetemplate = "hpr%04d.%s" + +# +# How to fill in the "missing bit" in relative URLs +# baseURL = "https://hackerpublicradio.org/" + +# +# *** OBSOLETE *** +# If we need to fetch the MP3 version of the audio, which we do for older +# shows, these are under the 'local' directory. The 'eps' files are actually +# redirections to the IA. This is not normally used for the weekly uploads. +# #URLtemplate = "http://hackerpublicradio.org/eps/%s" #URLtemplate = "https://hackerpublicradio.org/local/%s" + +# +# Printf/sprintf template for building an URL which points back to the current +# show on the HPR site. 
+# sourceURLtemplate = "https://hackerpublicradio.org/eps/%s/index.html" + +# +# If we are having to collect assets from the HPR server and upload them to +# the IA server we want the final product to be addressable according to the +# following URL template. +# IAURLtemplate = "https://archive.org/download/%s/%s" + +# +# We build a Bash script to perform the upload of files which aren't in the +# CSV generated by make_metadata. We used to use the plain 'ia upload' command +# but now we call a Bash function declared in the script which is slightly +# cleverer. We need to do this to get round the IA code's tendency to "derive" +# all audio, and in doing so strip any audio tags. We perform our own +# equivalent of "derive" *with* the tags and upload them telling the IA *not* +# to re-derive. Mostly it listens. There's also a whole thing about IA keeping +# history of deletions which we want to turn off otherwise our items become +# stuffed with unwanted garbage. +# #iauploadtemplate = "ia upload %s %s --remote-name=%s" iauploadtemplate = "Upload %s %s '%s' '%s'" iauploadoptions = "--retries=5 --no-derive -H x-archive-keep-old-version:0" - diff --git a/InternetArchive/ia.db b/InternetArchive/ia.db index aa1d20e..e50f2e0 100644 Binary files a/InternetArchive/ia.db and b/InternetArchive/ia.db differ diff --git a/InternetArchive/make_metadata b/InternetArchive/make_metadata index 7b4907f..6e97ba8 100755 --- a/InternetArchive/make_metadata +++ b/InternetArchive/make_metadata @@ -19,21 +19,24 @@ # and this version (0.4.12) made into the main line version # because 4.14 was developing in a direction that doesn't fit # with the changes made to the HPR system in June/July 2023. -# Will now move forward with version numbers. +# Will now move forward with version numbers (and will get +# a duplicate). # 2024-01-23: Added the 'open' pragma for UTF-8 +# 2024-07-08: Fixed a bug where the top-level directory was +# being added to assets paths. 
See the definition of $linkre for +# more details. # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # AUTHOR: Dave Morriss (djm), Dave.Morriss@gmail.com # VERSION: 0.4.14 # CREATED: 2014-06-13 12:51:04 -# REVISION: 2024-01-23 16:28:59 +# REVISION: 2024-07-08 15:21:02 # #=============================================================================== use 5.010; use strict; use warnings; -use open ':encoding(UTF-8)'; -#use utf8; +use open ':std', ':encoding(UTF-8)'; use Carp; use Getopt::Long; @@ -1527,8 +1530,21 @@ sub find_links_in_notes { # http://www.hackerpublicradio.org/eps/hpr1303/Music_Notes.html # Also things like this (**Why Ken?**) # ../eps/hpr2945/IMG_20191018_122746Z.jpg - # Don't match things like when *not* processing 1986: + # Don't match things like this when *not* processing 1986: # http://hackerpublicradio.org/eps/hpr1986/full_shownotes.html#example-2 + # ---------------------------------------------------------------------- + # NOTE: 2024-07-08 + # + # It used to be that we added a top-level hprXXXX directory to URLs + # because there wasn't one on the HPR server. This was because the + # majority of shows without assets had no files; the notes were taken from + # the database and displayed dynamically. + # + # Now all HPR shows have a top-level directory for holding the index.html + # with the pre-created notes page. So we DO NOT want to create that + # top-level part. The RE below matches but doesn't store it or we'd get + # one too many directory levels. + # ---------------------------------------------------------------------- # $epstr = sprintf( "hpr%04d", $episode ); # my $re @@ -1537,6 +1553,7 @@ sub find_links_in_notes { ^https?:// (?:www.)? (?:hacker|hobby)publicradio.org/eps/ + $epstr/ (.+)$ }x; @@ -1558,7 +1575,7 @@ sub find_links_in_notes { _debug( $DEBUG >= 3, "\$uri = $uri\n" ); _debug( $DEBUG >= 3, "\$uri->fragment = " .
$uri->fragment ) if $uri->fragment; - _debug( $DEBUG >= 3, "\$slink = $slink, \n" ); + _debug( $DEBUG >= 3, "\$slink = $slink\n" ); # # Is it an HPR link? @@ -1760,7 +1777,7 @@ sub find_links_in_file { # http://www.hackerpublicradio.org/eps/hpr1303/Music_Notes.html # Also things like this (**Why Ken?**) # ../eps/hpr2945/IMG_20191018_122746Z.jpg - # Don't match things like when *not* processing 1986: + # Don't match things like this when *not* processing 1986: # http://hackerpublicradio.org/eps/hpr1986/full_shownotes.html#example-2 # $epstr = sprintf( "hpr%04d", $episode ); diff --git a/InternetArchive/repair_assets b/InternetArchive/repair_assets new file mode 100755 index 0000000..c00b131 --- /dev/null +++ b/InternetArchive/repair_assets @@ -0,0 +1,627 @@ +#!/bin/bash - +#=============================================================================== +# +# FILE: repair_assets +# +# USAGE: ./repair_assets [-h] [-v] [-d {0|1}] [-D] showid +# +# DESCRIPTION: Given a show where there was a directory of asset files on the +# old HPR server which got lost in the migration, rebuild it +# and fill it with assets from the IA. Modify the show notes to +# point to these recovered assets. +# +# OPTIONS: --- +# REQUIREMENTS: --- +# BUGS: --- +# NOTES: --- +# AUTHOR: Dave Morriss (djm), Dave.Morriss@gmail.com +# VERSION: 0.0.6 +# CREATED: 2024-05-10 21:26:31 +# REVISION: 2024-07-10 15:12:54 +# +#=============================================================================== + +# set -o nounset # Treat unset variables as an error + +VERSION="0.0.6" + +SCRIPT=${0##*/} +# DIR=${0%/*} + +STDOUT="/dev/fd/2" + +# +# Select the appropriate working directory for the host +# +case $(hostname) in + i7-desktop) + BASEDIR="$HOME/HPR/InternetArchive" + ;; + borg) + BASEDIR="$HOME/IA" + ;; + *) + echo "Wrong host!"
+ exit 1 + ;; +esac + +cd "$BASEDIR" || { echo "Failed to cd to $BASEDIR"; exit 1; } + +# +# Load library functions +# (a missing library is a failure, so exit non-zero like the other checks) +LIB="$HOME/HPR/function_lib.sh" +[ -e "$LIB" ] || { echo "Unable to source functions"; exit 1; } +# shellcheck disable=SC1090 +source "$LIB" + +# +# Enable coloured messages +# +define_colours + +# +# Sanity checks +# +IA=$(command -v ia) +[ -n "$IA" ] || { echo "Program 'ia' was not found"; exit 1; } +Q2T=$(command -v query2tt2) +[ -n "$Q2T" ] || { echo "Program 'query2tt2' was not found"; exit 1; } +FIXAL="$BASEDIR/fix_asset_links" +[ -e "$FIXAL" ] || { echo "Program '$FIXAL' was not found"; exit 1; } + +# +# Make temporary files and set traps to delete them +# +TMP1=$(mktemp) || { echo "$SCRIPT: creation of temporary file failed!"; exit 1; } +TMP2=$(mktemp) || { echo "$SCRIPT: creation of temporary file failed!"; exit 1; } +trap 'cleanup_temp $TMP1 $TMP2' SIGHUP SIGINT SIGPIPE SIGTERM EXIT + +# {{{ -- Functions -- _verbose, _usage, _log, find_missing, make_dir + +#=== FUNCTION ================================================================ +# NAME: find_missing +# DESCRIPTION: Given two arrays containing IA assets and HPR assets, +# determine which IA assets are missing from the HPR list. +# PARAMETERS: $1 (nameref) IA list +# $2 (nameref) HPR list +# $3 Name of array to receive list of missing assets +# RETURNS: Nothing +#=============================================================================== +find_missing () { + local -n IA="${1}" + local -n HPR="${2}" + local output="${3}" + + local -A hIA hHPR + local i key + + # + # Make a hash keyed by the IA file base names from an indexed array + # + for (( i=0; i<${#IA[@]}; i++ )); do + hIA+=([${IA[$i]##*/}]=${IA[$i]}) + done + + # + # Make a hash keyed by the HPR file base names from an indexed array + # + for (( i=0; i<${#HPR[@]}; i++ )); do + hHPR+=([${HPR[$i]##*/}]=${HPR[$i]}) + done + + # + # Use the basename keys to check what's missing, but return the full path + # names.
+ # (expanded inside the eval, so quotes in file names stay literal) + for key in "${!hIA[@]}"; do + if ! exists_in hHPR "$key"; then + eval "$output+=(\"\${hIA[\$key]}\")" + fi + done +} + +#=== FUNCTION ================================================================ +# NAME: make_dir +# DESCRIPTION: Make a directory if it doesn't exist, failing gracefully on +# errors. +# PARAMETERS: $1 directory path +# RETURNS: True if success, otherwise exits the caller script +#=============================================================================== +make_dir () { + local dir="${1}" + + if [[ ! -d $dir ]]; then + mkdir -p "$dir" || { + coloured 'red' "Failed to create $dir" + exit 1 + } + fi +} + +#=== FUNCTION ================================================================ +# NAME: _verbose +# DESCRIPTION: Writes a message in verbose mode +# PARAMETERS: * message strings to write +# RETURNS: Nothing +#=============================================================================== +_verbose () { + [ "$VERBOSE" -eq 0 ] && return + for msg; do + printf '%s\n' "$msg" + done +} + +#=== FUNCTION ================================================================ +# NAME: _log +# DESCRIPTION: Appends a record to the file "$LOGFILE" +# PARAMETERS: $1 Message to write +# RETURNS: Nothing +#=============================================================================== +_log () { + local message="${1}" + + echo "$(date +%F\ %T) $message" >> "$LOGFILE" +} + +#=== FUNCTION ================================================================ +# NAME: _usage +# DESCRIPTION: Reports usage; always exits the script after doing so +# PARAMETERS: 1 - the integer to pass to the 'exit' command +# RETURNS: Nothing +#=============================================================================== +_usage () { + local -i result=${1:-0} + + cat >$STDOUT <<-endusage +${SCRIPT} - version: ${VERSION} + +Usage: ./${SCRIPT} [-h] [-v] [-d {0|1}] [-D] showid + +Attempts to repair an show where the directory of assets was not transferred +from the old HPR server.
+ +Options: + -h Print this help + -v Run in verbose mode where more information is + reported. Default is off. If -v is repeated it + increases the verbosity level (levels 1 and 2 only). + -d 0|1 Dry run: -d 1 (the default) runs the script in dry-run + mode where nothing is changed but the actions that + will be taken are reported; -d 0 turns off dry-run + mode and the actions will be carried out. + -D Run in debug mode where a lot more information is + reported + +Arguments: + showid The show id in the form 'hpr1234' + +endusage + exit "$result" +} + +# }}} + +#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +#------------------------------------------------------------------------------- +# Directories and files +#------------------------------------------------------------------------------- +LOGS="$BASEDIR/logs" +make_dir "${LOGS}" +LOGFILE="$LOGS/$SCRIPT.log" + +#------------------------------------------------------------------------------- +# Options +#------------------------------------------------------------------------------- +# Default settings +# +VERBOSE=0 + +# +# Process options +# +while getopts :d:Dhv opt +do + case "${opt}" in + D) DEBUG=1;; + d) DRYRUN=$OPTARG;; + h) _usage 0;; + v) ((VERBOSE++));; + *) echo "** Unknown option" + _usage 1;; + esac +done +shift $((OPTIND - 1)) + +# +# Set option defaults and check their values +# +DRYRUN=${DRYRUN:-1} +if [[ $DRYRUN -ne 0 && $DRYRUN -ne 1 ]]; then + coloured 'red' "** Use '-d 0' or '-d 1'" + _usage 1 +fi +[[ $VERBOSE -gt 0 && $DRYRUN -eq 1 ]] && echo "Dry run mode" + +DEBUG=${DEBUG:-0} +[[ $DEBUG -eq 1 ]] && coloured 'yellow' "Debug mode" + +#------------------------------------------------------------------------------- +# Argument check +#------------------------------------------------------------------------------- +# Should have one argument +# +if [[ $# != 1 ]]; then + coloured 'red' "Missing argument" + _usage 1 +fi +show="${1,,}" + +# +# Ensure show id is 
correctly formatted. We want it to be 'hpr1234' +# (digits are read as base 10 so leading zeros are not treated as octal) +if [[ $show =~ ^(hpr)?([0-9]+)$ ]]; then + printf -v show 'hpr%04d' "$((10#${BASH_REMATCH[2]}))" +else + coloured 'red' "Incorrect show specification: $show" + coloured 'yellow' "Use 'hpr9999' or '9999' format" + exit 1 +fi +_DEBUG "Parsed item: $show" +echo "Processing show $show" +_log "Processing show $show; dry-run: $([ "$DRYRUN" -eq 1 ] && echo "on" || echo "off")" + +#------------------------------------------------------------------------------- +# Declarations and constants +#------------------------------------------------------------------------------- +declare -a iacache + +# +# SHOWURL is where the show will be on the webserver +# +printf -v SHOWURL 'https://hackerpublicradio.org/eps/%s/index.html' "$show" + +# +# CACHEDIR is where we store asset details and files +# +CACHEDIR="$BASEDIR/assets" +[ ! -d "$CACHEDIR" ] && { + coloured 'red' "Creating cache directory" + make_dir "$CACHEDIR" +} + +# +# Pointers into the cache: +# LOCAL_ASSETDIR - where the cache for this show lives +# LOCAL_FILEDIR - where the IA files have been placed +# LOCAL_PARENTDIR - the equivalent directory to the top show dir +# +LOCAL_ASSETDIR="$CACHEDIR/${show}" +LOCAL_FILEDIR="$LOCAL_ASSETDIR/files" +LOCAL_PARENTDIR="$LOCAL_FILEDIR/${show}" + +# +# Pointers to the HPR server directories: +# REMOTE_ASSETDIR - where the assets are to go +# REMOTE_PARENTDIR - the remote parent directory +# +REMOTE_ASSETDIR="public_html/eps/${show}/${show}" +REMOTE_PARENTDIR="public_html/eps/${show}" + +CMDTPL='ssh hpr@hackerpublicradio.org %s' + +MANIFEST="$CACHEDIR/$show/manifest" +DBNOTES="$CACHEDIR/$show/notes.html" + +#------------------------------------------------------------------------------- +# Check the show exists in the database (or is visible on the website).
+#------------------------------------------------------------------------------- +_verbose "Checking the show exists on the HPR server" +result=$(curl --head --silent --write-out "%{http_code}" --output /dev/null "$SHOWURL") +if [[ $result -eq 404 ]]; then + coloured 'red' "Could not detect show '$show' on the HPR server" + _log "Show '$show' not on the HPR server" + exit 1 +fi + +#------------------------------------------------------------------------------- +# Check the show exists on the IA +#------------------------------------------------------------------------------- +_verbose "Checking the show exists on the IA server" +if ! ia metadata "$show" --exists > /dev/null 2>&1; then + coloured 'red' "Could not detect show '$show' on the IA server" + coloured 'yellow' "Check that archive.org is available" + coloured 'yellow' "Try https://downfor.io/internet-archive" + _log "Show '$show' not on the IA server" + exit 1 +fi + +#------------------------------------------------------------------------------- +# Check IA, collect contents, classify them +#------------------------------------------------------------------------------- +# Interrogate the IA for the required item contents. If it returns True we can +# collect its contents, otherwise we can't proceed. The file 'TMP1' contains +# just a simple list of the files on the IA relating to this item. 
+# +_verbose "Collecting filenames from the IA server" +if ia list "$show" > "$TMP1"; then + while read -r iafile; do + iacache+=("$iafile") + done < "$TMP1" +else + coloured 'red' "Item $show can't be found on the IA" + coloured 'red' "Can't continue" + _log "Files for show '$show' not on the IA server" + exit 1 +fi + +_DEBUG "$(printf '%s\n' "${iacache[@]}")" + +# +# Determine which files are assets +# +_verbose "Categorising files held on the IA" + +declare -a audio ia_transcript ia_asset + +audio_re="^${show}\.(flac|mp3|ogg|opus|spx|wav)\$" +# transcript_re="^${show}/${show}/${show}\.(json|srt|tsv|txt|vtt)\$" +transcript_re="^${show}/${show}\.(json|srt|tsv|txt|vtt)\$" +asset_re="^${show}/(${show}/)?.*\$" +metadata_re="^(__ia_thumb.jpg|${show}[^/]+\.(afpk|torrent|gz|xml|sqlite|png))\$" + +for file in "${iacache[@]}"; do + if [[ $file =~ $audio_re ]]; then + audio+=("$file") + elif [[ $file =~ $metadata_re ]]; then + _verbose "Skipping $file" + continue + elif [[ $file =~ $transcript_re ]]; then + ia_transcript+=("$file") + elif [[ $file =~ $asset_re ]]; then + ia_asset+=("$file") + fi +done + +# +# Report what was collected at verbosity level 2 +# +if [[ $VERBOSE -gt 1 ]]; then + coloured 'cyan' "** audio (${#audio[@]}):" + printf '%s\n' "${audio[@]}" + + coloured 'cyan' "** transcript (${#ia_transcript[@]}):" + printf '%s\n' "${ia_transcript[@]}" + + coloured 'cyan' "** asset (${#ia_asset[@]}):" + printf '%s\n' "${ia_asset[@]}" + + _log "IA asset count for show '$show' = ${#ia_asset[@]}" +fi + +# +# No assets, no need to proceed! 
+# +if [[ ${#ia_asset[@]} -eq 0 ]]; then + coloured 'green' "No IA assets found for show $show; nothing to do" + _log "Nothing to do for show $show" + exit +fi + +#------------------------------------------------------------------------------- +# Check what's on the HPR server +#------------------------------------------------------------------------------- +# +# 'rc' is the remote command template +# +printf -v rc 'find public_html/eps/%s -type f -printf "%s/%%P\\n"' "$show" "$show" + +# +# 'command' is the local command we'll run to run a remote command on the HPR +# server +# +# shellcheck disable=SC2059 disable=SC2089 +printf -v command "$CMDTPL" "'$rc'" + +if [[ $VERBOSE -gt 1 ]]; then + echo "Command: $command" +fi + +declare -a hpr_asset +ignore_re="index.html$" + +# +# Run the command and save the output. Save the asset names returned in an +# array. TODO: Handle errors from the command +# +if [[ $DRYRUN -eq 0 ]]; then + eval "$command" > "$TMP2" + RES=$? + if [[ $RES -eq 0 ]]; then + _verbose "$(coloured 'green' "Remote command successful")" + while read -r hprfile; do + if [[ ! 
$hprfile =~ $ignore_re ]]; then + hpr_asset+=("${hprfile}") + fi + done < "$TMP2" + _verbose "$(coloured 'green' "Assets found on HPR server = ${#hpr_asset[@]}")" + _verbose "$(printf '%s\n' "${hpr_asset[@]}")" + _log "Assets found on HPR server = ${#hpr_asset[@]}" + else + coloured 'red' "Remote command failed" + _log "Failed while searching for HPR assets" + exit 1 + fi +else + coloured 'yellow' "Would have searched for assets on the HPR server" +fi + +#------------------------------------------------------------------------------- +# Compare the two asset lists and return what's missing on the HPR server +#------------------------------------------------------------------------------- +declare -a missing +find_missing ia_asset hpr_asset missing +_verbose "$(coloured 'cyan' "** missing (${#missing[@]}):")" +_verbose "$(printf '%s\n' "${missing[@]}")" + +if [[ ${#missing[@]} -eq 0 ]]; then + coloured 'green' "No missing assets detected; nothing to do" + _log "No missing assets detected; nothing to do" + exit +else + coloured 'yellow' "Found ${#missing[@]} files missing on the HPR server" +fi + +#------------------------------------------------------------------------------- +# Prepare to copy the missing files +#------------------------------------------------------------------------------- +make_dir "$LOCAL_FILEDIR" + +declare -a downloads + +# +# Check whether files are already downloaded +# +for file in "${missing[@]}"; do + if [[ ! -e "$LOCAL_FILEDIR/$show/$file" ]]; then + downloads+=("$file") + fi +done + +_verbose "$(coloured 'cyan' "** downloads (${#downloads[@]}):")" +_verbose "$(printf '%s\n' "${downloads[@]}")" + +# +# If we have files to download get them now +# +if [[ ${#downloads[@]} -gt 0 ]]; then + if [[ $DRYRUN -eq 1 ]]; then + coloured 'yellow' "Would have downloaded missing files from the IA" + else + ia download "$show" --destdir="$LOCAL_FILEDIR" "${downloads[@]}" + RES=$? 
+ if [[ $RES -eq 0 ]]; then + coloured 'green' "Downloads complete" + _log "Downloaded IA assets for show $show" + fi + fi +else + coloured 'yellow' "IA files are already downloaded" +fi + +# shellcheck disable=SC2089 +RSYNCTPL="rsync -a -e 'ssh' %s hpr@hpr:%s" + +#------------------------------------------------------------------------------- +# Build the 'ssh' command to make a directory +#------------------------------------------------------------------------------- +# +# Prepare to make the remote directory if necessary. +# +# - $rc is the remote command we'll run on the server +# - $command is the full 'ssh' command including $rc +# +printf -v rc 'if [ ! -e "%s" ]; then mkdir -p "%s"; fi' \ + "$REMOTE_ASSETDIR" "$REMOTE_ASSETDIR" + +# shellcheck disable=SC2059 disable=SC2089 +printf -v command "$CMDTPL" "'$rc'" + +#------------------------------------------------------------------------------- +# Run or report the command that would be run +#------------------------------------------------------------------------------- +if [[ $DRYRUN -eq 0 ]]; then + eval "$command" + RES=$? + if [[ $RES -eq 0 ]]; then + coloured 'green' "Remote directory creation successful" + else + coloured 'red' "Remote directory creation failed" + fi +else + coloured 'yellow' "Would have created the remote directory" + echo "$command" +fi + +#------------------------------------------------------------------------------- +# Synchronise assets to the directory +#------------------------------------------------------------------------------- +# shellcheck disable=SC2059 disable=SC2089 +printf -v command "$RSYNCTPL" "$LOCAL_PARENTDIR/" "$REMOTE_PARENTDIR/" + +if [[ $DRYRUN -eq 0 ]]; then + eval "$command" + RES=$? 
+ if [[ $RES -eq 0 ]]; then + coloured 'green' "Remote upload successful" + _log "Uploaded assets for show $show" + else + coloured 'red' "Remote upload failed" + exit 1 + fi +else + coloured 'yellow' "Would have synchronised local assets with the remote directory" + echo "$command" +fi + +#------------------------------------------------------------------------------- +# Make a 'manifest' file if necessary +#------------------------------------------------------------------------------- +if [[ $DRYRUN -eq 0 ]]; then + if [[ ! -e $MANIFEST ]]; then + find "$LOCAL_PARENTDIR" -type f -printf '%P\n' > "$MANIFEST" + _verbose "$(coloured 'green' "Created manifest file")" + _log "Created manifest file $MANIFEST" + fi +fi + +#------------------------------------------------------------------------------- +# Save the notes from the database if necessary +#------------------------------------------------------------------------------- +if [[ $DRYRUN -eq 0 ]]; then + if [[ ! -e $DBNOTES ]]; then + if ! tunnel_is_open; then + open_tunnel + fi + if query2tt2 -config="$BASEDIR/.hpr_livedb.cfg" \ + -temp="$BASEDIR/query2tt2_nokey.tpl" \ + -out="$DBNOTES" \ + -dbarg="${show:3}" \ + 'select notes from eps where id = ?' 
+ then + _verbose "$(coloured 'green' "Created notes file")" + _log "Created notes file $DBNOTES" + else + _verbose "$(coloured 'red' "Creation of notes file failed")" + _log "Creation of notes file $DBNOTES failed" + fi + fi +fi + +#------------------------------------------------------------------------------- +# Adjust the notes with 'fix_asset_links' +#------------------------------------------------------------------------------- +if [[ $DRYRUN -eq 0 ]]; then + echo "$FIXAL" + # $FIXAL +fi + + +# +# All done +# +if [[ $DRYRUN -eq 0 ]]; then + _log "Repaired show $show" +fi + +#------------------------------------------------------------------------------- +# √ Make a place to hold the files on this machine +# √ Download them from the IA +# √ Make a directory on the HPR server +# √ Copy the assets to the HPR server +# Modify the notes to point to the assets on the server +#------------------------------------------------------------------------------- + +# vim: syntax=sh:ts=8:sw=4:ai:et:tw=78:fo=tcrqn21:fdm=marker diff --git a/InternetArchive/repair_item b/InternetArchive/repair_item index 7a6831c..7700330 100755 --- a/InternetArchive/repair_item +++ b/InternetArchive/repair_item @@ -3,7 +3,7 @@ # # FILE: repair_item # -# USAGE: ./repair_item [-h] [-v] [-d {0|1}] [-D] [-l N] itemname +# USAGE: ./repair_item [-h] [-v] [-d {0|1}] [-D] [-l N] [-X] itemname # # DESCRIPTION: Repairs an IA "item" (HPR show) if something has failed during # the upload. @@ -18,20 +18,32 @@ # temporarily on 'borg') and determines which have not been # uploaded, then takes steps to perform the uploads. # +# Version 0.0.10 onwards has the capability to repair an IA item +# from the HPR backup disk. This seems to be necessary because +# the transcripts were not carried over (although we are +# adding them to the IA for new shows now, older ones were never +# copied), and there has been a case where none of the assets +# were on the IA. 
The method used is to place the backup files +# in the directory 'repairs' under the local IA or +# InternetArchive directory. The files are held in the hierarchy +# '$item/$item/'. The assets are in the lower directory and the +# source file is in the upper one. This emulates the placement +# on the IA itself. +# # OPTIONS: --- # REQUIREMENTS: --- # BUGS: --- # NOTES: --- # AUTHOR: Dave Morriss (djm), Dave.Morriss@gmail.com -# VERSION: 0.0.9 +# VERSION: 0.0.10 # CREATED: 2020-01-05 22:42:46 -# REVISION: 2024-06-14 18:03:58 +# REVISION: 2024-07-12 14:39:38 # #=============================================================================== #set -o nounset # Treat unset variables as an error -VERSION="0.0.9" +VERSION="0.0.10" SCRIPT=${0##*/} # DIR=${0%/*} @@ -45,10 +57,12 @@ case $(hostname) in i7-desktop) BASEDIR="$HOME/HPR/InternetArchive" UPLOADS="$HOME/HPR/IA/uploads" + REPAIRS="$BASEDIR/repairs" ;; borg) BASEDIR="$HOME/IA" UPLOADS="/data/IA/uploads" + REPAIRS="$BASEDIR/repairs" ;; *) echo "Wrong host!" @@ -185,7 +199,7 @@ _usage () { cat >$STDOUT <<-endusage ${SCRIPT} - version: ${VERSION} -Usage: ./${SCRIPT} [-h] [-v] [-d {0|1}] [-D] [-l N] item +Usage: ./${SCRIPT} [-h] [-v] [-d {0|1}] [-D] [-l N] [-X] item Attempts to repair an IA item where the upload has failed for some reason. @@ -203,6 +217,12 @@ Options: during one run of the script. The range is 1 to $DEFLIMIT. This can be helpful when there are upload problems. + -X Run in "extended" mode. In this mode the directory + holding files to be added to the IA is '~/IA/repairs' + and the files have most likely come from the HPR + backup disk and aren't on the IA due some error. We + want to use the capabilities of ${SCRIPT} to repair + things and deal with the IA upload problems.
Arguments: item The item in the form 'hpr1234' @@ -229,7 +249,7 @@ DEFLIMIT=20 # # Process options # -while getopts :d:Dhl:v opt +while getopts :d:Dhl:vX opt do case "${opt}" in D) DEBUG=1;; @@ -237,6 +257,7 @@ do h) _usage 0;; l) LIMIT=$OPTARG;; v) VERBOSE=1;; + X) EXTENDED=1;; *) echo "** Unknown option" _usage 1;; esac @@ -264,6 +285,8 @@ if [[ $LIMIT -lt 1 || $LIMIT -gt $DEFLIMIT ]]; then _usage 1 fi +EXTENDED=${EXTENDED:-0} + # # Should have one argument # @@ -295,6 +318,22 @@ if ! ia metadata "$item" --exists > /dev/null 2>&1; then exit 1 fi +# +# The -X (EXTENDED) mode is for when we have to upload files that have +# mysteriously vanished from the IA. The directories here are equivalent to +# those used by 'repair_assets'. There is a top-level directory that represents +# the IA item, and below that a hierarchy defining placement under the item. +# There is a 'repairs' directory per host in case we need to repair IA stuff +# from elsewhere. +# +if [[ $EXTENDED -eq 1 ]]; then + coloured 'cyan' "Using 'Extended' mode" + if [[ ! -e $REPAIRS ]]; then + mkdir -p "$REPAIRS" + fi + UPLOADS="$REPAIRS/$item" +fi + # # Declarations #