| 
									
										
										
										
											2024-06-04 16:35:44 +01:00
										 |  |  | #!/bin/bash - | 
					
						
							|  |  |  | #=============================================================================== | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | #         FILE: repair_item | 
					
						
							|  |  |  | # | 
					
						
							| 
									
										
										
										
											2024-07-16 21:39:28 +01:00
										 |  |  | #        USAGE: ./repair_item [-h] [-v] [-d {0|1}] [-D] [-l N] [-X] itemname | 
					
						
							| 
									
										
										
										
											2024-06-04 16:35:44 +01:00
										 |  |  | # | 
					
						
							|  |  |  | #  DESCRIPTION: Repairs an IA "item" (HPR show) if something has failed during | 
					
						
							| 
									
										
										
										
											2024-08-22 13:13:38 +01:00
										 |  |  | #               the upload (and when recovering deleted files from the | 
					
						
							|  |  |  | #               changeover to the HPR static site). | 
					
						
							| 
									
										
										
										
											2024-06-04 16:35:44 +01:00
										 |  |  | # | 
					
						
							|  |  |  | #               The most common failures are caused by the file upload | 
					
						
							|  |  |  | #               processes timing out and being aborted (by the 'ia' tool which | 
					
						
							|  |  |  | #               performs the item creation and the uploads). This failure | 
					
						
							|  |  |  | #               means that a show being processed on 'borg' does not get all | 
					
						
							| 
									
										
										
										
											2024-08-22 13:13:38 +01:00
										 |  |  | #               of the components loaded to the IA. This happens during the | 
					
						
							|  |  |  | #               sequence of running the 'make_metadata' Perl script which | 
					
						
							|  |  |  | #               generates a CSV file of show data, followed by 'ia metadata | 
					
						
							|  |  |  | #               --spreadsheet=<CSV file>'. Failures in the second part cause | 
					
						
							|  |  |  | #               it to be aborted. | 
					
						
							| 
									
										
										
										
											2024-06-04 16:35:44 +01:00
										 |  |  | # | 
					
						
							|  |  |  | #               This script looks at the files belonging to the show (stored | 
					
						
							|  |  |  | #               temporarily on 'borg') and determines which have not been | 
					
						
							|  |  |  | #               uploaded, then takes steps to perform the uploads. | 
					
						
							|  |  |  | # | 
					
						
							| 
									
										
										
										
											2024-11-23 22:28:52 +00:00
										 |  |  | #               Version 0.0.12 onwards has the capability to repair an IA item | 
					
						
							| 
									
										
										
										
											2024-07-16 21:39:28 +01:00
										 |  |  | #               from the HPR backup disk. This seems to be necessary because | 
					
						
							|  |  |  | #               the transcripts were not carried over (although we are | 
					
						
							|  |  |  | #               adding them to the IA for new shows now, older ones were never | 
					
						
							|  |  |  | #               copied), and there has been a case where none of the assets | 
					
						
							|  |  |  | #               were on the IA. The method used is to place the backup files | 
					
						
							|  |  |  | #               in the directory 'repairs' under the local IA or | 
					
						
							|  |  |  | #               InternetArchive directory. The files are held in the hierarchy | 
					
						
							|  |  |  | #               '$item/$item/'. The assets are in the lower directory and the | 
					
						
							|  |  |  | #               source file is in the upper one. This emulates the placement | 
					
						
							|  |  |  | #               on the IA itself. | 
					
						
							|  |  |  | # | 
					
						
							| 
									
										
										
										
											2024-08-22 13:13:38 +01:00
										 |  |  | #               This script can be called directly to recover a new show which | 
					
						
							|  |  |  | #               failed during creation/upload, or by 'recover_transcripts' | 
					
						
							|  |  |  | #               which is repairing shows with missing assets. | 
					
						
							|  |  |  | # | 
					
						
							| 
									
										
										
										
											2024-06-04 16:35:44 +01:00
										 |  |  | #      OPTIONS: --- | 
					
						
							|  |  |  | # REQUIREMENTS: --- | 
					
						
							|  |  |  | #         BUGS: --- | 
					
						
							|  |  |  | #        NOTES: --- | 
					
						
							|  |  |  | #       AUTHOR: Dave Morriss (djm), Dave.Morriss@gmail.com | 
					
						
							| 
									
										
										
										
											2024-11-23 22:28:52 +00:00
										 |  |  | #      VERSION: 0.0.12 | 
					
						
							| 
									
										
										
										
											2024-06-04 16:35:44 +01:00
										 |  |  | #      CREATED: 2020-01-05 22:42:46 | 
					
						
							| 
									
										
										
										
											2024-11-23 22:28:52 +00:00
										 |  |  | #     REVISION: 2024-09-13 18:19:59 | 
					
						
							| 
									
										
										
										
											2024-06-04 16:35:44 +01:00
										 |  |  | # | 
					
						
							|  |  |  | #=============================================================================== | 
					
						
							|  |  |  | 
 | 
					
						
							#set -o nounset                              # Treat unset variables as an error

							VERSION="0.0.12"

							# Name of this script with any leading directory part removed
							SCRIPT=${0##*/}
							# DIR=${0%/*}

							# Where _usage writes its text (file descriptor 2)
							STDOUT="/dev/fd/2"

							#
							# Pick the working directories that belong to the host we are running
							# on. Any host not listed here is rejected.
							#
							case "$(hostname)" in
							    borg)
							        BASEDIR="$HOME/IA"
							        UPLOADS="/data/IA/uploads"
							        ;;
							    i7-desktop)
							        # TODO: consider not allowing this to be run anywhere but on 'borg'
							        BASEDIR="$HOME/HPR/InternetArchive"
							        UPLOADS="$HOME/HPR/IA/uploads"
							        ;;
							    *)
							        echo "Wrong host!"
							        exit 1
							        ;;
							esac

							# On every accepted host the 'repairs' directory sits directly below BASEDIR
							REPAIRS="$BASEDIR/repairs"

							if ! cd "$BASEDIR"; then
							    echo "Failed to cd to $BASEDIR"
							    exit 1
							fi
					
						
							|  |  |  | 
 | 
					
						
							#
							# Load library functions. The script relies on functions from this file
							# (e.g. 'define_colours', 'coloured', '_DEBUG'), so give up with a failure
							# status if it cannot be found. A bare 'exit' here would pass on the
							# status of the preceding 'echo', i.e. success.
							#
							LIB="$HOME/bin/function_lib.sh"
							[ -e "$LIB" ] || { echo "Unable to source functions"; exit 1; }
							# shellcheck disable=SC1090
							source "$LIB"
					
						
							|  |  |  | 
 | 
					
						
							#
							# Enable coloured messages
							#
							define_colours

							#
							# Sanity checks: the external tools 'jq' and 'ia' must both be found
							#
							JQ=$(command -v jq)
							if [ -z "$JQ" ]; then
							    echo "Program 'jq' was not found"
							    exit 1
							fi

							IA=$(command -v ia)
							if [ -z "$IA" ]; then
							    echo "Program 'ia' was not found"
							    exit 1
							fi

							#
							# Make a temporary file and set traps so that 'cleanup_temp' is called
							# with its name on the listed signals and on exit
							#
							if ! TMP1=$(mktemp); then
							    echo "$SCRIPT: creation of temporary file failed!"
							    exit 1
							fi
							trap 'cleanup_temp $TMP1' SIGHUP SIGINT SIGPIPE SIGTERM EXIT
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-08-22 13:13:38 +01:00
										 |  |  | # {{{ -- Functions -- Upload, exists_in, queued_tasks, _usage | 
					
						
							| 
									
										
										
										
											2024-06-04 16:35:44 +01:00
										 |  |  | 
 | 
					
						
							#===  FUNCTION  ================================================================
							#         NAME: Upload
							#  DESCRIPTION: Uploads a file to the Internet Archive with various options.
							#               All output from the 'ia' command invocation is discarded;
							#               only its exit status is passed back to the caller.
							#   PARAMETERS: 1 - the item id (e.g. 'hpr1234')
							#               2 - the path to the file for upload
							#               3 - (optional) the path to the file on the IA
							#               4 - (optional) list of options for 'ia upload' enclosed as
							#                   a string (it is split into separate words)
							#      RETURNS: Exit code of the 'ia upload' command, or 1 (after writing
							#               a message) if the file to upload does not exist
							#===============================================================================
							Upload () {
							    local id=${1}
							    local file=${2}
							    local remote=${3:-}
							    local options=${4:-}

							    local -a args

							    if [[ ! -e $file ]]; then
							        echo "File missing: $file"
							        return 1
							    fi

							    #
							    # Build the fixed arguments as an array so that an id, file path or
							    # remote name containing spaces or glob characters is passed to 'ia'
							    # as a single word.
							    #
							    args=("$id" "$file")
							    if [[ -n $remote ]]; then
							        args+=("--remote-name=$remote")
							    fi

							    #
							    # The options string is left unquoted on purpose: it holds several
							    # options which must reach 'ia' as separate words. This is the last
							    # command, so its exit status is the function's return value.
							    #
							    # shellcheck disable=SC2086
							    ia upload "${args[@]}" ${options} > /dev/null 2>&1
							}
					
						
							|  |  |  | 
 | 
					
						
							#===  FUNCTION  ================================================================
							#         NAME: exists_in
							#  DESCRIPTION: Tests whether a key is present in an associative array. The
							#               array is passed by name, so the test has to be assembled as
							#               a string and run through 'eval'. The '+word' expansion
							#               yields 'word' only when the element is set (even if it is
							#               empty), which is what makes the '[ ... ]' test succeed.
							#   PARAMETERS: $1      array name
							#               $2      key value
							#      RETURNS: True if the key exists, False otherwise
							#
							# Modified from
							# https://stackoverflow.com/questions/13219634/easiest-way-to-check-for-an-index-or-a-key-in-an-array
							#===============================================================================
							exists_in () {
							    # Only the array name is substituted while building the string; the
							    # key is left as '$2' to be expanded when 'eval' runs the test.
							    # shellcheck disable=SC2086
							    eval "[ \${$1[\$2]+muahaha} ]"
							}
					
						
							|  |  |  | 
 | 
					
						
							#===  FUNCTION  ================================================================
							#         NAME: queued_tasks
							#  DESCRIPTION: Asks the IA (via 'ia tasks') for the tasks belonging to an
							#               item and uses 'jq' to count those whose category is
							#               "catalog". The count is written to STDOUT so that the
							#               caller can capture it.
							#   PARAMETERS: $1      IA item (like hpr1192)
							#      RETURNS: Nothing
							#===============================================================================
							queued_tasks () {
							    local show="${1:?Usage: queued_tasks item}"
							    local -i ntasks=0
							    local filter

							    # Slurp all task records into one array, keep the status of each
							    # "catalog" entry and report how many there were
							    filter='[.[] | if .category == "catalog" then .status else empty end] | length'

							    ntasks="$(ia tasks "$show" | jq -s "$filter")"

							    echo "$ntasks"

							    return
							}
					
						
							|  |  |  | 
 | 
					
						
							#===  FUNCTION  ================================================================
							#         NAME: _usage
							#  DESCRIPTION: Reports usage; always exits the script after doing so. The
							#               text is written to the file named by the global 'STDOUT'
							#               and uses the globals 'SCRIPT', 'VERSION', 'DEFLIMIT' and
							#               'REPAIRS', so these must be set before it is called.
							#   PARAMETERS: 1 - the integer to pass to the 'exit' command (default 0)
							#      RETURNS: Nothing
							#===============================================================================
							_usage () {
							    local -i result=${1:-0}

							    # The '<<-' form removes leading tabs from the text below. The repairs
							    # directory is taken from 'REPAIRS' because it differs between hosts.
							    cat >"$STDOUT" <<-endusage
							${SCRIPT} - version: ${VERSION}

							Usage: ./${SCRIPT} [-h] [-v] [-d {0|1}] [-D] [-l N] [-X] item

							Attempts to repair an IA item where the upload has failed for some reason.

							Options:
							  -h                    Print this help
							  -v                    Run in verbose mode where more information is
							                        reported. Default is off.
							  -d 0|1                Dry run: -d 1 (the default) runs the script in dry-run
							                        mode where nothing is changed but the actions that
							                        will be taken are reported; -d 0 turns off dry-run
							                        mode and the actions will be carried out.
							  -D                    Run in debug mode where a lot more information is
							                        reported
							  -l N                  Control the number of files that can be uploaded
							                        during one run of the script. The range is 1 to
							                        $DEFLIMIT. This can be helpful when there are upload
							                        problems.
							  -X                    Run in "extended" mode. In this mode the directory
							                        holding files to be added to the IA is
							                        '$REPAIRS'
							                        and the files have most likely come from the HPR
							                        backup disk and aren't on the IA due to some error. We
							                        want to use the capabilities of ${SCRIPT} to repair
							                        things and deal with the IA upload problems.

							Arguments:
							    item                The item in the form 'hpr1234'

							endusage
							    exit "$result"
							}
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # }}} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | 
					
						
							|  |  |  | 
 | 
					
						
							#
							# Directories and files
							#
							LOGS="$BASEDIR/logs"
							LOGFILE="$LOGS/$SCRIPT.log"

							#
							# Constants
							#
							DEFLIMIT=20

							#
							# Process options. The leading ':' in the option string selects getopts'
							# silent mode, so unknown options and missing option arguments both end
							# up in the catch-all branch rather than producing getopts' own messages.
							#
							while getopts ':d:Dhl:vX' opt; do
							    case "$opt" in
							        h)
							            _usage 0
							            ;;
							        v)
							            VERBOSE=1
							            ;;
							        D)
							            DEBUG=1
							            ;;
							        X)
							            EXTENDED=1
							            ;;
							        d)
							            DRYRUN=$OPTARG
							            ;;
							        l)
							            LIMIT=$OPTARG
							            ;;
							        *)
							            echo "** Unknown option"
							            _usage 1
							            ;;
							    esac
							done

							# Discard the options that were processed, leaving only the arguments
							shift "$((OPTIND - 1))"
					
						
							|  |  |  | 
 | 
					
						
							#
							# Set option defaults and check their values
							#
							VERBOSE=${VERBOSE:-0}

							#
							# The -d value is checked as text. A numeric comparison such as '-ne'
							# evaluates a non-numeric word as a variable name, which normally yields
							# 0, so something like '-d yes' would pass the check and silently turn
							# dry-run mode off.
							#
							DRYRUN=${DRYRUN:-1}
							if [[ ! $DRYRUN =~ ^[01]$ ]]; then
							    echo "** Use '-d 0' or '-d 1'"
							    _usage 1
							fi
							[[ $VERBOSE -eq 1 && $DRYRUN -eq 1 ]] && echo "Dry run mode"

							DEBUG=${DEBUG:-0}
							[[ $DEBUG -eq 1 ]] && coloured 'yellow' "Debug mode"

							#
							# The -l value must be a plain decimal number in the range 1..DEFLIMIT.
							# The '10#' prefix stops a value with a leading zero (like '08') from
							# being read as octal, and the value is stored without leading zeroes.
							#
							LIMIT=${LIMIT:-$DEFLIMIT}
							if [[ ! $LIMIT =~ ^[0-9]+$ ]] || (( 10#$LIMIT < 1 || 10#$LIMIT > DEFLIMIT )); then
							    echo "** Use '-l 1' up to '-l $DEFLIMIT' or omit the option"
							    _usage 1
							fi
							LIMIT=$((10#$LIMIT))

							EXTENDED=${EXTENDED:-0}

							#
							# Should have one argument
							#
							if [[ $# != 1 ]]; then
							    coloured 'red' "Missing argument"
							    _usage 1
							fi
							item="${1}"
					
						
							|  |  |  | 
 | 
					
						
							#
							# Ensure item spec is correctly formatted: 'hpr' followed only by digits.
							# The pattern is anchored at both ends so that extra text before or after
							# (e.g. 'xhpr12y') is rejected rather than quietly reduced to 'hpr0012'.
							# The number is forced to base 10 so that leading zeroes are not
							# interpreted as octal, then the item is rebuilt with at least four digits.
							#
							if [[ $item =~ ^hpr([0-9]+)$ ]]; then
							    printf -v item 'hpr%04d' "$((10#${BASH_REMATCH[1]}))"
							else
							    coloured 'red' "Incorrect show specification: $item"
							    coloured 'yellow' "Use 'hpr9999' format"
							    exit 1
							fi
							_DEBUG "Parsed item: $item"
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-06-14 16:00:04 +01:00
										 |  |  | # | 
					
						
							# The show upload may have failed before anything at all reached the IA,
							# even the metadata. It's rarely seen, but it seems wise to cater for it:
							# if 'ia metadata --exists' does not succeed for the item we stop here.
							#
							ia metadata "$item" --exists > /dev/null 2>&1 || {
							    coloured 'red' "This item is not apparently on the IA; can't continue"
							    coloured 'yellow' "Try running the entire upload again from the start"
							    exit 1
							}
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-07-16 21:39:28 +01:00
										 |  |  | # | 
					
						
							# The -X (EXTENDED) mode is for when we have to upload files that have
							# mysteriously vanished from the IA. The directories here are equivalent to
							# those used by 'repair_assets'. There is a top-level directory that
							# represents the IA item, and below that a hierarchy defining placement
							# under the item. There is a 'repairs' directory per host in case we need
							# to repair IA stuff from elsewhere.
							#
							# In this mode the 'repairs' directory is created if it is missing and
							# 'UPLOADS' is redirected to the item's directory inside it.
							#
							if (( EXTENDED == 1 )); then
							    coloured 'cyan' "Using 'Extended' mode"
							    [[ -e $REPAIRS ]] || mkdir -p "$REPAIRS"
							    UPLOADS="$REPAIRS/$item"
							fi
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-06-04 16:35:44 +01:00
										 |  |  | # | 
					
						
							# Declarations
							#
							declare -A fcache
							declare -A iacache
							declare -a missed

							#
							# Scan the directory 'UPLOADS' where files for upload to the IA are stored and
							# collect everything for this item (show). Each file found is recorded in
							# 'fcache', keyed by its path relative to 'UPLOADS'.
							#
							# See the `find' pipeline at the end of the loop which selects only files, not
							# directories. It outputs the last change time and the full file path, sorts
							# on the time, then removes it. Everything after the first space is kept
							# ('cut -f2-') so that a path which itself contains spaces is not truncated,
							# and 'IFS=' stops 'read' trimming leading or trailing blanks.
							#
							# TODO: This algorithm is from another script and is not needed here. The
							# order of processing is irrelevant here so simplify the 'find' and the loop.
							# We are only looking for the 'item' specified by the argument, not other
							# ones.
							#
							# 'item' is deliberately NOT reassigned inside the loop. It has already been
							# validated and is used again after the loop; taking it from the first seven
							# characters of each relative path could replace it with something that is
							# not an item name. Those characters are still shown in the debug output.
							#
							while IFS= read -r path; do
							    relpath="${path#"$UPLOADS"/}"

							    [[ $VERBOSE -eq 1 ]] && echo "Found $path"

							    _DEBUG "Path:          $path"
							    _DEBUG "Relative path: $relpath"
							    _DEBUG "IA item:       ${relpath:0:7}"

							    # Setting a key that is already present is harmless, so there is no
							    # need to test for it first
							    # shellcheck disable=SC2034
							    fcache[$relpath]=1
							done < <(find "$UPLOADS" -type f -regextype posix-extended \
							    -regex ".*$item.*" -printf "%CY%Cm%Cd%CH%CM%CS %p\n" | sort | cut -f2- -d' ')
					
						
							|  |  |  | 
 | 
					
						
							#
							# Did we find anything? With no local files recorded in 'fcache' there is
							# nothing to compare with the IA, so stop.
							#
							(( ${#fcache[@]} > 0 )) || {
							    coloured 'red' "No files found for item $item in $UPLOADS"
							    coloured 'red' "Can't continue"
							    exit 1
							}
					
						
							|  |  |  | 
 | 
					
						
							#
							# Look to see if there are any tasks queued for this show on the IA servers.
							# If there are we can't continue.
							#
							# TODO: This could be a loop waiting for tasks to complete rather than
							# aborting and asking to be rerun.
							#
							tasks=$(queued_tasks "$item")
							if (( tasks > 0 )); then
							    # 'ngettext' picks the singular or plural word to suit the count
							    coloured 'red' \
							        "Item $item still has $tasks unfinished $(ngettext task tasks "$tasks")"
							    coloured 'red' "Allow time for task(s) to finish and try again later"
							    exit 1
							fi
					
						
							|  |  |  | 
 | 
					
						
							#
							# Interrogate the IA for the required item contents. If 'ia list' fails we
							# can't proceed; otherwise the file 'TMP1' contains just a simple list of the
							# files on the IA relating to this item, and each line of it becomes a key
							# in 'iacache'.
							#
							if ! ia list "$item" > "$TMP1"; then
							    coloured 'red' "Item $item is not in the IA"
							    coloured 'red' "Can't continue"
							    exit 1
							fi

							while read -r iafile; do
							    # shellcheck disable=SC2034
							    iacache[$iafile]=1
							done < "$TMP1"
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | # Look through the list of files we found and detect any not on the IA | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | for path in "${!fcache[@]}"; do | 
					
						
							|  |  |  |     if ! exists_in iacache "$path"; then | 
					
						
							|  |  |  |         missed+=("$path") | 
					
						
							|  |  |  |     fi | 
					
						
							|  |  |  | done | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | # Counters and defaults for the loop | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | retry_threshold=5 | 
					
						
							|  |  |  | sleeptime=20 | 
					
						
							|  |  |  | failures=0 | 
					
						
							|  |  |  | upload_count=0 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | # If there are missed files we can report what we'd be doing or do it, | 
					
						
							|  |  |  | # otherwise we have nothing to do. | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | if [[ ${#missed[@]} -eq 0 ]]; then | 
					
						
							|  |  |  |     coloured 'green' "All expected files for item $item are on the IA" | 
					
						
							|  |  |  | else | 
					
						
							|  |  |  |     mcount="${#missed[@]}" | 
					
						
							|  |  |  |     coloured 'red' "There $(ngettext "is 1 missing file" "are $mcount missing files" "$mcount"):" | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     [[ $DRYRUN -eq 1 ]] && { | 
					
						
							|  |  |  |         coloured 'blue' "Dry run: Would have run the following command(s):" | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     for file in "${missed[@]}"; do | 
					
						
							|  |  |  |         cmd="Upload $item $UPLOADS/$file " | 
					
						
							|  |  |  |         cmd+="'$file' '--retries=5 --no-derive -H x-archive-keep-old-version:0'" | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         if [[ $DRYRUN -eq 1 ]]; then | 
					
						
							|  |  |  |             coloured 'yellow' "$cmd" | 
					
						
							|  |  |  |         else | 
					
						
							|  |  |  |             retries=0 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-06-14 16:00:04 +01:00
										 |  |  |             coloured 'blue' "Uploading $file" | 
					
						
							| 
									
										
										
										
											2024-06-04 16:35:44 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  |             # | 
					
						
							|  |  |  |             # Run 'cmd'. If it succeeds then write to the log and loop for the | 
					
						
							|  |  |  |             # next missing file. If it fails enter the 'until' loop and report | 
					
						
							|  |  |  |             # and the problem. Count the number of times this is done, so it | 
					
						
							|  |  |  |             # doesn't loop forever. If we have reached the limit count this as | 
					
						
							|  |  |  |             # a failure and continue the parent loop (with the next missing | 
					
						
							|  |  |  |             # file). If we haven't retried enough yet, sleep for a while and | 
					
						
							|  |  |  |             # try again. The intention is to catch the case when an upload | 
					
						
							|  |  |  |             # times out. The 'ia' command is performing its own retries per | 
					
						
							|  |  |  |             # upload when the system is overloaded, but these are non-fatal. | 
					
						
							|  |  |  |             # | 
					
						
							|  |  |  |             until eval "$cmd"; do | 
					
						
							| 
									
										
										
										
											2024-06-15 17:14:22 +01:00
										 |  |  |                 coloured 'red' "Failure when uploading $file" | 
					
						
							| 
									
										
										
										
											2024-06-04 16:35:44 +01:00
										 |  |  |                 ((retries++)) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |                 printf '%s Failed to upload %s to the IA [%d]\n' \ | 
					
						
							|  |  |  |                     "$(date +%Y%m%d%H%M%S)" "$file" "$retries" >> "$LOGFILE" | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |                 [ "$retries" -eq "$retry_threshold" ] && { | 
					
						
							|  |  |  |                     ((failures++)) | 
					
						
							| 
									
										
										
										
											2024-06-14 16:00:04 +01:00
										 |  |  |                     [[ $VERBOSE -eq 1 ]] && \ | 
					
						
							|  |  |  |                         coloured 'blue' "Retry limit reached; abandoning this file" | 
					
						
							| 
									
										
										
										
											2024-06-04 16:35:44 +01:00
										 |  |  |                     continue 2 | 
					
						
							|  |  |  |                 } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-06-14 16:00:04 +01:00
										 |  |  |                 [[ $VERBOSE -eq 1 ]] && coloured 'blue' "Pausing for $sleeptime and retrying" | 
					
						
							| 
									
										
										
										
											2024-06-04 16:35:44 +01:00
										 |  |  |                 sleep $sleeptime | 
					
						
							|  |  |  |             done # until eval ... | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-06-15 17:14:22 +01:00
										 |  |  |             coloured 'green' "Uploaded $file to the IA" | 
					
						
							| 
									
										
										
										
											2024-06-04 16:35:44 +01:00
										 |  |  |             echo "$(date +%Y%m%d%H%M%S) Uploaded $file to the IA" >> "$LOGFILE" | 
					
						
							|  |  |  |         fi | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         # | 
					
						
							|  |  |  |         # Count actual uploads and dry-run ones the same | 
					
						
							|  |  |  |         # | 
					
						
							|  |  |  |         ((upload_count++)) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         # | 
					
						
							|  |  |  |         # Stop the missed file loop if we have reached the limiting number, in | 
					
						
							| 
									
										
										
										
											2024-11-23 22:28:52 +00:00
										 |  |  |         # dry-run and live mode, but not extended mode | 
					
						
							| 
									
										
										
										
											2024-06-04 16:35:44 +01:00
										 |  |  |         # | 
					
						
							| 
									
										
										
										
											2024-11-23 22:28:52 +00:00
										 |  |  |         [[ $EXTENDED -eq 0 && $upload_count -eq $LIMIT ]] && { | 
					
						
							| 
									
										
										
										
											2024-06-04 16:35:44 +01:00
										 |  |  |             coloured 'blue' "Upload limit ($LIMIT) reached" | 
					
						
							|  |  |  |             break | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     done # for file in ... | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | fi | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | # Summarise how many upload failures were detected | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | if [[ $failures -gt 0 ]]; then | 
					
						
							|  |  |  |     coloured 'red' \ | 
					
						
							|  |  |  |         "There $(ngettext "was $failures upload failure" "were $failures upload failures" $failures)" | 
					
						
							| 
									
										
										
										
											2024-06-15 17:14:22 +01:00
										 |  |  |     coloured 'yellow' 'Run this script again to repeat the repair attempt' | 
					
						
							| 
									
										
										
										
											2024-06-04 16:35:44 +01:00
										 |  |  | fi | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # vim: syntax=sh:ts=8:sw=4:ai:et:tw=78:fo=tcrqn21:fdm=marker |