forked from HPR/hpr-tools
		
	Moved project directories and files to an empty local repo
This commit is contained in:
		
							
								
								
									
										453
									
								
								InternetArchive/repair_item
									
									
									
									
									
										Executable file
									
								
							
							
						
						
									
										453
									
								
								InternetArchive/repair_item
									
									
									
									
									
										Executable file
									
								
							| @@ -0,0 +1,453 @@ | ||||
| #!/bin/bash - | ||||
| #=============================================================================== | ||||
| # | ||||
| #         FILE: repair_item | ||||
| # | ||||
| #        USAGE: ./repair_item [-h] [-v] [-d {0|1}] [-D] [-l N] itemname | ||||
| # | ||||
| #  DESCRIPTION: Repairs an IA "item" (HPR show) if something has failed during | ||||
| #               the upload. | ||||
| # | ||||
| #               The most common failures are caused by the file upload | ||||
| #               processes timing out and being aborted (by the 'ia' tool which | ||||
| #               performs the item creation and the uploads). This failure | ||||
| #               means that a show being processed on 'borg' does not get all | ||||
| #               of the components loaded to the IA. | ||||
| # | ||||
| #               This script looks at the files belonging to the show (stored | ||||
| #               temporarily on 'borg') and determines which have not been | ||||
| #               uploaded, then takes steps to perform the uploads. | ||||
| # | ||||
| #      OPTIONS: --- | ||||
| # REQUIREMENTS: --- | ||||
| #         BUGS: --- | ||||
| #        NOTES: --- | ||||
| #       AUTHOR: Dave Morriss (djm), Dave.Morriss@gmail.com | ||||
| #      VERSION: 0.0.6 | ||||
| #      CREATED: 2020-01-05 22:42:46 | ||||
| #     REVISION: 2024-05-10 12:39:52 | ||||
| # | ||||
| #=============================================================================== | ||||
|  | ||||
| #set -o nounset                              # Treat unset variables as an error | ||||
|  | ||||
| VERSION="0.0.6" | ||||
|  | ||||
| SCRIPT=${0##*/} | ||||
| # DIR=${0%/*} | ||||
|  | ||||
| # | ||||
| # NOTE: despite its name this is file descriptor 2 (standard error). It is the | ||||
| # destination the '_usage' function writes the help text to. | ||||
| # | ||||
| STDOUT="/dev/fd/2" | ||||
|  | ||||
| # | ||||
| # Select the appropriate working directory for the host. Only the two known | ||||
| # hosts are supported; anywhere else the script refuses to run. | ||||
| # | ||||
| case $(hostname) in | ||||
|     i7-desktop) | ||||
|         BASEDIR="$HOME/HPR/InternetArchive" | ||||
|         UPLOADS="$HOME/HPR/IA/uploads" | ||||
|         ;; | ||||
|     borg) | ||||
|         BASEDIR="$HOME/IA" | ||||
|         UPLOADS="/data/IA/uploads" | ||||
|         ;; | ||||
|     *) | ||||
|         echo "Wrong host!" | ||||
|         exit 1 | ||||
|         ;; | ||||
| esac | ||||
|  | ||||
| cd "$BASEDIR" || { echo "Failed to cd to $BASEDIR"; exit 1; } | ||||
|  | ||||
| # | ||||
| # Load library functions. A missing library is fatal, so exit with a failure | ||||
| # status (a bare 'exit' here would report success because the preceding 'echo' | ||||
| # succeeded). | ||||
| # | ||||
| LIB="$HOME/bin/function_lib.sh" | ||||
| [ -e "$LIB" ] || { echo "Unable to source functions"; exit 1; } | ||||
| # shellcheck disable=SC1090 | ||||
| source "$LIB" | ||||
|  | ||||
| # | ||||
| # Enable coloured messages | ||||
| # | ||||
| define_colours | ||||
|  | ||||
| # | ||||
| # Sanity checks: both external tools must be on the PATH | ||||
| # | ||||
| JQ=$(command -v jq) | ||||
| [ -n "$JQ" ] || { echo "Program 'jq' was not found"; exit 1; } | ||||
| IA=$(command -v ia) | ||||
| [ -n "$IA" ] || { echo "Program 'ia' was not found"; exit 1; } | ||||
|  | ||||
| # | ||||
| # Make temporary files and set traps to delete them. The trap text is single | ||||
| # quoted so TMP1 is expanded when the trap fires; it is double quoted inside so | ||||
| # the file name is passed to 'cleanup_temp' as a single argument. | ||||
| # | ||||
| TMP1=$(mktemp) || { echo "$SCRIPT: creation of temporary file failed!"; exit 1; } | ||||
| trap 'cleanup_temp "$TMP1"' SIGHUP SIGINT SIGPIPE SIGTERM EXIT | ||||
|  | ||||
|  | ||||
| # {{{ -- Functions -- Upload, exists_in, queued_tasks, _DEBUG, _usage | ||||
|  | ||||
| #===  FUNCTION  ================================================================ | ||||
| #         NAME: Upload | ||||
| #  DESCRIPTION: Upload a file to the Internet Archive with various options | ||||
| #   PARAMETERS: 1 - the item id (e.g. 'hpr1234') | ||||
| #               2 - the path to the file for upload | ||||
| #               3 - (optional) the path to the file on the IA | ||||
| #               4 - (optional) list of options for 'ia upload' enclosed as | ||||
| #                   a string | ||||
| #      RETURNS: The exit status of 'ia upload', or 1 if the local file does | ||||
| #               not exist (so that callers do not record it as uploaded) | ||||
| #=============================================================================== | ||||
| Upload () { | ||||
|     local id=${1} | ||||
|     local file=${2} | ||||
|     local remote=${3:-} | ||||
|     local options=${4:-} | ||||
|  | ||||
|     # | ||||
|     # Nothing to send: report it and signal failure to the caller | ||||
|     # | ||||
|     if [[ ! -e $file ]]; then | ||||
|         echo "File missing: $file" | ||||
|         return 1 | ||||
|     fi | ||||
|  | ||||
|     # | ||||
|     # 'options' is deliberately left unquoted because it holds several | ||||
|     # space-separated options which must be split into separate words. The | ||||
|     # id, file and remote name are quoted so they are each passed intact. | ||||
|     # | ||||
|     if [[ -z $remote ]]; then | ||||
|         # shellcheck disable=SC2086 | ||||
|         ia upload "${id}" "${file}" ${options} | ||||
|     else | ||||
|         # shellcheck disable=SC2086 | ||||
|         ia upload "${id}" "${file}" --remote-name="${remote}" ${options} | ||||
|     fi | ||||
| } | ||||
|  | ||||
| #===  FUNCTION  ================================================================ | ||||
| #         NAME: exists_in | ||||
| #  DESCRIPTION: Tests whether a key is present in an associative array. The | ||||
| #               array is passed by name, so the test is assembled as a string | ||||
| #               and evaluated; the key itself is only expanded when that | ||||
| #               string is evaluated. | ||||
| #   PARAMETERS: $1      array name | ||||
| #               $2      key value | ||||
| #      RETURNS: True if the key exists, False otherwise | ||||
| # | ||||
| # Modified from | ||||
| # https://stackoverflow.com/questions/13219634/easiest-way-to-check-for-an-index-or-a-key-in-an-array | ||||
| #=============================================================================== | ||||
| exists_in () { | ||||
|     # | ||||
|     # The '+muahaha' expansion produces a word only when the element is set | ||||
|     # (even if its value is empty); '[' then tests that word for non-emptiness | ||||
|     # | ||||
|     # shellcheck disable=SC2086 | ||||
|     eval "[ \${$1[\$2]+muahaha} ]" | ||||
| } | ||||
|  | ||||
| #===  FUNCTION  ================================================================ | ||||
| #         NAME: queued_tasks | ||||
| #  DESCRIPTION: Asks the IA (via 'ia tasks') for the tasks belonging to an item | ||||
| #               and counts those whose category is "catalog". The count is | ||||
| #               written to STDOUT so the caller can capture it. | ||||
| #   PARAMETERS: $1      IA item (like hpr1192) | ||||
| #      RETURNS: Nothing | ||||
| #=============================================================================== | ||||
| queued_tasks () { | ||||
|     local target="${1:?Usage: queued_tasks item}" | ||||
|     local jq_filter='[.[] | if .category == "catalog" then .status else empty end] | length' | ||||
|     local -i pending=0 | ||||
|  | ||||
|     # | ||||
|     # 'jq -s' slurps all of the task objects into one array before filtering | ||||
|     # | ||||
|     pending="$(ia tasks "$target" | jq -s "$jq_filter")" | ||||
|  | ||||
|     printf '%s\n' "$pending" | ||||
| } | ||||
|  | ||||
| #===  FUNCTION  ================================================================ | ||||
| #         NAME: _DEBUG | ||||
| #  DESCRIPTION: Writes each argument on its own line, prefixed with 'D> ', but | ||||
| #               only if in DEBUG mode. An unset or empty DEBUG counts as | ||||
| #               "off". | ||||
| #   PARAMETERS: List of messages | ||||
| #      RETURNS: Nothing | ||||
| #=============================================================================== | ||||
| _DEBUG () { | ||||
|     # Keep the loop variable out of the global namespace | ||||
|     local msg | ||||
|  | ||||
|     [ "${DEBUG:-0}" == 0 ] && return | ||||
|     for msg in "$@"; do | ||||
|         printf 'D> %s\n' "$msg" | ||||
|     done | ||||
| } | ||||
|  | ||||
| #===  FUNCTION  ================================================================ | ||||
| #         NAME: _usage | ||||
| #  DESCRIPTION: Reports usage; always exits the script after doing so. The | ||||
| #               text is written to the file named by the global STDOUT. | ||||
| #   PARAMETERS: 1 - the integer to pass to the 'exit' command (default 0) | ||||
| #      RETURNS: Nothing | ||||
| #=============================================================================== | ||||
| _usage () { | ||||
|     local -i result=${1:-0} | ||||
|  | ||||
|     cat >"$STDOUT" <<-endusage | ||||
| ${SCRIPT} - version: ${VERSION} | ||||
|  | ||||
| Usage: ./${SCRIPT} [-h] [-v] [-d {0|1}] [-D] [-l N] item | ||||
|  | ||||
| Attempts to repair an IA item where the upload has failed for some reason. | ||||
|  | ||||
| Options: | ||||
|   -h                    Print this help | ||||
|   -v                    Run in verbose mode where more information is | ||||
|                         reported. Default is off. | ||||
|   -d 0|1                Dry run: -d 1 (the default) runs the script in dry-run | ||||
|                         mode where nothing is changed but the actions that | ||||
|                         will be taken are reported; -d 0 turns off dry-run | ||||
|                         mode and the actions will be carried out. | ||||
|   -D                    Run in debug mode where a lot more information is | ||||
|                         reported | ||||
|   -l N                  Control the number of files that can be uploaded in | ||||
|                         one run. The range is 1 to $DEFLIMIT (the default). | ||||
|  | ||||
| Arguments: | ||||
|     item                The item in the form 'hpr1234' | ||||
|  | ||||
| endusage | ||||
|     exit "$result" | ||||
| } | ||||
|  | ||||
| # }}} | ||||
|  | ||||
| #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | ||||
|  | ||||
| # | ||||
| # Directories and files | ||||
| # | ||||
| LOGS="$BASEDIR/logs" | ||||
| LOGFILE="$LOGS/$SCRIPT.log" | ||||
|  | ||||
| # | ||||
| # Constants | ||||
| # | ||||
| DEFLIMIT=20 | ||||
|  | ||||
| # | ||||
| # Process options. The leading ':' in the option string selects silent error | ||||
| # reporting: a missing option argument arrives as ':' (with the option letter | ||||
| # in OPTARG) and an unknown option as '?'. | ||||
| # | ||||
| while getopts :d:Dhl:v opt | ||||
| do | ||||
|     case "${opt}" in | ||||
|         D) DEBUG=1;; | ||||
|         d) DRYRUN=$OPTARG;; | ||||
|         h) _usage 0;; | ||||
|         l) LIMIT=$OPTARG;; | ||||
|         v) VERBOSE=1;; | ||||
|         :) echo "** Option -$OPTARG requires an argument" | ||||
|            _usage 1;; | ||||
|         *) echo "** Unknown option" | ||||
|            _usage 1;; | ||||
|     esac | ||||
| done | ||||
| shift $((OPTIND - 1)) | ||||
|  | ||||
| # | ||||
| # Set option defaults and check their values | ||||
| # | ||||
| VERBOSE=${VERBOSE:-0} | ||||
|  | ||||
| # | ||||
| # The dry-run value is matched as text. An arithmetic comparison would treat | ||||
| # any non-numeric word (e.g. '-d yes') as 0 and so silently turn dry-run off. | ||||
| # | ||||
| DRYRUN=${DRYRUN:-1} | ||||
| if [[ ! $DRYRUN =~ ^[01]$ ]]; then | ||||
|     echo "** Use '-d 0' or '-d 1'" | ||||
|     _usage 1 | ||||
| fi | ||||
| [[ $VERBOSE -eq 1 && $DRYRUN -eq 1 ]] && echo "Dry run mode" | ||||
|  | ||||
| DEBUG=${DEBUG:-0} | ||||
| [[ $DEBUG -eq 1 ]] && coloured 'yellow' "Debug mode" | ||||
|  | ||||
| # | ||||
| # The limit must be all digits. '10#' forces base 10 so that a value with | ||||
| # leading zeroes (such as '08') is not rejected as an invalid octal number. | ||||
| # | ||||
| LIMIT=${LIMIT:-$DEFLIMIT} | ||||
| if [[ ! $LIMIT =~ ^[0-9]+$ ]] || (( 10#$LIMIT < 1 || 10#$LIMIT > DEFLIMIT )); then | ||||
|     echo "** Use '-l 1' up to '-l $DEFLIMIT' or omit the option" | ||||
|     _usage 1 | ||||
| fi | ||||
| LIMIT=$((10#$LIMIT)) | ||||
|  | ||||
| # | ||||
| # Should have one argument | ||||
| # | ||||
| if [[ $# -lt 1 ]]; then | ||||
|     coloured 'red' "Missing argument" | ||||
|     _usage 1 | ||||
| elif [[ $# -gt 1 ]]; then | ||||
|     coloured 'red' "Too many arguments" | ||||
|     _usage 1 | ||||
| fi | ||||
| item="${1}" | ||||
|  | ||||
| # | ||||
| # Ensure item spec is correctly formatted. The whole argument must be 'hpr' | ||||
| # followed by digits. The number is converted with '10#' before formatting | ||||
| # because 'printf %d' reads a leading zero as octal, which would turn | ||||
| # 'hpr0123' into 'hpr0083' and fail outright on 'hpr0089'. | ||||
| # | ||||
| if [[ $item =~ ^hpr([0-9]+)$ ]]; then | ||||
|     printf -v item 'hpr%04d' "$((10#${BASH_REMATCH[1]}))" | ||||
| else | ||||
|     coloured 'red' "Incorrect show specification: $item" | ||||
|     coloured 'yellow' "Use 'hpr9999' format" | ||||
|     exit 1 | ||||
| fi | ||||
| _DEBUG "Parsed item: $item" | ||||
| _DEBUG "Parsed item: $item" | ||||
|  | ||||
| # | ||||
| # Declarations | ||||
| # | ||||
| declare -A fcache | ||||
| declare -A iacache | ||||
| declare -a missed | ||||
|  | ||||
| # | ||||
| # Scan the directory 'UPLOADS' where files for upload to the IA are stored and | ||||
| # collect everything for this item (show). | ||||
| # | ||||
| # The `find' command at the end of the loop selects only files, not | ||||
| # directories, whose path contains the item name. The order in which they are | ||||
| # found is irrelevant here because they are only stored as keys of 'fcache'. | ||||
| # | ||||
| # Each path is made relative to 'UPLOADS' and is only accepted if that relative | ||||
| # path starts with the item name. The prefix is held in 'found_item' so that | ||||
| # 'item', which the rest of the script relies on, is never overwritten. | ||||
| # | ||||
| while IFS= read -r path; do | ||||
|     relpath="${path#"$UPLOADS"/}" | ||||
|     found_item="${relpath:0:${#item}}" | ||||
|  | ||||
|     [[ $VERBOSE -eq 1 ]] && echo "Found $path" | ||||
|  | ||||
|     _DEBUG "Path:          $path" | ||||
|     _DEBUG "Relative path: $relpath" | ||||
|     _DEBUG "IA item:       $found_item" | ||||
|  | ||||
|     if [[ $found_item != "$item" ]]; then | ||||
|         _DEBUG "Skipped (not part of $item): $relpath" | ||||
|         continue | ||||
|     fi | ||||
|  | ||||
|     # shellcheck disable=SC2034 | ||||
|     fcache[$relpath]=1 | ||||
| done < <(find "$UPLOADS" -type f -regextype posix-extended \ | ||||
|     -regex ".*$item.*" -print) | ||||
|  | ||||
| # | ||||
| # Did we find anything? | ||||
| # | ||||
| if [[ ${#fcache[@]} -eq 0 ]]; then | ||||
|     coloured 'red' "No files found for item $item in $UPLOADS" | ||||
|     coloured 'red' "Can't continue" | ||||
|     exit 1 | ||||
| fi | ||||
|  | ||||
| # | ||||
| # Ask the IA whether any tasks are still outstanding for this show. While | ||||
| # there are, no repair can be attempted, so report it and give up. | ||||
| # | ||||
| # TODO: This could be a loop waiting for tasks to complete rather than | ||||
| # aborting and asking to be rerun. | ||||
| # | ||||
| tasks=$(queued_tasks "$item") | ||||
| if (( tasks > 0 )); then | ||||
|     coloured 'red' \ | ||||
|         "Item $item still has $tasks unfinished $(ngettext task tasks "$tasks")" | ||||
|     coloured 'red' "Allow time for task(s) to finish and try again later" | ||||
|     exit 1 | ||||
| fi | ||||
|  | ||||
| # | ||||
| # Fetch the list of files the IA holds for the item into the file 'TMP1'. If | ||||
| # the 'ia list' command fails we take it that the item is not there and stop. | ||||
| # | ||||
| if ! ia list "$item" > "$TMP1"; then | ||||
|     coloured 'red' "Item $item is not in the IA" | ||||
|     coloured 'red' "Can't continue" | ||||
|     exit 1 | ||||
| fi | ||||
|  | ||||
| # | ||||
| # Remember every file name the IA reported, one per line of 'TMP1' | ||||
| # | ||||
| while read -r iafile; do | ||||
|     # shellcheck disable=SC2034 | ||||
|     iacache[$iafile]=1 | ||||
| done < "$TMP1" | ||||
|  | ||||
| # | ||||
| # Any local file which the IA did not report is recorded as missed | ||||
| # | ||||
| for path in "${!fcache[@]}"; do | ||||
|     exists_in iacache "$path" || missed+=("$path") | ||||
| done | ||||
|  | ||||
| # | ||||
| # Counters and defaults for the loop | ||||
| # | ||||
| retry_threshold=5 | ||||
| sleeptime=20 | ||||
| failures=0 | ||||
| upload_count=0 | ||||
|  | ||||
| # | ||||
| # Options given to 'ia upload' (through the 'Upload' function) for every file | ||||
| # | ||||
| upload_options='--retries=5 --no-derive -H x-archive-keep-old-version:0' | ||||
|  | ||||
| # | ||||
| # If there are missed files we can report what we'd be doing or do it, | ||||
| # otherwise we have nothing to do. | ||||
| # | ||||
| if [[ ${#missed[@]} -eq 0 ]]; then | ||||
|     coloured 'green' "All expected files for item $item are on the IA" | ||||
| else | ||||
|     mcount="${#missed[@]}" | ||||
|     coloured 'red' "There $(ngettext "is 1 missing file" "are $mcount missing files" "$mcount"):" | ||||
|  | ||||
|     [[ $DRYRUN -eq 1 ]] && { | ||||
|         coloured 'blue' "Dry run: Would have run the following command(s):" | ||||
|     } | ||||
|  | ||||
|     for file in "${missed[@]}"; do | ||||
|         if [[ $DRYRUN -eq 1 ]]; then | ||||
|             # | ||||
|             # Only show what would be done | ||||
|             # | ||||
|             coloured 'yellow' \ | ||||
|                 "Upload $item $UPLOADS/$file '$file' '$upload_options'" | ||||
|         else | ||||
|             retries=0 | ||||
|  | ||||
|             printf 'Uploading %s\n' "$file" | ||||
|  | ||||
|             # | ||||
|             # Call 'Upload' directly with properly quoted arguments rather | ||||
|             # than building a command string for 'eval', so that file names | ||||
|             # containing spaces or shell metacharacters are passed intact and | ||||
|             # never interpreted as code. | ||||
|             # | ||||
|             # If the upload succeeds then write to the log and loop for the | ||||
|             # next missing file. If it fails enter the 'until' loop and report | ||||
|             # the problem. Count the number of times this is done, so it | ||||
|             # doesn't loop forever. If we have reached the limit count this as | ||||
|             # a failure and continue the parent loop (with the next missing | ||||
|             # file). If we haven't retried enough yet, sleep for a while and | ||||
|             # try again. The intention is to catch the case when an upload | ||||
|             # times out. The 'ia' command is performing its own retries per | ||||
|             # upload when the system is overloaded, but these are non-fatal. | ||||
|             # | ||||
|             until Upload "$item" "$UPLOADS/$file" "$file" "$upload_options"; do | ||||
|                 coloured 'red' "Failure when invoking the Upload command!" | ||||
|                 ((retries += 1)) | ||||
|  | ||||
|                 printf '%s Failed to upload %s to the IA [%d]\n' \ | ||||
|                     "$(date +%Y%m%d%H%M%S)" "$file" "$retries" >> "$LOGFILE" | ||||
|  | ||||
|                 # | ||||
|                 # 'continue 2' leaves the 'until' loop and moves the enclosing | ||||
|                 # 'for' loop on to the next file; a failed file is therefore | ||||
|                 # not counted against the upload limit. | ||||
|                 # | ||||
|                 [ "$retries" -ge "$retry_threshold" ] && { | ||||
|                     ((failures += 1)) | ||||
|                     continue 2 | ||||
|                 } | ||||
|  | ||||
|                 sleep "$sleeptime" | ||||
|             done # until Upload ... | ||||
|  | ||||
|             echo "$(date +%Y%m%d%H%M%S) Uploaded $file to the IA" >> "$LOGFILE" | ||||
|         fi | ||||
|  | ||||
|         # | ||||
|         # Count actual uploads and dry-run ones the same | ||||
|         # | ||||
|         ((upload_count += 1)) | ||||
|  | ||||
|         # | ||||
|         # Stop the missed file loop if we have reached the limiting number, in | ||||
|         # dry-run and live mode | ||||
|         # | ||||
|         [[ $upload_count -eq $LIMIT ]] && { | ||||
|             coloured 'blue' "Upload limit ($LIMIT) reached" | ||||
|             break | ||||
|         } | ||||
|  | ||||
|     done # for file in ... | ||||
|  | ||||
| fi | ||||
|  | ||||
| # | ||||
| # Summarise how many upload failures were detected, and finish with a failure | ||||
| # status in that case so that a caller can tell the repair was incomplete. | ||||
| # | ||||
| if [[ $failures -gt 0 ]]; then | ||||
|     coloured 'red' \ | ||||
|         "There $(ngettext "was $failures upload failure" "were $failures upload failures" "$failures")" | ||||
|     coloured 'yellow' 'Run this script again to repeat the repair attempt' | ||||
|     exit 1 | ||||
| fi | ||||
|  | ||||
| # vim: syntax=sh:ts=8:sw=4:ai:et:tw=78:fo=tcrqn21:fdm=marker | ||||
		Reference in New Issue
	
	Block a user