19030fee71
InternetArchive/future_upload: Added logging and debugging InternetArchive/ia_db.sql: Added new tables InternetArchive/recover_transcripts: New script to run on 'borg' and copy missing files from the backup disk to the IA InternetArchive/repair_assets: More comments, including one about a bug in the design. InternetArchive/repair_item: Fix relating to octal numbers (if there are leading zeroes in a number). '_DEBUG' is now in the function library. Added comments to explain obscure stuff. InternetArchive/snapshot_metadata: New Bash script (to run on my desktop) which collects metadata for a show and stores in in the '~/HPR/IA/assets' directory. Runs 'view_derivatives' on it to find derivative files for deletion. InternetArchive/tidy_uploaded: Moves files and directories containing uploaded files into a holding area for later backup. Added debugging, logging and a 'force' mode. InternetArchive/upload_manager: Manages 'ia.db' (on my workstation). Needs many updates which have just started to be added. InternetArchive/weekly_upload: Old script, now obsolete.
498 lines
16 KiB
Bash
Executable File
498 lines
16 KiB
Bash
Executable File
#!/bin/bash -
|
|
#===============================================================================
#
#         FILE: tidy_uploaded
#
#        USAGE: ./tidy_uploaded [-h] [-v] [-d {0|1}] [-c COUNT] [-D] [-F]
#
#  DESCRIPTION: Relocates HPR audio and other show-related files on 'borg'
#               after their shows have been uploaded to the Internet Archive
#
#      OPTIONS: ---
# REQUIREMENTS: ---
#         BUGS: ---
#        NOTES: ---
#       AUTHOR: Dave Morriss (djm), Dave.Morriss@gmail.com
#      VERSION: 0.0.11
#      CREATED: 2022-03-30 17:38:01
#     REVISION: 2024-07-29 18:24:26
#
#===============================================================================
|
|
|
|
set -o nounset      # Referencing an unset variable is a fatal error

VERSION='0.0.11'

SCRIPT="${0##*/}"
# DIR=${0%/*}

# Destination used by '_usage' for its help text (file descriptor 2)
STDOUT='/dev/fd/2'

#
# Pull in the shared function library; without it we cannot continue
#
LIB="$HOME/bin/function_lib.sh"
if [[ ! -e $LIB ]]; then
    echo "Unable to source functions"
    exit 1
fi
# shellcheck disable=SC1090
. "$LIB"

#
# Create the temporary file and arrange for 'cleanup_temp' (expected to come
# from the library sourced above) to be called with it on exit or on a signal
#
if ! TMP1=$(mktemp); then
    echo "$SCRIPT: creation of temporary file failed!"
    exit 1
fi
trap 'cleanup_temp $TMP1' SIGHUP SIGINT SIGPIPE SIGTERM EXIT

#
# Choose the working directories according to the host we are running on.
# Any host other than the two known ones is rejected.
#
case "$HOSTNAME" in
    borg)
        BASEDIR="$HOME/InternetArchive"
        UPLOADS="/data/IA/uploads"
        ARCHIVE="/data/IA/done"
        ;;
    i7-desktop)
        BASEDIR="$HOME/HPR/InternetArchive"
        UPLOADS="$HOME/HPR/IA/uploads"
        ARCHIVE="$HOME/HPR/IA/done"
        ;;
    *)
        echo "Wrong host!"
        exit 1
        ;;
esac
|
|
|
|
# {{{ -- Functions -- exists_in, queued_tasks, movefile, is_empty, _log, _usage
|
|
|
|
#===  FUNCTION  ================================================================
#         NAME: exists_in
#  DESCRIPTION: Tests whether a given key is present in an associative array.
#               The array is passed by name, so the test has to be built as
#               a string and run through 'eval'. A key counts as present
#               even when its value is the empty string.
#   PARAMETERS: $1      name of the array (not its contents)
#               $2      key to look for
#      RETURNS: True if the key exists, False otherwise
#
# Modified from
# https://stackoverflow.com/questions/13219634/easiest-way-to-check-for-an-index-or-a-key-in-an-array
#===============================================================================
exists_in () {
    # Only the array name ($1) is expanded now; '$2' is left for 'eval' to
    # expand so the key is never re-parsed as shell code. The '+muahaha'
    # expansion yields a non-empty word only when the element is set.
    # shellcheck disable=SC2086
    eval "[ \${$1[\$2]+muahaha} ]"
}
|
|
|
|
#===  FUNCTION  ================================================================
#         NAME: queued_tasks
#  DESCRIPTION: Runs 'ia tasks' for an item and uses 'jq' to count the
#               returned records whose category is "catalog". The count is
#               written to STDOUT so that the caller can capture it.
#   PARAMETERS: $1      IA item (like hpr1192)
#      RETURNS: Nothing
#===============================================================================
queued_tasks () {
    local ia_item="${1:?Usage: queued_tasks item}"
    local -i pending=0
    local filter='[.[] | if .category == "catalog" then .status else empty end] | length'

    # The integer attribute turns an empty result into 0
    pending="$(ia tasks "$ia_item" | jq -s "$filter")"

    printf '%s\n' "$pending"
}
|
|
|
|
#===  FUNCTION  ================================================================
#         NAME: movefile
#  DESCRIPTION: Moves a file to a new place, catering for any directories in
#               the path. Any sub-directory in the path is created below the
#               destination directory before the move. Progress and error
#               messages are written to STDOUT.
#   PARAMETERS: $1      directory to move from
#               $2      directory to move to
#               $3      file (or sub-path) to move, relative to $1
#               $4      [optional] FORCE flag; 1 means overwrite an existing
#                       destination file (default 0)
#      RETURNS: True if a move was done, otherwise False (destination already
#               exists and FORCE is off, the sub-directory could not be
#               created, or 'mv' failed)
#===============================================================================
movefile () {
    local fromdir="${1:?Usage: movefile fromdir todir path [FORCE]}"
    local todir="${2:?Usage: movefile fromdir todir path [FORCE]}"
    local path="${3:?Usage: movefile fromdir todir path [FORCE]}"
    local FORCE="${4:-0}"

    #
    # Chop up the path. If it's just a file name then $dir and $file are the
    # same, in which case we make $dir empty.
    #
    local dir="${path%/*}"
    local file="${path##*/}"
    [[ $dir = "$file" ]] && dir=''

    #
    # If we have a directory in the path check it exists in the 'to' directory
    # (not relative to the current directory) and create it if not
    #
    if [[ -n $dir && ! -d $todir/$dir ]]; then
        if ! mkdir -p -- "$todir/$dir"; then
            echo "Unable to create directory: $todir/$dir"
            return 1
        fi
    fi

    #
    # Does the file exist already? Only overwrite it in FORCE mode.
    # TODO: Compare the two files?
    #
    if [[ -e $todir/$path ]]; then
        if [[ $FORCE -ne 1 ]]; then
            echo "File already exists: $todir/$path"
            return 1
        fi

        echo "File exists: $todir/$path"
        echo "FORCE mode is ON so overwriting"
        if ! mv --force -- "$fromdir/$path" "$todir/$path"; then
            echo "Failed to move $fromdir/$path"
            return 1
        fi
    else
        if ! mv -- "$fromdir/$path" "$todir/$path"; then
            echo "Failed to move $fromdir/$path"
            return 1
        fi
    fi

    # Only reached when the 'mv' succeeded
    echo "Moved $fromdir/$path"
    return 0
}
|
|
|
|
#===  FUNCTION  ================================================================
#         NAME: is_empty
#  DESCRIPTION: Reports whether a directory tree holds no regular files.
#               Sub-directories on their own do not count as content.
#   PARAMETERS: $1      Directory to test
#      RETURNS: True if empty (of files), otherwise false
#===============================================================================
is_empty() {
    local marker

    # 'find' prints a single X and stops at the first regular file it meets,
    # so $marker is empty only when there are none
    marker="$(find "$1" -mindepth 1 -type f -printf X -quit)"

    [[ -z $marker ]]
}
|
|
|
|
#===  FUNCTION  ================================================================
#         NAME: _log
#  DESCRIPTION: Appends one record to the file named by the global $LOGFILE.
#               The record layout is the 'printf' template held in the
#               global $LOGREC; when that is not set a default template is
#               used instead. The template receives two arguments: -1, which
#               makes '%(fmt)T' format the current date and time, and the
#               message. The script exits if $LOGFILE is not defined.
#   PARAMETERS: 1 - the message to write
#      RETURNS: Nothing
#===============================================================================
# shellcheck disable=SC2317 disable=SC2059
_log () {
    local text="$1"
    local template='%(%F %T)T %s\n'

    if [[ ! -v LOGFILE ]]; then
        echo "${FUNCNAME[0]}: \$LOGFILE is not defined"
        exit 1
    fi

    # Prefer the caller's template when one has been defined
    if [[ -v LOGREC ]]; then
        template="$LOGREC"
    fi

    printf "$template" -1 "$text" >> "$LOGFILE"
}
|
|
|
|
#===  FUNCTION  ================================================================
#         NAME: _usage
#  DESCRIPTION: Report usage. The help text is written to the file named by
#               the global $STDOUT (file descriptor 2 if that is not set)
#               and the script then exits. Uses the globals $SCRIPT,
#               $VERSION, $UPLOADS and $ARCHIVE in the text.
#   PARAMETERS: 1       [optional] exit value (default 0)
#      RETURNS: Nothing (the function exits the script)
#===============================================================================
_usage () {
    local -i res="${1:-0}"

    # A plain here-document (not '<<-') is used so that the text does not
    # depend on tab indentation surviving in this file
    cat > "${STDOUT:-/dev/fd/2}" <<endusage
${SCRIPT} - version: ${VERSION}

Usage: ./${SCRIPT} [-h] [-v] [-c COUNT] [-d {0|1}] [-D] [-F]

Moves HPR audio and other show-related files on 'borg' after their shows
have been uploaded to the Internet Archive. Files to be uploaded are in the
directory ${UPLOADS} and they are moved to the directory ${ARCHIVE}.

Options:
  -h                    Print this help
  -v                    Run in verbose mode where more information is reported
  -d 0|1                Dry run: -d 1 (the default) runs the script in dry-run
                        mode where nothing is moved but the actions that
                        will be taken are reported; -d 0 turns off dry-run
                        mode and the actions will be carried out.
  -c COUNT              Count of shows to process. If omitted or zero then all
                        shows will be processed, otherwise this is the number
                        to stop at.
  -D                    Run in debug mode where a lot more information is
                        reported
  -F                    Turn on FORCE mode (normally off). In this mode when
                        the files being tidied (moved) already exist, they are
                        overwritten. This is for the very rare case when
                        a show's audio has to be re-uploaded because of bad
                        audio or the wrong file being sent.

Examples
  ./tidy_uploaded               # Run in (default) dry-run mode
  ./tidy_uploaded -v            # Dry-run mode with verbose messages
  ./tidy_uploaded -d0           # Live mode (without verbose messages)
  ./tidy_uploaded -c1           # Process 1 show in dry-run mode
  ./tidy_uploaded -D            # Run with debugging enabled
  ./tidy_uploaded -F            # Run with FORCE mode on

endusage
    exit "$res"
}
|
|
|
|
# }}}
|
|
|
|
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
|
|
#
# Directories and files
#
LOGS="$BASEDIR/logs"
LOGFILE="$LOGS/$SCRIPT.log"
LOGREC='%(%F %T)T %s\n'

#
# Process options. The leading ':' in the option string selects silent error
# reporting, where a missing option argument is returned as ':' and an
# unknown option as '?'.
#
while getopts :c:d:DFhv opt
do
    case "${opt}" in
        c) COUNT=$OPTARG;;
        D) DEBUG=1;;
        d) DRYRUN=$OPTARG;;
        F) FORCE=1;;
        h) _usage 0;;
        v) VERBOSE=1;;
        :) echo "** Option -$OPTARG requires an argument"
           _usage 1;;
        *) echo "** Unknown option"
           _usage 1;;
    esac
done
shift $((OPTIND - 1))

#
# The count must be an unsigned integer. Force base 10 so that leading zeroes
# (such as '08') are not treated as octal in later numeric tests.
#
COUNT=${COUNT:-0}
if [[ ! $COUNT =~ ^[0-9]+$ ]]; then
    echo "** Use a numeric argument with -c"
    _usage 1
fi
COUNT=$((10#$COUNT))

#
# Dry-run defaults to on. Compare as strings so that a non-numeric argument
# gives the usage message rather than an arithmetic error.
#
DRYRUN=${DRYRUN:-1}
if [[ $DRYRUN != "0" && $DRYRUN != "1" ]]; then
    echo "** Use '-d 0' or '-d 1'"
    _usage 1
fi
[[ $DRYRUN -eq 1 ]] && echo "Dry run mode"

FORCE=${FORCE:-0}
[[ $FORCE -eq 1 ]] && echo "Force mode - overwriting existing files"

VERBOSE=${VERBOSE:-0}

DEBUG=${DEBUG:-0}
[[ $DEBUG -eq 1 ]] && echo "Debug mode"

#
# Should have no arguments
#
if [[ $# != 0 ]]; then
    echo "** ${SCRIPT} takes no arguments"
    _usage 1
fi
|
|
|
|
#
# Declarations
#
declare -A seen
declare -a dirs
# lastitem=
ind=0

#
# Scan the directory 'UPLOADS' where files for upload to the IA are stored.
#
# See the `find' pipeline at the end of the loop which outputs the last change
# time and the full file path, sorts on the time, then removes it. This
# ensures we process the files in time order rather than alphabetic order of
# their names.
#
while IFS= read -r path; do
    #
    # Extract the path relative to $UPLOADS and the IA item name from the
    # returned path. Here $relpath will be the filename or a sub-directory and
    # filename, and $item will be the IA identifier like 'hpr1192' (the first
    # seven characters of $relpath).
    #
    relpath="${path#"$UPLOADS"/}"
    item="${relpath:0:7}"

    [[ $VERBOSE -eq 1 ]] && echo "Found $path"

    _DEBUG "Path: $path"
    _DEBUG "Relative path: $relpath"
    _DEBUG "IA item: $item"

    #
    # If we have seen this item before we don't need to process it again, so
    # ignore all but the first path belonging to an IA identifier
    #
    if exists_in seen "$item"; then
        _DEBUG "Skipped $path - repeated show number"
        continue
    fi

    #
    # Never seen before, so process it
    #
    # shellcheck disable=SC2034
    seen[$item]=1

    #
    # Count this item and stop the loop if we've reached the requested
    # count. We want the value of $ind to be the number of shows
    # processed, so adjust it if we stopped after incrementing it.
    #
    ((ind++))
    if [[ $COUNT -gt 0 ]]; then
        if [[ $ind -gt $COUNT ]]; then
            ((ind--))
            break
        fi
        echo "[ Show #$ind ]"
    fi

    #
    # Look to see if there are any tasks queued for this show. If there
    # are we'll skip it just now.
    #
    tasks=$(queued_tasks "$item")
    if [[ $tasks -gt 0 ]]; then
        echo "** Item $item still has $tasks unfinished " \
            "$(ngettext task tasks "$tasks")"
        echo "** Skipping to the next item"
        continue
    fi

    [[ $VERBOSE -eq 1 ]] && echo "Checking IA for $item"

    #
    # Interrogate the IA for the item we're working on. If it returns True
    # we can proceed with tidying. The file 'TMP1' contains just a simple
    # list of the files on the IA relating to this item.
    #
    if ! ia list "$item" > "$TMP1"; then
        printf 'Skipping %s; not in the IA\n' "$item"
        continue
    fi

    #
    # Save any directory associated with this item. This means that
    # directories with names that don't conform to the "^hpr[0-9]{4}"
    # pattern will be ignored, but this is *not* expected to happen.
    # Note that directories without corresponding audio will not be
    # cleaned up by this method, but again this is not expected to
    # happen.
    # TODO: be alert to such issues!
    #
    dirpath="$UPLOADS/$item"
    if [[ -d "$dirpath" ]]; then
        echo "Storing directory: $item"
        dirs+=("$item")
    fi

    moves=0

    #
    # Scan the returned list to see if any files we have are online.
    # Move to the ARCHIVE directory when there's a match.
    #
    while IFS= read -r file; do
        frompath="$UPLOADS/$file"
        topath="$ARCHIVE/$file"

        # Files on the IA that we don't hold locally are of no interest
        [[ -e "$frompath" ]] || continue

        #
        # A file on the IA exists in the upload area. Move the
        # local one if we're not in dry-run mode, otherwise just
        # report the move we would do. If FORCE mode is on
        # overwrite the file.
        #
        if [[ $DRYRUN -eq 0 ]]; then
            movefile "$UPLOADS" "$ARCHIVE" "$file" "$FORCE" && ((moves++))
        elif [[ $FORCE -eq 0 ]]; then
            printf 'Would move %s\n\tto %s\n' "$frompath" "$topath"
        else
            printf 'Would move %s\n\toverwriting %s\n' "$frompath" "$topath"
        fi
    done < "$TMP1"

    #
    # Log this item
    #
    [[ $DRYRUN -eq 0 ]] && \
        printf '%s moved %d %s for %s\n' "$(date +%Y%m%d%H%M%S)" \
            "$moves" "$(ngettext file files "$moves")" "$item" >> "$LOGFILE"

# The time stamp is the first space-separated field; everything after it
# ('-f2-') is the path, which may itself contain spaces
done < <(find "$UPLOADS" -regextype posix-extended -regex '.*hpr[0-9]{4}.*' -printf "%CY%Cm%Cd%CH%CM%CS %p\n" | sort | cut -d' ' -f2-)
|
|
|
|
# Old 'find' used:
|
|
# done < <(find "$UPLOADS" -regextype posix-extended -regex '.*hpr[0-9]{4}.*' | sort)
|
|
|
|
#
# No shows processed? There was nothing to do. This is a normal outcome so
# exit with a zero status whether or not the message is shown.
#
if [[ $ind -eq 0 ]]; then
    [[ $DRYRUN -eq 0 ]] && echo "Nothing to do"
    exit 0
fi

_DEBUG "Number of shows scanned: $ind"
# _DEBUG "Accumulated directories (${#dirs[*]}): $(printf '/%s/ ' "${dirs[*]}")"

#
# If there are no directories just exit (successfully). The test also keeps
# us from expanding an empty array while 'nounset' is in force.
#
[[ -v dirs ]] || exit 0

#
# By an (as yet) unknown process we might get duplicates, so remove them here.
# The keys of an associative array are unique; their order is not defined.
#
# mapfile -t dirs < <(printf "%s\n" "${dirs[*]}" | uniq)
declare -A unique
for e in "${dirs[@]}"; do unique[$e]=1; done
dirs=( "${!unique[@]}" )
# mapfile -t dirs < <(printf '%s\n' "${!unique[@]}")

# Use [@] so that each directory is shown inside its own '>...<' pair
_DEBUG "Directories to process (${#dirs[@]}): $(printf '>%s< ' "${dirs[@]}")"

#
# Clean up any empty directories. These may exist because we moved their
# contents one file at a time. We only deal with the directories we've visited
# though.
#
for dir in "${dirs[@]}"; do
    path="$UPLOADS/$dir"

    if [[ $DRYRUN -ne 0 ]]; then
        echo "Would delete directory $path"
        continue
    fi

    if ! is_empty "$path"; then
        echo "Directory is not empty: $path"
        echo "Not deleted!"
        continue
    fi

    # 'is_empty' only looks for files, so empty sub-directories may remain
    # and are removed along with the directory itself
    if rm -rf -- "$path"; then
        echo "Deleted $path"
        echo "$(date +%Y%m%d%H%M%S) deleted empty directory $path" >> "$LOGFILE"
    else
        echo "Failed to delete: $path"
    fi
done

exit 0
|
|
|
|
# vim: syntax=sh:ts=8:sw=4:ai:et:tw=78:fo=tcrqn21:fdm=marker
|