#!/bin/bash -
#===============================================================================
#
#         FILE: tidy_uploaded
#
#        USAGE: ./tidy_uploaded [-h] [-v] [-d {0|1}] [-c COUNT] [-D]
#
#  DESCRIPTION: Relocates HPR audio and other show-related files on 'borg'
#               after their shows have been uploaded to the Internet Archive
#
#      OPTIONS: ---
# REQUIREMENTS: ---
#         BUGS: ---
#        NOTES: ---
#       AUTHOR: Dave Morriss (djm), Dave.Morriss@gmail.com
#      VERSION: 0.0.10
#      CREATED: 2022-03-30 17:38:01
#     REVISION: 2022-07-30 14:30:43
#
#===============================================================================

set -o nounset                              # Treat unset variables as an error

VERSION="0.0.10"

SCRIPT=${0##*/}
# DIR=${0%/*}

# NOTE: despite its name this points at file descriptor 2 (stderr); it is
# where the usage text is written.
STDOUT="/dev/fd/2"

#
# Load library functions
#
LIB="$HOME/bin/function_lib.sh"
[ -e "$LIB" ] || { echo "Unable to source functions"; exit 1; }
# shellcheck disable=SC1090
source "$LIB"

#
# Make temporary files and set traps to delete them
# ('cleanup_temp' is expected to come from the sourced library)
#
TMP1=$(mktemp) || { echo "$SCRIPT: creation of temporary file failed!"; exit 1; }
trap 'cleanup_temp $TMP1' SIGHUP SIGINT SIGPIPE SIGTERM EXIT

#
# Configure depending whether local or on the VPS
#
case "$HOSTNAME" in
    borg)
        BASEDIR="$HOME/InternetArchive"
        UPLOADS="/data/IA/uploads"
        ARCHIVE="/data/IA/done"
        ;;
    i7-desktop)
        BASEDIR="$HOME/HPR/InternetArchive"
        UPLOADS="$HOME/HPR/IA/uploads"
        ARCHIVE="$HOME/HPR/IA/done"
        ;;
    *)
        echo "Wrong host!"
        exit 1
        ;;
esac

#===  FUNCTION  ================================================================
#         NAME: exists_in
#  DESCRIPTION: Checks the existence of a key in an associative array
#   PARAMETERS: $1      array name
#               $2      key value
#      RETURNS: True if the key exists, False otherwise
#
# Modified from
# https://stackoverflow.com/questions/13219634/easiest-way-to-check-for-an-index-or-a-key-in-an-array
#===============================================================================
exists_in () {
    # Only the array name ($1) is interpolated into the eval string; the key
    # ($2) is expanded by the eval itself as a subscript, so its contents are
    # never re-parsed as shell code.
    # shellcheck disable=SC2086
    eval '[ ${'$1'[$2]+muahaha} ]'
}

#===  FUNCTION  ================================================================
#         NAME: queued_tasks
#  DESCRIPTION: Queries the IA for any queued or running tasks for an item.
#               Writes the number to STDOUT so it can be captured.
#   PARAMETERS: $1      IA item (like hpr1192)
#      RETURNS: Nothing
#===============================================================================
queued_tasks () {
    local item="${1:?Usage: queued_tasks item}"
    local -i count=0

    # Count the task records whose category is "catalog". Because 'count' is
    # an integer variable, empty output from the pipeline is stored as 0.
    count="$(ia tasks "$item" |\
        jq -s '[.[] | if .category == "catalog" then .status else empty end] | length')"

    echo "$count"

    return
}

#===  FUNCTION  ================================================================
#         NAME: movefile
#  DESCRIPTION: Moves a file to a new place, catering for any directories in
#               the path
#   PARAMETERS: $1      directory to move from
#               $2      directory to move to
#               $3      file (or sub-path to move)
#      RETURNS: True if a move was done, otherwise False (the target already
#               exists, the target directory could not be created, or the
#               'mv' itself failed)
#===============================================================================
movefile () {
    local fromdir="${1:?Usage: movefile fromdir todir path}"
    local todir="${2:?Usage: movefile fromdir todir path}"
    local path="${3:?Usage: movefile fromdir todir path}"

    #
    # Chop up the path. If it's just a file name then $dir and $file are the
    # same, in which case we make $dir empty.
    #
    local dir="${path%/*}"
    local file="${path##*/}"
    [[ $dir = "$file" ]] && dir=''

    #
    # If we have a directory in the path check it exists in the 'to' directory
    # and create it if not. The test must be made against "$todir/$dir", not
    # against "$dir" relative to the current directory.
    #
    if [[ -n $dir && ! -d $todir/$dir ]]; then
        if ! mkdir -p "$todir/$dir"; then
            echo "Unable to create directory: $todir/$dir"
            return 1
        fi
    fi

    #
    # Does the file exist already?
    # TODO: Compare the two files?
    #
    if [[ -e $todir/$path ]]; then
        echo "File already exists: $todir/$path"
        return 1
    fi

    #
    # Only report (and count) the move if it actually succeeded
    #
    if mv "$fromdir/$path" "$todir/$path"; then
        echo "Moved $fromdir/$path"
        return 0
    else
        echo "Failed to move: $fromdir/$path"
        return 1
    fi
}

#===  FUNCTION  ================================================================
#         NAME: is_empty
#  DESCRIPTION: Check whether a directory is empty (of files). Sub-directories
#               are not counted; only regular files at any depth are.
#   PARAMETERS: $1      Directory to test
#      RETURNS: True if empty (of files), otherwise false
#===============================================================================
is_empty() {
    # 'find' prints a single X and quits at the first regular file found
    test -z "$(find "$1" -mindepth 1 -type f -printf X -quit)"
}

#===  FUNCTION  ================================================================
#         NAME: _DEBUG
#  DESCRIPTION: Writes a message if in DEBUG mode
#   PARAMETERS: List of messages
#      RETURNS: Nothing
#===============================================================================
_DEBUG () {
    local msg

    [ "$DEBUG" == 0 ] && return
    for msg in "$@"; do
        printf 'D> %s\n' "$msg"
    done
}

#===  FUNCTION  ================================================================
#         NAME: _usage
#  DESCRIPTION: Report usage (written to the file named in $STDOUT) and exit
#   PARAMETERS: 1 [optional]    exit value
#      RETURNS: Nothing
#===============================================================================
_usage () {
    local -i res="${1:-0}"

    # The here-document body and its terminator are kept at column 0 so that
    # the terminator is recognised regardless of tab/space conversion.
    cat >"$STDOUT" <<-endusage
${SCRIPT} - version: ${VERSION}

Usage: ./${SCRIPT} [-h] [-v] [-c COUNT] [-d {0|1}] [-D]

Moves HPR audio and other show-related files on 'borg' after their shows
have been uploaded to the Internet Archive. Files to be uploaded are in the
directory ${UPLOADS} and they are moved to the directory ${ARCHIVE}.

Options:
  -h            Print this help
  -v            Run in verbose mode where more information is reported
  -d 0|1        Dry run: -d 1 (the default) runs the script in dry-run
                mode where nothing is moved but the actions that
                will be taken are reported; -d 0 turns off dry-run
                mode and the actions will be carried out.
  -c COUNT      Count of shows to process. If omitted or zero then all
                shows will be processed, otherwise this is the number to
                stop at.
  -D            Run in debug mode where a lot more information is
                reported

Examples
  ./tidy_uploaded               # Run in (default) dry-run mode
  ./tidy_uploaded -v            # Dry-run mode with verbose messages
  ./tidy_uploaded -d0           # Live mode (without verbose messages)
  ./tidy_uploaded -c1           # Process 1 show in dry-run mode
  ./tidy_uploaded -D            # Run with debugging enabled

endusage
    exit "$res"
}

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

#
# Directories and files
#
LOGS="$BASEDIR/logs"
LOGFILE="$LOGS/$SCRIPT.log"

#
# Process options
#
while getopts :c:d:Dhv opt
do
    case "${opt}" in
        c) COUNT=$OPTARG;;
        D) DEBUG=1;;
        d) DRYRUN=$OPTARG;;
        h) _usage 0;;
        v) VERBOSE=1;;
        *) echo "** Unknown option"
            _usage 1;;
    esac
done
shift $((OPTIND - 1))

COUNT=${COUNT:-0}
if [[ ! $COUNT =~ ^[0-9]+$ ]]; then
    echo "** Use a numeric argument with -c"
    _usage 1
fi

#
# Validate -d with a pattern match rather than '-ne': an arithmetic comparison
# would evaluate arbitrary user text as an expression.
#
DRYRUN=${DRYRUN:-1}
if [[ ! $DRYRUN =~ ^[01]$ ]]; then
    echo "** Use '-d 0' or '-d 1'"
    _usage 1
fi
[[ $DRYRUN -eq 1 ]] && echo "Dry run mode"

VERBOSE=${VERBOSE:-0}

DEBUG=${DEBUG:-0}
[[ $DEBUG -eq 1 ]] && echo "Debug mode"

#
# Should have no arguments
#
if [[ $# != 0 ]]; then
    echo "** ${SCRIPT} takes no arguments"
    _usage 1
fi

#
# Declarations
#
declare -A seen
declare -a dirs
# lastitem=
ind=0

#
# Scan the directory 'UPLOADS' where files for upload to the IA are stored.
#
# See the `find' pipeline at the end of the loop which outputs the last change
# time and the full file path, sorts on the time, then removes it. This
# ensures we process the files in time order rather than alphabetic order of
# their names.
#
while IFS= read -r path; do
    #
    # Extract the path relative to $UPLOADS and the IA item name from the
    # returned path. Here $relpath will be the filename or a sub-directory and
    # filename, and $item will be the IA identifier like 'hpr1192'.
    #
    relpath="${path#"$UPLOADS"/}"
    item="${relpath:0:7}"

    [[ $VERBOSE -eq 1 ]] && echo "Found $path"
    _DEBUG "Path: $path"
    _DEBUG "Relative path: $relpath"
    _DEBUG "IA item: $item"

    #
    # Detect that the item prefix has changed. If it has we're processing
    # a new IA identifier, so work on this one
    #
    # If we have seen this item before we don't need to process it, so just
    # skip this loop iteration
    #

    #
    # Never seen before, so process it
    #
    if ! exists_in seen "$item"; then
        # shellcheck disable=SC2034
        seen[$item]=1

        #
        # Count this item and stop the loop if we've reached the requested
        # count. We want the value of $ind to be the number of shows
        # processed, so adjust it if we stopped after incrementing it.
        #
        ((ind++))
        if [[ $COUNT -gt 0 ]]; then
            if [[ $ind -gt $COUNT ]]; then
                ((ind--))
                break
            fi
            echo "[ Show #$ind ]"
        fi

        #
        # Look to see if there are any tasks queued for this show. If there
        # are we'll skip it just now.
        #
        tasks=$(queued_tasks "$item")
        if [[ $tasks -gt 0 ]]; then
            echo "** Item $item still has $tasks unfinished " \
                "$(ngettext task tasks "$tasks")"
            echo "** Skipping to the next item"
            continue
        fi

        [[ $VERBOSE -eq 1 ]] && echo "Checking IA for $item"

        #
        # Interrogate the IA for the item we're working on. If it returns True
        # we can proceed with tidying. The file 'TMP1' contains just a simple
        # list of the files on the IA relating to this item.
        #
        if ia list "$item" > "$TMP1"; then
            #
            # Save any directory associated with this item. This means that
            # directories with names that don't conform to the "^hpr[0-9]{4}"
            # pattern will be ignored, but this is *not* expected to happen.
            # Note that directories without corresponding audio will not be
            # cleaned up by this method, but again this is not expected to
            # happen.
            # TODO: be alert to such issues!
            #
            dirpath="$UPLOADS/$item"
            if [[ -d "$dirpath" ]]; then
                echo "Storing directory: $item"
                dirs+=("$item")
            fi

            moves=0

            #
            # Scan the returned list to see if any files we have are online.
            # Move to the ARCHIVE directory when there's a match.
            #
            while IFS= read -r file; do
                frompath="$UPLOADS/$file"
                topath="$ARCHIVE/$file"

                if [[ -e "$frompath" ]]; then
                    #
                    # A file on the IA exists in the upload area. Move the
                    # local one if we're not in dry-run mode, otherwise just
                    # report the move we would do.
                    #
                    if [[ $DRYRUN -eq 0 ]]; then
                        movefile "$UPLOADS" "$ARCHIVE" "$file" && ((moves++))
                    else
                        printf 'Would move %s\n\tto %s\n' "$frompath" "$topath"
                    fi
                fi
            done < "$TMP1"

            #
            # Log this item
            #
            [[ $DRYRUN -eq 0 ]] && \
                printf '%s moved %d %s for %s\n' "$(date +%Y%m%d%H%M%S)" \
                "$moves" "$(ngettext file files "$moves")" "$item" >> "$LOGFILE"

        else
            printf 'Skipping %s; not in the IA\n' "$item"
        fi
    else
        #
        # Ignore all but the first file belonging to an IA identifier
        #
        _DEBUG "Skipped $path - repeated show number"
        continue
    fi

    # 'cut -f2-' (not '-f2') keeps the whole path even if it contains spaces;
    # only the leading timestamp field is discarded.
done < <(find "$UPLOADS" -regextype posix-extended -regex '.*hpr[0-9]{4}.*' -printf "%CY%Cm%Cd%CH%CM%CS %p\n" | sort | cut -f2- -d' ')

# Old 'find' used:
# done < <(find "$UPLOADS" -regextype posix-extended -regex '.*hpr[0-9]{4}.*' | sort)

#
# No shows processed? There was nothing to do
#
if [[ $ind -eq 0 ]]; then
    [[ $DRYRUN -eq 0 ]] && echo "Nothing to do"
    exit
fi

_DEBUG "Number of shows scanned: $ind"
# _DEBUG "Accumulated directories (${#dirs[*]}): $(printf '/%s/ ' "${dirs[*]}")"

#
# If there are no directories just exit.
#
[[ -v dirs ]] || exit

#
# By an (as yet) unknown process we might get duplicates, so remove them here.
#
# mapfile -t dirs < <(printf "%s\n" "${dirs[*]}" | uniq)
declare -A unique
for e in "${dirs[@]}"; do unique[$e]=1; done
dirs=( "${!unique[@]}" )
# mapfile -t dirs < <(printf '%s\n' "${!unique[@]}")

# "${dirs[@]}" gives printf one argument per directory, so each name is
# wrapped in its own >...< markers
_DEBUG "Directories to process (${#dirs[*]}): $(printf '>%s< ' "${dirs[@]}")"

#
# Clean up any empty directories. These may exist because we moved their
# contents one file at a time. We only deal with the directories we've visited
# though.
#
for dir in "${dirs[@]}"; do
    path="$UPLOADS/$dir"
    if [[ $DRYRUN -eq 0 ]]; then
        if is_empty "$path"; then
            # ${path:?} aborts rather than running 'rm -rf' on an empty path
            rm -rf -- "${path:?}"
            RES=$?
            if [[ $RES -eq 0 ]]; then
                echo "Deleted $path"
                echo "$(date +%Y%m%d%H%M%S) deleted empty directory $path" >> "$LOGFILE"
            else
                echo "Failed to delete: $path"
            fi
        else
            echo "Directory is not empty: $path"
            echo "Not deleted!"
        fi
    else
        echo "Would delete directory $path"
    fi
done

exit

# vim: syntax=sh:ts=8:sw=4:ai:et:tw=78:fo=tcrqn21:fdm=marker