forked from HPR/hpr-tools
		
	
		
			
	
	
		
			455 lines
		
	
	
		
			14 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
		
		
			
		
	
	
			455 lines
		
	
	
		
			14 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
|   | #!/bin/bash - | ||
|  | #=============================================================================== | ||
|  | # | ||
|  | #         FILE: tidy_uploaded | ||
|  | # | ||
|  | #        USAGE: ./tidy_uploaded [-h] [-v] [-d {0|1}] [-c COUNT] [-D] | ||
|  | # | ||
|  | #  DESCRIPTION: Relocates HPR audio and other show-related files on 'borg' | ||
|  | #               after their shows have been uploaded to the Internet Archive | ||
|  | # | ||
|  | #      OPTIONS: --- | ||
|  | # REQUIREMENTS: --- | ||
|  | #         BUGS: --- | ||
|  | #        NOTES: --- | ||
|  | #       AUTHOR: Dave Morriss (djm), Dave.Morriss@gmail.com | ||
|  | #      VERSION: 0.0.10 | ||
|  | #      CREATED: 2022-03-30 17:38:01 | ||
|  | #     REVISION: 2022-07-30 14:30:43 | ||
|  | # | ||
|  | #=============================================================================== | ||
|  | 
 | ||
set -o nounset                              # Treat unset variables as an error

VERSION="0.0.10"

SCRIPT=${0##*/}
# DIR=${0%/*}

#
# Note: despite its name this is file descriptor 2 (standard error). The
# usage text produced by _usage is written here.
#
STDOUT="/dev/fd/2"

#
# Load library functions
#
LIB="$HOME/bin/function_lib.sh"
[ -e "$LIB" ] || { echo "Unable to source functions"; exit 1; }
# shellcheck disable=SC1090
source "$LIB"

#
# Make a temporary file and set traps to delete it. 'cleanup_temp' is not
# defined in this file; presumably it comes from the library sourced above.
# The file name is quoted inside the trap so that an unusual TMPDIR (one
# containing spaces, say) cannot split it into several arguments.
#
TMP1=$(mktemp) || { echo "$SCRIPT: creation of temporary file failed!"; exit 1; }
trap 'cleanup_temp "$TMP1"' SIGHUP SIGINT SIGPIPE SIGTERM EXIT

#
# Configure depending whether local or on the VPS. Any other host is refused
# because the directory layout is only known for these two.
#
case "$HOSTNAME" in
    borg)       BASEDIR="$HOME/InternetArchive"
                UPLOADS="/data/IA/uploads"
                ARCHIVE="/data/IA/done" ;;
    i7-desktop) BASEDIR="$HOME/HPR/InternetArchive"
                UPLOADS="$HOME/HPR/IA/uploads"
                ARCHIVE="$HOME/HPR/IA/done";;
    *)          echo "Wrong host!"; exit 1 ;;
esac
|  | 
 | ||
#===  FUNCTION  ================================================================
#         NAME: exists_in
#  DESCRIPTION: Checks the existence of a key in an associative array. A key
#               whose value is the empty string still counts as existing.
#   PARAMETERS: $1      array name
#               $2      key value
#      RETURNS: True if the key exists, False otherwise. Also False (status
#               2, with a message on STDERR) if $1 is not a valid variable
#               name.
#
# Modified from
# https://stackoverflow.com/questions/13219634/easiest-way-to-check-for-an-index-or-a-key-in-an-array
#===============================================================================
exists_in () {
    #
    # The array name becomes part of the text handed to 'eval', so refuse
    # anything that is not a plain identifier.
    #
    if [[ ! ${1-} =~ ^[A-Za-z_][A-Za-z0-9_]*$ ]]; then
        echo "exists_in: invalid array name '${1-}'" >&2
        return 2
    fi

    #
    # An empty subscript is an error in Bash, so such a key cannot exist
    #
    [[ -n ${2-} ]] || return 1

    #
    # Only the (validated) array name is expanded now; '$2' is left for
    # 'eval' to expand, so the key is never re-parsed as shell code. The
    # positional parameter is used rather than a local variable so that no
    # local name can shadow the caller's array.
    #
    eval "[[ -n \${$1[\$2]+set} ]]"
}
|  | 
 | ||
#===  FUNCTION  ================================================================
#         NAME: queued_tasks
#  DESCRIPTION: Queries the IA for any queued or running tasks for an item.
#               Runs 'ia tasks' for the item and uses 'jq' to count the
#               entries whose category is "catalog". Writes the number to
#               STDOUT so it can be captured. If the pipeline produces no
#               output the number written is 0, because the result is held
#               in an integer variable.
#   PARAMETERS: $1      IA item (like hpr1192)
#      RETURNS: Nothing
#===============================================================================
queued_tasks () {
    local item="${1:?Usage: queued_tasks item}"
    local -i count=0

    count="$(ia tasks "$item" \
        | jq -s '[.[] | if .category == "catalog" then .status else empty end] | length')"

    echo "$count"

    return
}
|  | 
 | ||
#===  FUNCTION  ================================================================
#         NAME: movefile
#  DESCRIPTION: Moves a file to a new place, catering for any directories in
#               the path. An existing file at the destination is never
#               overwritten. Progress messages go to STDOUT, failures of
#               'mkdir' or 'mv' are reported on STDERR.
#   PARAMETERS: $1      directory to move from
#               $2      directory to move to
#               $3      file (or sub-path to move)
#      RETURNS: True if a move was done, otherwise False
#===============================================================================
movefile () {
    local fromdir="${1:?Usage: movefile fromdir todir path}"
    local todir="${2:?Usage: movefile fromdir todir path}"
    local path="${3:?Usage: movefile fromdir todir path}"
    local dir=''

    #
    # Only a path containing a slash has a directory part. (Comparing the
    # directory and file parts for equality is not enough: it would treat
    # a path such as 'x/x' as a bare file name.)
    #
    if [[ $path == */* ]]; then
        dir="${path%/*}"
    fi

    #
    # If we have a directory in the path check it exists in the 'to' directory
    # (not relative to wherever we happen to be running) and create it if not
    #
    if [[ -n $dir && ! -d $todir/$dir ]]; then
        if ! mkdir -p -- "$todir/$dir"; then
            echo "Unable to create directory: $todir/$dir" >&2
            return 1
        fi
    fi

    #
    # Does the file exist already?
    # TODO: Compare the two files?
    #
    if [[ -e $todir/$path ]]; then
        echo "File already exists: $todir/$path"
        return 1
    fi

    #
    # Only claim success if the move really happened
    #
    if mv -- "$fromdir/$path" "$todir/$path"; then
        echo "Moved $fromdir/$path"
        return 0
    fi

    echo "Failed to move: $fromdir/$path" >&2
    return 1
}
|  | 
 | ||
#===  FUNCTION  ================================================================
#         NAME: is_empty
#  DESCRIPTION: Check whether a directory is empty (of files). Only regular
#               files anywhere below the directory are looked for, so
#               sub-directories and other file types do not count. The
#               search stops at the first file found. If 'find' writes
#               nothing for any reason the directory is reported as empty.
#   PARAMETERS: $1      Directory to test
#      RETURNS: True if empty (of files), otherwise false
#===============================================================================
is_empty () {
    local dir="${1:?Usage: is_empty directory}"

    [[ -z "$(find "$dir" -mindepth 1 -type f -printf X -quit)" ]]
}
|  | 
 | ||
#===  FUNCTION  ================================================================
#         NAME: _DEBUG
#  DESCRIPTION: Writes a message if in DEBUG mode. Each argument is written
#               to STDOUT on its own line with the prefix 'D> '. Nothing is
#               written when the global DEBUG is 0 (or is not set at all).
#   PARAMETERS: List of messages
#      RETURNS: Nothing
#===============================================================================
_DEBUG () {
    local msg       # keep the loop variable out of the caller's namespace

    [[ ${DEBUG-0} == 0 ]] && return

    for msg in "$@"; do
        printf 'D> %s\n' "$msg"
    done
}
|  | 
 | ||
#===  FUNCTION  ================================================================
#         NAME: _usage
#  DESCRIPTION: Report usage. The text is written to the file named by the
#               global STDOUT (file descriptor 2 if that is unset or empty)
#               and the script then exits. Uses the globals SCRIPT, VERSION,
#               UPLOADS and ARCHIVE in the text.
#   PARAMETERS: 1       [optional] exit value (default 0)
#      RETURNS: Nothing (does not return; exits the script)
#===============================================================================
_usage () {
    local -i res="${1:-0}"

    cat >"${STDOUT:-/dev/fd/2}" <<-endusage
${SCRIPT} - version: ${VERSION}

Usage: ./${SCRIPT} [-h] [-v] [-c COUNT] [-d {0|1}] [-D]

Moves HPR audio and other show-related files on 'borg' after their shows
have been uploaded to the Internet Archive. Files to be uploaded are in the
directory ${UPLOADS} and they are moved to the directory ${ARCHIVE}.

Options:
  -h                    Print this help
  -v                    Run in verbose mode where more information is reported
  -d 0|1                Dry run: -d 1 (the default) runs the script in dry-run
                        mode where nothing is moved but the actions that
                        will be taken are reported; -d 0 turns off dry-run
                        mode and the actions will be carried out.
  -c COUNT              Count of shows to process. If omitted or zero then all
                        shows will be processed, otherwise this is the number
                        to stop at.
  -D                    Run in debug mode where a lot more information is
                        reported

Examples
    ./tidy_uploaded             # Run in (default) dry-run mode
    ./tidy_uploaded -v          # Dry-run mode with verbose messages
    ./tidy_uploaded -d0         # Live mode (without verbose messages)
    ./tidy_uploaded -c1         # Process 1 show in dry-run mode
    ./tidy_uploaded -D          # Run with debugging enabled

endusage
    exit "$res"
}
|  | 
 | ||
|  | #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | ||
|  | 
 | ||
#
# Directories and files
#
LOGS="$BASEDIR/logs"
LOGFILE="$LOGS/$SCRIPT.log"

#
# Process options. The leading ':' in the option string selects silent error
# reporting, in which a missing argument arrives as ':' and an unknown option
# as '?', each with the offending option letter in OPTARG.
#
while getopts :c:d:Dhv opt
do
    case "${opt}" in
        c) COUNT=$OPTARG;;
        D) DEBUG=1;;
        d) DRYRUN=$OPTARG;;
        h) _usage 0;;
        v) VERBOSE=1;;
        :) echo "** Option -$OPTARG needs an argument"
           _usage 1;;
        *) echo "** Unknown option -$OPTARG"
           _usage 1;;
    esac
done
shift $((OPTIND - 1))

COUNT=${COUNT:-0}
if [[ ! $COUNT =~ ^[0-9]+$ ]]; then
    echo "** Use a numeric argument with -c"
    _usage 1
fi

#
# Validate the dry-run setting as text. A numeric comparison would evaluate
# the argument as an arithmetic expression, so a value like 'yes' would be
# read as a variable name rather than rejected.
#
DRYRUN=${DRYRUN:-1}
if [[ $DRYRUN != [01] ]]; then
    echo "** Use '-d 0' or '-d 1'"
    _usage 1
fi
[[ $DRYRUN -eq 1 ]] && echo "Dry run mode"

VERBOSE=${VERBOSE:-0}

DEBUG=${DEBUG:-0}
[[ $DEBUG -eq 1 ]] && echo "Debug mode"

#
# Should have no arguments
#
if [[ $# != 0 ]]; then
    echo "** ${SCRIPT} takes no arguments"
    _usage 1
fi
|  | 
 | ||
#
# Declarations
#
declare -A seen         # IA items already handled, keyed by item name
declare -a dirs         # items that have a directory under $UPLOADS
# lastitem=
ind=0                   # number of distinct items (shows) processed

#
# Scan the directory 'UPLOADS' where files for upload to the IA are stored.
#
# See the `find' pipeline at the end of the loop which outputs the last change
# time and the full file path, sorts on the time, then removes it. This
# ensures we process the files in time order rather than alphabetic order of
# their names. Everything after the first space is kept by 'cut' so that a
# path containing spaces is not truncated, and 'IFS=' stops 'read' trimming
# leading or trailing white space.
#
while IFS= read -r path; do
    #
    # Extract the path relative to $UPLOADS and the IA item name from the
    # returned path. Here $relpath will be the filename or a sub-directory and
    # filename, and $item will be the IA identifier like 'hpr1192'.
    #
    relpath="${path#"$UPLOADS"/}"
    item="${relpath:0:7}"

    [[ $VERBOSE -eq 1 ]] && echo "Found $path"

    _DEBUG "Path:          $path"
    _DEBUG "Relative path: $relpath"
    _DEBUG "IA item:       $item"

    #
    # The 'find' below matches 'hprNNNN' anywhere in the path, so the first
    # seven characters of the relative path are not guaranteed to be an item
    # name. Don't bother the IA with anything else.
    #
    if [[ ! $item =~ ^hpr[0-9]{4}$ ]]; then
        _DEBUG "Skipped $path - does not start with an IA item name"
        continue
    fi

    #
    # Each item is processed once, when the first of its files turns up. If
    # we have seen this item before we don't need to process it again, so
    # just skip this loop iteration.
    #
    if exists_in seen "$item"; then
        #
        # Ignore all but the first file belonging to an IA identifier
        #
        _DEBUG "Skipped $path - repeated show number"
        continue
    fi

    #
    # Never seen before, so process it
    #
    # shellcheck disable=SC2034
    seen[$item]=1

    #
    # Count this item and stop the loop if we've reached the requested
    # count. We want the value of $ind to be the number of shows
    # processed, so adjust it if we stopped after incrementing it.
    #
    ((ind++))
    if [[ $COUNT -gt 0 ]]; then
        if [[ $ind -gt $COUNT ]]; then
            ((ind--))
            break
        fi
        echo "[ Show #$ind ]"
    fi

    #
    # Look to see if there are any tasks queued for this show. If there
    # are we'll skip it just now. ('ngettext' is not defined in this file;
    # it is used here to choose between the singular and plural word.)
    #
    tasks=$(queued_tasks "$item")
    if [[ $tasks -gt 0 ]]; then
        echo "** Item $item still has $tasks unfinished" \
            "$(ngettext task tasks "$tasks")"
        echo "** Skipping to the next item"
        continue
    fi

    [[ $VERBOSE -eq 1 ]] && echo "Checking IA for $item"

    #
    # Interrogate the IA for the item we're working on. If it returns True
    # we can proceed with tidying. The file 'TMP1' contains just a simple
    # list of the files on the IA relating to this item.
    #
    if ! ia list "$item" > "$TMP1"; then
        printf 'Skipping %s; not in the IA\n' "$item"
        continue
    fi

    #
    # Save any directory associated with this item. This means that
    # directories with names that don't conform to the "^hpr[0-9]{4}"
    # pattern will be ignored, but this is *not* expected to happen.
    # Note that directories without corresponding audio will not be
    # cleaned up by this method, but again this is not expected to
    # happen.
    # TODO: be alert to such issues!
    #
    dirpath="$UPLOADS/$item"
    if [[ -d "$dirpath" ]]; then
        echo "Storing directory: $item"
        dirs+=("$item")
    fi

    moves=0

    #
    # Scan the returned list to see if any files we have are online.
    # Move to the ARCHIVE directory when there's a match.
    #
    while IFS= read -r file; do
        #
        # A blank line would make $frompath the upload directory itself
        # and then abort the script inside 'movefile', so ignore it
        #
        [[ -n $file ]] || continue

        frompath="$UPLOADS/$file"
        topath="$ARCHIVE/$file"

        if [[ -e "$frompath" ]]; then
            #
            # A file on the IA exists in the upload area. Move the
            # local one if we're not in dry-run mode, otherwise just
            # report the move we would do.
            #
            if [[ $DRYRUN -eq 0 ]]; then
                movefile "$UPLOADS" "$ARCHIVE" "$file" && ((moves++))
            else
                printf 'Would move %s\n\tto %s\n' "$frompath" "$topath"
            fi
        fi
    done < "$TMP1"

    #
    # Log this item
    #
    [[ $DRYRUN -eq 0 ]] && \
        printf '%s moved %d %s for %s\n' "$(date +%Y%m%d%H%M%S)" \
            "$moves" "$(ngettext file files "$moves")" "$item" >> "$LOGFILE"

done < <(find "$UPLOADS" -regextype posix-extended -regex '.*hpr[0-9]{4}.*' \
            -printf "%CY%Cm%Cd%CH%CM%CS %p\n" | sort | cut -f2- -d' ')

# Old 'find' used:
# done < <(find "$UPLOADS" -regextype posix-extended -regex '.*hpr[0-9]{4}.*' | sort)
|  | 
 | ||
#
# No shows processed? There was nothing to do. This is a normal outcome, so
# exit with a zero status whether or not the message was printed.
#
if [[ $ind -eq 0 ]]; then
    [[ $DRYRUN -eq 0 ]] && echo "Nothing to do"
    exit 0
fi

_DEBUG "Number of shows scanned: $ind"
# _DEBUG "Accumulated directories (${#dirs[*]}): $(printf '/%s/ ' "${dirs[*]}")"

#
# If there are no directories just exit (successfully).
#
[[ -v dirs ]] || exit 0

#
# By an (as yet) unknown process we might get duplicates, so remove them here.
# Using the names as keys of an associative array keeps one copy of each; the
# original order is not preserved.
#
# mapfile -t dirs < <(printf "%s\n" "${dirs[*]}" | uniq)
declare -A unique
for e in "${dirs[@]}"; do unique[$e]=1; done
dirs=( "${!unique[@]}" )
# mapfile -t dirs < <(printf '%s\n' "${!unique[@]}")

_DEBUG "Directories to process (${#dirs[@]}): $(printf '>%s< ' "${dirs[@]}")"

#
# Clean up any empty directories. These may exist because we moved their
# contents one file at a time. We only deal with the directories we've visited
# though.
#
for dir in "${dirs[@]}"; do
    path="$UPLOADS/$dir"

    if [[ $DRYRUN -eq 0 ]]; then
        #
        # 'is_empty' only looks for regular files, so the directory may
        # still hold (empty) sub-directories; hence the recursive delete.
        #
        if is_empty "$path"; then
            if rm -rf -- "$path"; then
                echo "Deleted $path"
                echo "$(date +%Y%m%d%H%M%S) deleted empty directory $path" >> "$LOGFILE"
            else
                echo "Failed to delete: $path"
            fi
        else
            echo "Directory is not empty: $path"
            echo "Not deleted!"
        fi
    else
        echo "Would delete directory $path"
    fi

done

exit 0
|  | 
 | ||
|  | # vim: syntax=sh:ts=8:sw=4:ai:et:tw=78:fo=tcrqn21:fdm=marker |