forked from HPR/hpr-tools
		
	
		
			
				
	
	
		
			455 lines
		
	
	
		
			14 KiB
		
	
	
	
		
			Bash
		
	
	
		
			Executable File
		
	
	
	
	
			
		
		
	
	
			455 lines
		
	
	
		
			14 KiB
		
	
	
	
		
			Bash
		
	
	
		
			Executable File
		
	
	
	
	
| #!/bin/bash -
 | |
| #===============================================================================
 | |
| #
 | |
| #         FILE: tidy_uploaded
 | |
| #
 | |
| #        USAGE: ./tidy_uploaded [-h] [-v] [-d {0|1}] [-c COUNT] [-D]
 | |
| #
 | |
| #  DESCRIPTION: Relocates HPR audio and other show-related files on 'borg'
 | |
| #               after their shows have been uploaded to the Internet Archive
 | |
| #
 | |
| #      OPTIONS: ---
 | |
| # REQUIREMENTS: ---
 | |
| #         BUGS: ---
 | |
| #        NOTES: ---
 | |
| #       AUTHOR: Dave Morriss (djm), Dave.Morriss@gmail.com
 | |
| #      VERSION: 0.0.10
 | |
| #      CREATED: 2022-03-30 17:38:01
 | |
| #     REVISION: 2022-07-30 14:30:43
 | |
| #
 | |
| #===============================================================================
 | |
| 
 | |
set -o nounset                              # Treat unset variables as an error

VERSION="0.0.10"

# Name of this script without any leading directory; used in messages and to
# name the log file
SCRIPT=${0##*/}
# DIR=${0%/*}

# Where the usage text is written. NOTE: despite the variable's name this is
# file descriptor 2, i.e. standard error.
STDOUT="/dev/fd/2"

#
# Load library functions
#
LIB="$HOME/bin/function_lib.sh"
[ -e "$LIB" ] || { echo "Unable to source functions"; exit 1; }
# shellcheck disable=SC1090
source "$LIB"

#
# Make temporary files and set traps to delete them
#
TMP1=$(mktemp) || { echo "$SCRIPT: creation of temporary file failed!"; exit 1; }
# 'cleanup_temp' is not defined in this file; presumably it is provided by
# $LIB (TODO confirm). The single quotes defer expansion until the trap fires
# and the inner double quotes keep a temporary path containing whitespace as
# one argument.
trap 'cleanup_temp "$TMP1"' SIGHUP SIGINT SIGPIPE SIGTERM EXIT

#
# Configure depending whether local or on the VPS
#
#   BASEDIR     parent of the 'logs' directory
#   UPLOADS     where files waiting to be confirmed on the IA are found
#   ARCHIVE     where files are moved to once they are found on the IA
#
case $HOSTNAME in
    borg)       BASEDIR="$HOME/InternetArchive"
                UPLOADS="/data/IA/uploads"
                ARCHIVE="/data/IA/done" ;;
    i7-desktop) BASEDIR="$HOME/HPR/InternetArchive"
                UPLOADS="$HOME/HPR/IA/uploads"
                ARCHIVE="$HOME/HPR/IA/done";;
    *)          echo "Wrong host!"; exit 1 ;;
esac
 | |
| 
 | |
#===  FUNCTION  ================================================================
#         NAME: exists_in
#  DESCRIPTION: Checks the existence of a key in an associative array. The
#               array is reached through a nameref instead of 'eval', so the
#               array name supplied by the caller is never executed as code.
#   PARAMETERS: $1      array name
#               $2      key value
#      RETURNS: True if the key exists (even when its value is empty), False
#               otherwise. Also False if $1 is not a valid variable name or
#               if $2 is empty.
#
# Modified from
# https://stackoverflow.com/questions/13219634/easiest-way-to-check-for-an-index-or-a-key-in-an-array
#===============================================================================
exists_in () {
    # Only accept a plain identifier as the array name
    [[ ${1-} =~ ^[A-Za-z_][A-Za-z0-9_]*$ ]] || return 1

    # An empty subscript is an error for an associative array, so it can
    # never be an existing key
    [[ -n ${2-} ]] || return 1

    # The long name makes a clash with the caller's array name (which would
    # create a circular reference) very unlikely
    local -n __exists_in_ref="$1"

    # ${array[key]+word} expands to 'word' only if the element is set, which
    # also keeps this safe under 'set -o nounset'
    [[ -n ${__exists_in_ref[$2]+set} ]]
}
 | |
| 
 | |
#===  FUNCTION  ================================================================
#         NAME: queued_tasks
#  DESCRIPTION: Queries the IA for any queued or running tasks for an item.
#               Counts the entries returned by 'ia tasks' whose category is
#               "catalog" and writes the number to STDOUT so it can be
#               captured. A number is always written; if the query fails the
#               number is 0 and the failure is signalled by the return value.
#   PARAMETERS: $1      IA item (like hpr1192)
#      RETURNS: 0 if the query worked, non-zero if 'ia' or 'jq' failed or the
#               result was not a whole number
#===============================================================================
queued_tasks () {
    local item="${1:?Usage: queued_tasks item}"
    local count
    local -i rc=0

    #
    # 'pipefail' is switched on only inside the command substitution's
    # subshell so that a failure of 'ia' isn't hidden by a successful 'jq'.
    # The caller's shell options are untouched.
    #
    count="$(set -o pipefail
        ia tasks "$item" |
            jq -s '[.[] | if .category == "catalog" then .status else empty end] | length')" ||
        rc=$?

    #
    # Only ever report a whole number
    #
    if [[ ! $count =~ ^[0-9]+$ ]]; then
        count=0
        (( rc == 0 )) && rc=1
    fi

    echo "$count"

    return "$rc"
}
 | |
| 
 | |
#===  FUNCTION  ================================================================
#         NAME: movefile
#  DESCRIPTION: Moves a file to a new place, catering for any directories in
#               the path. Nothing is overwritten: if the target already
#               exists the file is left where it is.
#   PARAMETERS: $1      directory to move from
#               $2      directory to move to
#               $3      file (or sub-path to move)
#      RETURNS: True if a move was done, otherwise False (target exists, the
#               target directory could not be made or the move failed)
#===============================================================================
movefile () {
    local fromdir="${1:?Usage: movefile fromdir todir path}"
    local todir="${2:?Usage: movefile fromdir todir path}"
    local path="${3:?Usage: movefile fromdir todir path}"

    #
    # Chop up the path. If it's just a file name there is no '/' to remove so
    # $dir ends up the same as $path, in which case we make $dir empty.
    # (Comparing with $path rather than the bare file name means a path like
    # 'x/x' keeps its directory part.)
    #
    local dir="${path%/*}"
    [[ $dir == "$path" ]] && dir=''

    #
    # If we have a directory in the path check it exists in the 'to' directory
    # and create it if not
    #
    if [[ -n $dir && ! -d $todir/$dir ]]; then
        if ! mkdir -p -- "$todir/$dir"; then
            echo "Failed to create directory $todir/$dir" >&2
            return 1
        fi
    fi

    #
    # Does the file exist already?
    # TODO: Compare the two files?
    #
    if [[ -e $todir/$path ]]; then
        echo "File already exists: $todir/$path"
        return 1
    fi

    #
    # Only report success if the move really happened
    #
    if mv -- "$fromdir/$path" "$todir/$path"; then
        echo "Moved $fromdir/$path"
        return 0
    fi

    echo "Failed to move $fromdir/$path" >&2
    return 1
}
 | |
| 
 | |
#===  FUNCTION  ================================================================
#         NAME: is_empty
#  DESCRIPTION: Check whether a directory is empty (of files). Only regular
#               files count, at any depth; sub-directories and other types of
#               entry are ignored.
#   PARAMETERS: $1      Directory to test
#      RETURNS: True if empty (of files), otherwise false. Also false if $1
#               is not an existing directory.
#===============================================================================
is_empty() {
    local dir="${1:?Usage: is_empty directory}"

    #
    # Without this check a missing directory would look empty because 'find'
    # fails and writes nothing to STDOUT
    #
    [[ -d $dir ]] || return 1

    #
    # 'find' writes a single X and stops at the first regular file it meets,
    # so any output at all means "not empty"
    #
    [[ -z $(find "$dir" -mindepth 1 -type f -printf X -quit) ]]
}
 | |
| 
 | |
#===  FUNCTION  ================================================================
#         NAME: _DEBUG
#  DESCRIPTION: Writes a message if in DEBUG mode. Each argument is written
#               on a line of its own prefixed with 'D> '. Reads the global
#               DEBUG; nothing is written when it is 0.
#   PARAMETERS: List of messages
#      RETURNS: Nothing
#===============================================================================
_DEBUG () {
    [[ $DEBUG == 0 ]] && return 0

    # With no arguments 'printf' would still write one empty 'D> ' line
    (( $# > 0 )) || return 0

    # 'printf' re-uses the format for every argument, so no loop (and no
    # loop variable leaking into the global scope) is needed
    printf 'D> %s\n' "$@"
}
 | |
| 
 | |
#===  FUNCTION  ================================================================
#         NAME: _usage
#  DESCRIPTION: Report usage. The text is written to the file named in the
#               global STDOUT (file descriptor 2 if that is unset or empty)
#               and then the script exits. Uses the globals SCRIPT, VERSION,
#               UPLOADS and ARCHIVE in the text.
#   PARAMETERS: 1       [optional] exit value (default 0)
#      RETURNS: Nothing (does not return; exits with the given value)
#===============================================================================
_usage () {
    local -i res="${1:-0}"

    # The redirection target is quoted so that it can't be split or globbed
    cat >"${STDOUT:-/dev/fd/2}" <<-endusage
${SCRIPT} - version: ${VERSION}

Usage: ./${SCRIPT} [-h] [-v] [-c COUNT] [-d {0|1}] [-D]

Moves HPR audio and other show-related files on 'borg' after their shows
have been uploaded to the Internet Archive. Files to be uploaded are in the
directory ${UPLOADS} and they are moved to the directory ${ARCHIVE}.

Options:
  -h                    Print this help
  -v                    Run in verbose mode where more information is reported
  -d 0|1                Dry run: -d 1 (the default) runs the script in dry-run
                        mode where nothing is moved but the actions that
                        will be taken are reported; -d 0 turns off dry-run
                        mode and the actions will be carried out.
  -c COUNT              Count of shows to process. If omitted or zero then all
                        shows will be processed, otherwise this is the number
                        to stop at.
  -D                    Run in debug mode where a lot more information is
                        reported

Examples
    ./${SCRIPT}             # Run in (default) dry-run mode
    ./${SCRIPT} -v          # Dry-run mode with verbose messages
    ./${SCRIPT} -d0         # Live mode (without verbose messages)
    ./${SCRIPT} -c1         # Process 1 show in dry-run mode
    ./${SCRIPT} -D          # Run with debugging enabled

endusage
    exit "$res"
}
 | |
| 
 | |
| #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 | |
| 
 | |
| #
 | |
| # Directories and files
 | |
| #
 | |
LOGS="$BASEDIR/logs"
LOGFILE="$LOGS/$SCRIPT.log"

#
# Process options. The leading ':' in the option string puts 'getopts' into
# silent mode: a missing argument is reported as ':' and an unknown option as
# '?', in both cases with the option letter in OPTARG.
#
while getopts :c:d:Dhv opt
do
    case "${opt}" in
        c) COUNT=$OPTARG;;
        D) DEBUG=1;;
        d) DRYRUN=$OPTARG;;
        h) _usage 0;;
        v) VERBOSE=1;;
        :) echo "** Option -$OPTARG needs an argument"
           _usage 1;;
        *) echo "** Unknown option"
           _usage 1;;
    esac
done
shift $((OPTIND - 1))

COUNT=${COUNT:-0}
if [[ ! $COUNT =~ ^[0-9]+$ ]]; then
    echo "** Use a numeric argument with -c"
    _usage 1
fi
# Force base 10 so that a value with leading zeroes, like '08', isn't treated
# as (invalid) octal when it's used in arithmetic comparisons later
COUNT=$((10#$COUNT))

#
# Compare as strings. An arithmetic test here would evaluate whatever the
# user typed as an expression, and under 'nounset' something like '-d foo'
# would abort the script instead of showing the usage message.
#
DRYRUN=${DRYRUN:-1}
if [[ $DRYRUN != 0 && $DRYRUN != 1 ]]; then
    echo "** Use '-d 0' or '-d 1'"
    _usage 1
fi
[[ $DRYRUN -eq 1 ]] && echo "Dry run mode"

VERBOSE=${VERBOSE:-0}

DEBUG=${DEBUG:-0}
[[ $DEBUG -eq 1 ]] && echo "Debug mode"

#
# Should have no arguments
#
if [[ $# != 0 ]]; then
    echo "** ${SCRIPT} takes no arguments"
    _usage 1
fi
 | |
| 
 | |
| #
 | |
| # Declarations
 | |
| #
 | |
declare -A seen         # IA items already dealt with, keyed by item name
declare -a dirs         # Items that have a directory of their own in UPLOADS
# lastitem=
ind=0                   # Number of distinct items (shows) met so far

#
# Scan the directory 'UPLOADS' where files for upload to the IA are stored.
#
# See the `find' pipeline at the end of the loop which outputs the last change
# time and the full file path, sorts on the time, then removes it. This
# ensures we process the files in time order rather than alphabetic order of
# their names.
#
# 'IFS=' stops 'read' trimming leading or trailing white space from the path.
#
while IFS= read -r path; do
    #
    # Extract the path relative to $UPLOADS and the IA item name from the
    # returned path. Here $relpath will be the filename or a sub-directory and
    # filename, and $item will be the IA identifier like 'hpr1192' (the first
    # seven characters of $relpath).
    #
    relpath="${path#"$UPLOADS"/}"
    item="${relpath:0:7}"

    [[ $VERBOSE -eq 1 ]] && echo "Found $path"

    _DEBUG "Path:          $path"
    _DEBUG "Relative path: $relpath"
    _DEBUG "IA item:       $item"

    #
    # Detect that the item prefix has changed. If it has we're processing
    # a new IA identifier, so work on this one
    #
    # If we have seen this item before we don't need to process it, so just
    # skip this loop iteration
    #

    #
    # Never seen before, so process it
    #
    if ! exists_in seen "$item"; then
        # shellcheck disable=SC2034
        seen[$item]=1

        #
        # Count this item and stop the loop if we've reached the requested
        # count. We want the value of $ind to be the number of shows
        # processed, so adjust it if we stopped after incrementing it.
        #
        ((ind++))
        if [[ $COUNT -gt 0 ]]; then
            if [[ $ind -gt $COUNT ]]; then
                ((ind--))
                break
            fi
            echo "[ Show #$ind ]"
        fi

        #
        # Look to see if there are any tasks queued for this show. If there
        # are we'll skip it just now.
        #
        tasks=$(queued_tasks "$item")
        if [[ $tasks -gt 0 ]]; then
            # 'echo' puts one space between its arguments, so the first
            # string must not end with a space of its own
            echo "** Item $item still has $tasks unfinished" \
                "$(ngettext task tasks "$tasks")"
            echo "** Skipping to the next item"
            continue
        fi

        [[ $VERBOSE -eq 1 ]] && echo "Checking IA for $item"

        #
        # Interrogate the IA for the item we're working on. If it returns True
        # we can proceed with tidying. The file 'TMP1' contains just a simple
        # list of the files on the IA relating to this item.
        #
        if ia list "$item" > "$TMP1"; then
            #
            # Save any directory associated with this item. This means that
            # directories with names that don't conform to the "^hpr[0-9]{4}"
            # pattern will be ignored, but this is *not* expected to happen.
            # Note that directories without corresponding audio will not be
            # cleaned up by this method, but again this is not expected to
            # happen.
            # TODO: be alert to such issues!
            #
            dirpath="$UPLOADS/$item"
            if [[ -d "$dirpath" ]]; then
                echo "Storing directory: $item"
                dirs+=("$item")
            fi

            moves=0

            #
            # Scan the returned list to see if any files we have are online.
            # Move to the ARCHIVE directory when there's a match.
            #
            while IFS= read -r file; do
                frompath="$UPLOADS/$file"
                topath="$ARCHIVE/$file"

                if [[ -e "$frompath" ]]; then
                    #
                    # A file on the IA exists in the upload area. Move the
                    # local one if we're not in dry-run mode, otherwise just
                    # report the move we would do.
                    #
                    if [[ $DRYRUN -eq 0 ]]; then
                        movefile "$UPLOADS" "$ARCHIVE" "$file" && ((moves++))
                    else
                        printf 'Would move %s\n\tto %s\n' "$frompath" "$topath"
                    fi
                fi
            done < "$TMP1"

            #
            # Log this item
            #
            [[ $DRYRUN -eq 0 ]] && \
                printf '%s moved %d %s for %s\n' "$(date +%Y%m%d%H%M%S)" \
                    "$moves" "$(ngettext file files "$moves")" "$item" >> "$LOGFILE"

        else
            printf 'Skipping %s; not in the IA\n' "$item"
        fi
    else
        #
        # Ignore all but the first file belonging to an IA identifier
        #
        _DEBUG "Skipped $path - repeated show number"
        continue
    fi

    #
    # The time stamp written by 'find' contains no spaces, so everything after
    # the first space is the path. 'cut' keeps field 2 onwards ('-f2-') so
    # that a path which itself contains spaces isn't chopped short.
    #
done < <(find "$UPLOADS" -regextype posix-extended -regex '.*hpr[0-9]{4}.*' \
            -printf "%CY%Cm%Cd%CH%CM%CS %p\n" | sort | cut -d' ' -f2-)
 | |
| 
 | |
| # Old 'find' used:
 | |
| # done < <(find "$UPLOADS" -regextype posix-extended -regex '.*hpr[0-9]{4}.*' | sort)
 | |
| 
 | |
| #
 | |
| # No shows processed? There was nothing to do
 | |
| #
 | |
#
# No shows processed? There was nothing to do. The exit value is given
# explicitly because a bare 'exit' would pass on the false result of the test
# before it when in dry-run mode, making a normal run look like a failure.
#
if [[ $ind -eq 0 ]]; then
    [[ $DRYRUN -eq 0 ]] && echo "Nothing to do"
    exit 0
fi

_DEBUG "Number of shows scanned: $ind"
# _DEBUG "Accumulated directories (${#dirs[*]}): $(printf '/%s/ ' "${dirs[*]}")"

#
# If there are no directories just exit. For an array '-v' tests element 0,
# which is unset if nothing was ever added. Not having any directories is not
# an error, so exit with zero rather than with the false result of the test.
#
[[ -v dirs ]] || exit 0

#
# By an (as yet) unknown process we might get duplicates, so remove them here.
# Using the names as keys of an associative array leaves one of each, though
# not necessarily in the original order.
#
# mapfile -t dirs < <(printf "%s\n" "${dirs[*]}" | uniq)
declare -A unique
for e in "${dirs[@]}"; do unique[$e]=1; done
dirs=( "${!unique[@]}" )
# mapfile -t dirs < <(printf '%s\n' "${!unique[@]}")

# "${dirs[@]}" makes 'printf' apply its format to each directory in turn
_DEBUG "Directories to process (${#dirs[*]}): $(printf '>%s< ' "${dirs[@]}")"

#
# Clean up any empty directories. These may exist because we moved their
# contents one file at a time. We only deal with the directories we've visited
# though.
#
for dir in "${dirs[@]}"; do
    path="$UPLOADS/$dir"

    if [[ $DRYRUN -eq 0 ]]; then
        if is_empty "$path"; then
            #
            # 'is_empty' only looks for regular files, so sub-directories
            # left behind after their files were moved are removed here as
            # well. The ':?' makes the expansion fail rather than hand 'rm'
            # an empty path, and '--' ends its option list.
            #
            if rm -rf -- "${path:?}"; then
                echo "Deleted $path"
                echo "$(date +%Y%m%d%H%M%S) deleted empty directory $path" >> "$LOGFILE"
            else
                echo "Failed to delete: $path"
            fi
        else
            echo "Directory is not empty: $path"
            echo "Not deleted!"
        fi
    else
        echo "Would delete directory $path"
    fi

done

exit
 | |
| 
 | |
| # vim: syntax=sh:ts=8:sw=4:ai:et:tw=78:fo=tcrqn21:fdm=marker
 |