forked from HPR/hpr-tools
		
	Moved project directories and files to an empty local repo
This commit is contained in:
		
							
								
								
									
										454
									
								
								InternetArchive/tidy_uploaded
									
									
									
									
									
										Executable file
									
								
							
							
						
						
									
										454
									
								
								InternetArchive/tidy_uploaded
									
									
									
									
									
										Executable file
									
								
							| @@ -0,0 +1,454 @@ | ||||
| #!/bin/bash - | ||||
| #=============================================================================== | ||||
| # | ||||
| #         FILE: tidy_uploaded | ||||
| # | ||||
| #        USAGE: ./tidy_uploaded [-h] [-v] [-d {0|1}] [-c COUNT] [-D] | ||||
| # | ||||
| #  DESCRIPTION: Relocates HPR audio and other show-related files on 'borg' | ||||
| #               after their shows have been uploaded to the Internet Archive | ||||
| # | ||||
| #      OPTIONS: --- | ||||
| # REQUIREMENTS: --- | ||||
| #         BUGS: --- | ||||
| #        NOTES: --- | ||||
| #       AUTHOR: Dave Morriss (djm), Dave.Morriss@gmail.com | ||||
| #      VERSION: 0.0.10 | ||||
| #      CREATED: 2022-03-30 17:38:01 | ||||
| #     REVISION: 2022-07-30 14:30:43 | ||||
| # | ||||
| #=============================================================================== | ||||
|  | ||||
set -o nounset                              # Treat unset variables as an error

VERSION="0.0.10"

SCRIPT=${0##*/}
# DIR=${0%/*}

#
# NOTE: despite its name this is file descriptor 2 (standard error). The
# _usage function writes the help text to it.
#
STDOUT="/dev/fd/2"

#
# Load library functions
#
LIB="$HOME/bin/function_lib.sh"
[ -e "$LIB" ] || { echo "Unable to source functions" >&2; exit 1; }
# shellcheck disable=SC1090
source "$LIB"

#
# Make temporary files and set traps to delete them. The trap body is in
# single quotes so that $TMP1 is expanded when the trap fires; the inner
# double quotes keep the path as a single argument even if it contains
# whitespace. 'cleanup_temp' is not defined in this file (presumably it comes
# from the library sourced above).
#
TMP1=$(mktemp) || { echo "$SCRIPT: creation of temporary file failed!" >&2; exit 1; }
trap 'cleanup_temp "$TMP1"' SIGHUP SIGINT SIGPIPE SIGTERM EXIT

#
# Configure depending whether local or on the VPS. Any other host is rejected
# because the directory layout is only known for these two.
#
case "$HOSTNAME" in
    borg)       BASEDIR="$HOME/InternetArchive"
                UPLOADS="/data/IA/uploads"
                ARCHIVE="/data/IA/done" ;;
    i7-desktop) BASEDIR="$HOME/HPR/InternetArchive"
                UPLOADS="$HOME/HPR/IA/uploads"
                ARCHIVE="$HOME/HPR/IA/done";;
    *)          echo "Wrong host!" >&2; exit 1 ;;
esac
|  | ||||
#===  FUNCTION  ================================================================
#         NAME: exists_in
#  DESCRIPTION: Checks the existence of a key in an associative array. A key
#               whose value is the empty string still counts as existing.
#   PARAMETERS: $1      array name
#               $2      key value
#      RETURNS: True if the key exists, False otherwise (also False when the
#               array name is not a valid identifier or the key is empty)
#
# Modified from
# https://stackoverflow.com/questions/13219634/easiest-way-to-check-for-an-index-or-a-key-in-an-array
#===============================================================================
exists_in () {
    # Oddly-named locals so they cannot shadow the caller's array
    local __ei_array="${1:?Usage: exists_in array key}"
    local __ei_key="${2?Usage: exists_in array key}"

    #
    # The array name is spliced into the string given to 'eval', so refuse
    # anything that is not a plain identifier.
    #
    [[ $__ei_array =~ ^[A-Za-z_][A-Za-z0-9_]*$ ]] || return 2

    #
    # An empty subscript is a "bad array subscript" error in Bash; such a key
    # can never exist.
    #
    [[ -n $__ei_key ]] || return 1

    #
    # Only the array name is expanded now; the key is expanded by 'eval'
    # itself. The '+word' expansion yields 'muahaha' when the element is set
    # (even if empty) and nothing otherwise.
    #
    eval "[[ -n \${${__ei_array}[\$__ei_key]+muahaha} ]]"
}
|  | ||||
#===  FUNCTION  ================================================================
#         NAME: queued_tasks
#  DESCRIPTION: Queries the IA for the tasks belonging to an item and counts
#               those whose category is "catalog". Writes the number to
#               STDOUT so it can be captured. If the query or the counting
#               fails a message is written to STDERR and 0 is written to
#               STDOUT, so a caller that only captures the output behaves as
#               before.
#   PARAMETERS: $1      IA item (like hpr1192)
#      RETURNS: True if the count was obtained, otherwise False
#=============================================================================== 
queued_tasks () {
    local item="${1:?Usage: queued_tasks item}"
    local count=''

    #
    # 'pipefail' is enabled only inside the command substitution so that
    # a failure of 'ia' is not hidden by a successful 'jq'. The result must
    # also look like a number before we trust it.
    #
    if ! count="$(set -o pipefail
            ia tasks "$item" |
            jq -s '[.[] | if .category == "catalog" then .status else empty end] | length')" \
        || [[ ! $count =~ ^[0-9]+$ ]]; then
        echo "queued_tasks: unable to count the tasks for $item" >&2
        echo 0
        return 1
    fi

    echo "$count"

    return 0
}
|  | ||||
#===  FUNCTION  ================================================================
#         NAME: movefile
#  DESCRIPTION: Moves a file to a new place, catering for any directories in
#               the path. An existing file in the destination is never
#               overwritten.
#   PARAMETERS: $1      directory to move from
#               $2      directory to move to
#               $3      file (or sub-path to move)
#      RETURNS: True if a move was done, otherwise False
#===============================================================================
movefile () {
    local fromdir="${1:?Usage: movefile fromdir todir path}"
    local todir="${2:?Usage: movefile fromdir todir path}"
    local path="${3:?Usage: movefile fromdir todir path}"

    #
    # Extract the directory part of the path, if there is one. Testing for
    # a '/' (rather than comparing the directory and file parts) means
    # a path like 'name/name' keeps its directory.
    #
    local dir=''
    [[ $path == */* ]] && dir="${path%/*}"

    #
    # If we have a directory in the path make sure it exists in the 'to'
    # directory. 'mkdir -p' does nothing if it is already there.
    #
    if [[ -n $dir ]]; then
        if ! mkdir -p -- "$todir/$dir"; then
            echo "Unable to create directory: $todir/$dir" >&2
            return 1
        fi
    fi

    #
    # Does the file exist already?
    # TODO: Compare the two files?
    #
    if [[ -e $todir/$path ]]; then
        echo "File already exists: $todir/$path"
        return 1
    fi

    #
    # Only report success if the move really happened
    #
    if mv -- "$fromdir/$path" "$todir/$path"; then
        echo "Moved $fromdir/$path"
        return 0
    fi

    echo "Failed to move $fromdir/$path" >&2
    return 1
}
|  | ||||
#===  FUNCTION  ================================================================
#         NAME: is_empty
#  DESCRIPTION: Check whether a directory is empty (of files). Only regular
#               files are looked for, at any depth, so a directory holding
#               nothing but sub-directories counts as empty. Anything that
#               is not a directory, or that 'find' cannot scan completely, is
#               reported as not empty because callers may delete what this
#               function calls empty.
#   PARAMETERS: $1      Directory to test
#      RETURNS: True if empty (of files), otherwise false
#===============================================================================
is_empty() {
    local dir="${1:?Usage: is_empty directory}"
    local found

    [[ -d $dir ]] || return 1

    #
    # Print a single 'X' for the first regular file found and stop there.
    # A failure of 'find' is treated as "not empty".
    #
    found="$(find "$dir" -mindepth 1 -type f -printf X -quit)" || return 1

    [[ -z $found ]]
}
|  | ||||
#===  FUNCTION  ================================================================
#         NAME: _DEBUG
#  DESCRIPTION: Writes a message if in DEBUG mode. Each argument is written
#               to STDOUT on its own line with a 'D> ' prefix. The global
#               variable DEBUG controls this: nothing is written when it is
#               0 or unset.
#   PARAMETERS: List of messages
#      RETURNS: Nothing
#===============================================================================
_DEBUG () {
    # Keep the loop variable out of the caller's namespace
    local msg

    # An unset DEBUG counts as 0 so the function is safe under 'nounset'
    [[ ${DEBUG-0} == 0 ]] && return

    for msg in "$@"; do
        printf 'D> %s\n' "$msg"
    done
}
|  | ||||
#===  FUNCTION  ================================================================
#         NAME: _usage
#  DESCRIPTION: Report usage. The help text is written to the file named in
#               the global variable STDOUT (file descriptor 2 if that is not
#               set) and then the script exits. Uses the globals SCRIPT,
#               VERSION, UPLOADS and ARCHIVE in the text.
#   PARAMETERS: 1       [optional] exit value
#      RETURNS: Nothing (does not return; exits the script)
#===============================================================================
_usage () {
    local -i res="${1:-0}"

    # The redirection target is quoted so that a name containing whitespace
    # does not cause an "ambiguous redirect" error
    cat > "${STDOUT:-/dev/fd/2}" <<-endusage
${SCRIPT} - version: ${VERSION}

Usage: ./${SCRIPT} [-h] [-v] [-c COUNT] [-d {0|1}] [-D]

Moves HPR audio and other show-related files on 'borg' after their shows
have been uploaded to the Internet Archive. Files to be uploaded are in the
directory ${UPLOADS} and they are moved to the directory ${ARCHIVE}.

Options:
  -h                    Print this help
  -v                    Run in verbose mode where more information is reported
  -d 0|1                Dry run: -d 1 (the default) runs the script in dry-run
                        mode where nothing is moved but the actions that
                        will be taken are reported; -d 0 turns off dry-run
                        mode and the actions will be carried out.
  -c COUNT              Count of shows to process. If omitted or zero then all
                        shows will be processed, otherwise this is the number
                        to stop at.
  -D                    Run in debug mode where a lot more information is
                        reported

Examples
    ./tidy_uploaded             # Run in (default) dry-run mode
    ./tidy_uploaded -v          # Dry-run mode with verbose messages
    ./tidy_uploaded -d0         # Live mode (without verbose messages)
    ./tidy_uploaded -c1         # Process 1 show in dry-run mode
    ./tidy_uploaded -D          # Run with debugging enabled

endusage
    exit "$res"
}
|  | ||||
| #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | ||||
|  | ||||
#
# Directories and files
#
LOGS="$BASEDIR/logs"
LOGFILE="$LOGS/$SCRIPT.log"

#
# Process options. The leading ':' in the option string selects silent error
# reporting, in which a missing option argument is reported as ':' and an
# unknown option as '?'.
#
while getopts :c:d:Dhv opt
do
    case "${opt}" in
        c) COUNT=$OPTARG;;
        D) DEBUG=1;;
        d) DRYRUN=$OPTARG;;
        h) _usage 0;;
        v) VERBOSE=1;;
        :) echo "** Option -$OPTARG needs an argument"
           _usage 1;;
        *) echo "** Unknown option"
           _usage 1;;
    esac
done
shift $((OPTIND - 1))

COUNT=${COUNT:-0}
if [[ ! $COUNT =~ ^[0-9]+$ ]]; then
    echo "** Use a numeric argument with -c"
    _usage 1
fi
#
# Force base 10 so that a value with leading zeroes (like '08') is not taken
# to be octal when it is used in numeric comparisons later.
#
COUNT=$((10#$COUNT))

#
# Validate as text. A numeric test ('-ne') would evaluate whatever the user
# typed as an arithmetic expression.
#
DRYRUN=${DRYRUN:-1}
if [[ ! $DRYRUN =~ ^[01]$ ]]; then
    echo "** Use '-d 0' or '-d 1'"
    _usage 1
fi
[[ $DRYRUN -eq 1 ]] && echo "Dry run mode"

VERBOSE=${VERBOSE:-0}

DEBUG=${DEBUG:-0}
[[ $DEBUG -eq 1 ]] && echo "Debug mode"

#
# Should have no arguments
#
if [[ $# != 0 ]]; then
    echo "** ${SCRIPT} takes no arguments"
    _usage 1
fi
|  | ||||
#
# Declarations
#   seen    IA identifiers already encountered (used as a set)
#   dirs    item names which have a directory under $UPLOADS
#   ind     number of shows counted so far
#
declare -A seen
declare -a dirs
# lastitem=
ind=0

#
# Scan the directory 'UPLOADS' where files for upload to the IA are stored.
#
# See the `find' pipeline at the end of the loop which outputs the last change
# time and the full file path, sorts on the time, then removes it. This
# ensures we process the files in time order rather than alphabetic order of
# their names.
#
# 'IFS=' stops 'read' trimming leading and trailing whitespace from a path.
#
while IFS= read -r path; do
    #
    # Extract the path relative to $UPLOADS and the IA item name from the
    # returned path. Here $relpath will be the filename or a sub-directory and
    # filename, and $item will be the IA identifier like 'hpr1192' (the first
    # seven characters of the relative path).
    #
    relpath="${path#"$UPLOADS"/}"
    item="${relpath:0:7}"

    [[ $VERBOSE -eq 1 ]] && echo "Found $path"

    _DEBUG "Path:          $path"
    _DEBUG "Relative path: $relpath"
    _DEBUG "IA item:       $item"

    #
    # If we have seen this item before we don't need to process it, so just
    # skip this loop iteration (see the 'else' branch below).
    #

    #
    # Never seen before, so process it
    #
    if ! exists_in seen "$item"; then
        # shellcheck disable=SC2034
        seen[$item]=1

        #
        # Count this item and stop the loop if we've reached the requested
        # count. We want the value of $ind to be the number of shows
        # processed, so adjust it if we stopped after incrementing it.
        #
        ((ind++))
        if [[ $COUNT -gt 0 ]]; then
            if [[ $ind -gt $COUNT ]]; then
                ((ind--))
                break
            fi
            echo "[ Show #$ind ]"
        fi

        #
        # Look to see if there are any tasks queued for this show. If there
        # are we'll skip it just now.
        #
        tasks=$(queued_tasks "$item")
        if [[ $tasks -gt 0 ]]; then
            echo "** Item $item still has $tasks unfinished " \
                "$(ngettext task tasks "$tasks")"
            echo "** Skipping to the next item"
            continue
        fi

        [[ $VERBOSE -eq 1 ]] && echo "Checking IA for $item"

        #
        # Interrogate the IA for the item we're working on. If it returns True
        # we can proceed with tidying. The file 'TMP1' contains just a simple
        # list of the files on the IA relating to this item.
        #
        if ia list "$item" > "$TMP1"; then
            #
            # Save any directory associated with this item. This means that
            # directories with names that don't conform to the "^hpr[0-9]{4}"
            # pattern will be ignored, but this is *not* expected to happen.
            # Note that directories without corresponding audio will not be
            # cleaned up by this method, but again this is not expected to
            # happen.
            # TODO: be alert to such issues!
            #
            dirpath="$UPLOADS/$item"
            if [[ -d "$dirpath" ]]; then
                echo "Storing directory: $item"
                dirs+=("$item")
            fi

            moves=0

            #
            # Scan the returned list to see if any files we have are online.
            # Move to the ARCHIVE directory when there's a match.
            #
            while IFS= read -r file; do
                frompath="$UPLOADS/$file"
                topath="$ARCHIVE/$file"

                if [[ -e "$frompath" ]]; then
                    #
                    # A file on the IA exists in the upload area. Move the
                    # local one if we're not in dry-run mode, otherwise just
                    # report the move we would do.
                    #
                    if [[ $DRYRUN -eq 0 ]]; then
                        movefile "$UPLOADS" "$ARCHIVE" "$file" && ((moves++))
                    else
                        printf 'Would move %s\n\tto %s\n' "$frompath" "$topath"
                    fi
                fi
            done < "$TMP1"

            #
            # Log this item
            #
            [[ $DRYRUN -eq 0 ]] && \
                printf '%s moved %d %s for %s\n' "$(date +%Y%m%d%H%M%S)" \
                    "$moves" "$(ngettext file files "$moves")" "$item" >> "$LOGFILE"

        else
            printf 'Skipping %s; not in the IA\n' "$item"
        fi
    else
        #
        # Ignore all but the first file belonging to an IA identifier
        #
        _DEBUG "Skipped $path - repeated show number"
        continue
    fi

    #
    # The timestamp written by 'find' contains no spaces, so everything after
    # the first space is the path. 'cut -f2-' keeps all of it; plain '-f2'
    # would truncate a path containing a space.
    #
done < <(find "$UPLOADS" -regextype posix-extended -regex '.*hpr[0-9]{4}.*' -printf "%CY%Cm%Cd%CH%CM%CS %p\n" | sort  | cut -f2- -d' ')
|  | ||||
| # Old 'find' used: | ||||
| # done < <(find "$UPLOADS" -regextype posix-extended -regex '.*hpr[0-9]{4}.*' | sort) | ||||
|  | ||||
#
# No shows processed? There was nothing to do. This is a normal outcome so the
# exit status is given explicitly; a bare 'exit' would pass on the status of
# the (possibly false) test before it.
#
if [[ $ind -eq 0 ]]; then
    [[ $DRYRUN -eq 0 ]] && echo "Nothing to do"
    exit 0
fi

_DEBUG "Number of shows scanned: $ind"
# _DEBUG "Accumulated directories (${#dirs[*]}): $(printf '/%s/ ' "${dirs[*]}")"

#
# If there are no directories just exit (successfully).
#
[[ -v dirs ]] || exit 0

#
# By an (as yet) unknown process we might get duplicates, so remove them here.
# The names are used as the keys of an associative array and then read back,
# so the original order is not preserved.
#
# mapfile -t dirs < <(printf "%s\n" "${dirs[*]}" | uniq)
declare -A unique
for e in "${dirs[@]}"; do unique[$e]=1; done
dirs=( "${!unique[@]}" )
# mapfile -t dirs < <(printf '%s\n' "${!unique[@]}")

# "${dirs[@]}" makes 'printf' wrap each name separately
_DEBUG "Directories to process (${#dirs[*]}): $(printf '>%s< ' "${dirs[@]}")"

#
# Clean up any empty directories. These may exist because we moved their
# contents one file at a time. We only deal with the directories we've visited
# though.
#
for dir in "${dirs[@]}"; do
    path="$UPLOADS/$dir"

    if [[ $DRYRUN -eq 0 ]]; then
        if is_empty "$path"; then
            # '--' ends the options and ':?' aborts if the path is empty
            rm -rf -- "${path:?}"
            RES=$?
            if [[ $RES -eq 0 ]]; then
                echo "Deleted $path"
                echo "$(date +%Y%m%d%H%M%S) deleted empty directory $path" >> "$LOGFILE"
            else
                echo "Failed to delete: $path"
            fi
        else
            echo "Directory is not empty: $path"
            echo "Not deleted!"
        fi
    else
        echo "Would delete directory $path"
    fi

done

exit 0
|  | ||||
| # vim: syntax=sh:ts=8:sw=4:ai:et:tw=78:fo=tcrqn21:fdm=marker | ||||
		Reference in New Issue
	
	Block a user