forked from HPR/hpr-tools
		
	InternetArchive/future_upload: now updates the state of shows
InternetArchive/reformat_html: new Perl script to reformat the HTML
    originally found in the HPR database in the 'notes' field to the format
    required in the 'description' field of an item on the IA. It reads
    from STDIN and writes to STDOUT.
		
	
		
			
				
	
	
		
			495 lines
		
	
	
		
			15 KiB
		
	
	
	
		
			Bash
		
	
	
		
			Executable File
		
	
	
	
	
			
		
		
	
	
			495 lines
		
	
	
		
			15 KiB
		
	
	
	
		
			Bash
		
	
	
		
			Executable File
		
	
	
	
	
| #!/bin/bash -
 | |
| #===============================================================================
 | |
| #
 | |
| #         FILE: tidy_uploaded
 | |
| #
 | |
| #        USAGE: ./tidy_uploaded [-h] [-v] [-d {0|1}] [-c COUNT] [-D] [-F]
 | |
| #
 | |
| #  DESCRIPTION: Relocates HPR audio and other show-related files on 'borg'
 | |
| #               after their shows have been uploaded to the Internet Archive.
 | |
| #
 | |
| #      OPTIONS: ---
 | |
| # REQUIREMENTS: ---
 | |
| #         BUGS: ---
 | |
| #        NOTES: ---
 | |
| #       AUTHOR: Dave Morriss (djm), Dave.Morriss@gmail.com
 | |
| #      VERSION: 0.0.11
 | |
| #      CREATED: 2022-03-30 17:38:01
 | |
| #     REVISION: 2024-07-29 18:24:26
 | |
| #
 | |
| #===============================================================================
 | |
| 
 | |
# Abort on any reference to an unset variable
set -o nounset

VERSION="0.0.11"

# Name of this script with any leading directories removed
SCRIPT="${0##*/}"
# DIR=${0%/*}

# Where _usage sends its help text (note: this is file descriptor 2)
STDOUT="/dev/fd/2"

#
# The shared function library must exist before we can go any further
#
LIB="$HOME/bin/function_lib.sh"
if [[ ! -e $LIB ]]; then
    echo "Unable to source functions"
    exit 1
fi
# shellcheck disable=SC1090
. "$LIB"

#
# Create the temporary file and arrange for it to be removed however the
# script ends ('cleanup_temp' is presumably provided by the library above)
#
if ! TMP1=$(mktemp); then
    echo "$SCRIPT: creation of temporary file failed!"
    exit 1
fi
trap 'cleanup_temp $TMP1' SIGHUP SIGINT SIGPIPE SIGTERM EXIT

#
# Choose the working directories according to the host we are running on;
# any host other than the two known ones is refused
#
case "$HOSTNAME" in
    borg)
        BASEDIR="$HOME/InternetArchive"
        UPLOADS="/data/IA/uploads"
        ARCHIVE="/data/IA/done"
        ;;
    i7-desktop)
        BASEDIR="$HOME/HPR/InternetArchive"
        UPLOADS="$HOME/HPR/IA/uploads"
        ARCHIVE="$HOME/HPR/IA/done"
        ;;
    *)
        echo "Wrong host!"
        exit 1
        ;;
esac
 | |
| 
 | |
| # {{{ -- Functions -- exists_in, queued_tasks, movefile, is_empty, _log, _usage
 | |
| 
 | |
#===  FUNCTION  ================================================================
#         NAME: exists_in
#  DESCRIPTION: Checks the existence of a key in an associative array. The
#               array is reached through a nameref (bash 4.3+) rather than
#               'eval', so neither the array name nor the key is ever
#               re-parsed as shell code. A key whose value is the empty
#               string still counts as existing.
#   PARAMETERS: $1      array name
#               $2      key value
#      RETURNS: True if the key exists, False otherwise
#
# Modified from
# https://stackoverflow.com/questions/13219634/easiest-way-to-check-for-an-index-or-a-key-in-an-array
#===============================================================================
exists_in () {
    # The nameref has a deliberately unusual name so that it cannot clash
    # with (and circularly reference) the caller's array.
    local -n _exists_in_ref="${1:?Usage: exists_in array key}"
    local _exists_in_key="${2:?Usage: exists_in array key}"

    # '+set' expands to 'set' only when the element exists, even if its value
    # is empty, and it is safe under 'set -o nounset'.
    [[ -n ${_exists_in_ref[$_exists_in_key]+set} ]]
}
 | |
| 
 | |
#===  FUNCTION  ================================================================
#         NAME: queued_tasks
#  DESCRIPTION: Asks the IA (through 'ia tasks') about an item and counts the
#               entries whose category is "catalog". The count is written to
#               STDOUT for the caller to capture.
#   PARAMETERS: $1      IA item (like hpr1192)
#      RETURNS: Nothing (the count is delivered on STDOUT)
#===============================================================================
queued_tasks () {
    local ia_item="${1:?Usage: queued_tasks item}"
    # Integer attribute: an empty reply from the pipeline is stored as 0
    local -i pending=0

    pending="$(
        ia tasks "$ia_item" |
            jq -s '[.[] | if .category == "catalog" then .status else empty end] | length'
    )"

    echo "$pending"
}
 | |
| 
 | |
#===  FUNCTION  ================================================================
#         NAME: movefile
#  DESCRIPTION: Moves a file to a new place, catering for any directories in
#               the path. Any sub-directory in the path is created below the
#               'to' directory when it is missing there. An existing
#               destination file is only overwritten in FORCE mode.
#   PARAMETERS: $1      directory to move from
#               $2      directory to move to
#               $3      file (or sub-path to move)
#               $4      [optional] FORCE flag; 1 overwrites an existing
#                       destination, anything else (default 0) refuses to
#      RETURNS: True if a move was done, otherwise False (destination exists
#               without FORCE, or 'mkdir'/'mv' failed)
#===============================================================================
movefile () {
    local fromdir="${1:?Usage: movefile fromdir todir path [FORCE]}"
    local todir="${2:?Usage: movefile fromdir todir path [FORCE]}"
    local path="${3:?Usage: movefile fromdir todir path [FORCE]}"
    local FORCE="${4:-0}"

    local src="$fromdir/$path"
    local dest="$todir/$path"
    local subdir=''

    #
    # Only a path containing a '/' has a directory part. Testing for the
    # slash (rather than comparing the two halves of the path) keeps paths
    # like 'x/x' working.
    #
    [[ $path == */* ]] && subdir="${path%/*}"

    #
    # If we have a directory in the path check it exists in the 'to' directory
    # (not in the current directory) and create it if not
    #
    if [[ -n $subdir && ! -d $todir/$subdir ]]; then
        if ! mkdir -p -- "$todir/$subdir"; then
            echo "Failed to create directory: $todir/$subdir" >&2
            return 1
        fi
    fi

    #
    # Does the file exist already?
    # TODO: Compare the two files?
    #
    if [[ -e $dest ]]; then
        if [[ $FORCE -ne 1 ]]; then
            echo "File already exists: $dest"
            return 1
        fi

        echo "File exists: $dest"
        echo "FORCE mode is ON so overwriting"
        if ! mv --force -- "$src" "$dest"; then
            echo "Failed to move $src" >&2
            return 1
        fi
    else
        if ! mv -- "$src" "$dest"; then
            echo "Failed to move $src" >&2
            return 1
        fi
    fi

    #
    # Only report success once 'mv' has actually succeeded
    #
    echo "Moved $src"
    return 0
}
 | |
| 
 | |
#===  FUNCTION  ================================================================
#         NAME: is_empty
#  DESCRIPTION: Reports whether a directory tree holds no regular files.
#               Sub-directories alone do not make it non-empty. 'find' stops
#               at the first regular file it meets and prints a marker.
#   PARAMETERS: $1      Directory to test
#      RETURNS: True if empty (of files), otherwise false
#===============================================================================
is_empty() {
    local marker

    marker="$(find "$1" -mindepth 1 -type f -printf X -quit)"

    # No marker means no regular file was found anywhere below $1
    [[ -z $marker ]]
}
 | |
| 
 | |
#===  FUNCTION  ================================================================
#         NAME: _log
#  DESCRIPTION: Appends one record to the file named by $LOGFILE. The record
#               layout is the 'printf' template in $LOGREC when that variable
#               is set, otherwise a built-in default of timestamp and
#               message. The script is terminated if $LOGFILE is not defined.
#               The first data argument to 'printf' is -1 so that the
#               '%(fmt)T' directive renders the current date and time.
#   PARAMETERS: 1 - the message to write
#      RETURNS: Nothing
#===============================================================================
# shellcheck disable=SC2317 disable=SC2059
_log () {
    local text="$1"
    local template='%(%F %T)T %s\n'

    if [[ ! -v LOGFILE ]]; then
        echo "${FUNCNAME[0]}: \$LOGFILE is not defined"
        exit 1
    fi

    # A caller-supplied template (even an empty one) replaces the default
    if [[ -v LOGREC ]]; then
        template="$LOGREC"
    fi

    printf "$template" -1 "$text" >> "$LOGFILE"
}
 | |
| 
 | |
#===  FUNCTION  ================================================================
#         NAME: _usage
#  DESCRIPTION: Report usage. The help text is written to the file named by
#               the global $STDOUT and uses the globals $SCRIPT, $VERSION,
#               $UPLOADS and $ARCHIVE. The script is then terminated.
#   PARAMETERS: 1       [optional] exit value
#      RETURNS: Nothing (the function exits the script)
#===============================================================================
_usage () {
    local -i res="${1:-0}"

    cat >"$STDOUT" <<-endusage
${SCRIPT} - version: ${VERSION}

Usage: ./${SCRIPT} [-h] [-v] [-c COUNT] [-d {0|1}] [-D] [-F]

Moves HPR audio and other show-related files on 'borg' after their shows
have been uploaded to the Internet Archive. Files to be uploaded are in the
directory ${UPLOADS} and they are moved to the directory ${ARCHIVE}.

Options:
  -h                    Print this help
  -v                    Run in verbose mode where more information is reported
  -d 0|1                Dry run: -d 1 (the default) runs the script in dry-run
                        mode where nothing is moved but the actions that
                        will be taken are reported; -d 0 turns off dry-run
                        mode and the actions will be carried out.
  -c COUNT              Count of shows to process. If omitted or zero then all
                        shows will be processed, otherwise this is the number
                        to stop at.
  -D                    Run in debug mode where a lot more information is
                        reported
  -F                    Turn on FORCE mode (normally off). In this mode when
                        the files being tidied (moved) already exist, they are
                        overwritten. This is for the very rare case when
                        a show's audio has to be re-uploaded because of bad
                        audio or the wrong file being sent.

Examples
    ./tidy_uploaded             # Run in (default) dry-run mode
    ./tidy_uploaded -v          # Dry-run mode with verbose messages
    ./tidy_uploaded -d0         # Live mode (without verbose messages)
    ./tidy_uploaded -c1         # Process 1 show in dry-run mode
    ./tidy_uploaded -D          # Run with debugging enabled
    ./tidy_uploaded -F          # Run with FORCE mode on

endusage
    exit "$res"
}
 | |
| 
 | |
| # }}}
 | |
| 
 | |
| #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 | |
| 
 | |
#
# Directories and files
#
LOGS="$BASEDIR/logs"
LOGFILE="$LOGS/$SCRIPT.log"
# Record template for the '_log' function: timestamp then message
LOGREC='%(%F %T)T %s\n'

#
# Process options. The leading ':' in the option string selects silent error
# reporting: 'getopts' then returns ':' for an option that lacks its argument
# and '?' for an unknown option, with the offending letter in $OPTARG.
#
while getopts :c:d:DFhv opt
do
    case "${opt}" in
        c) COUNT=$OPTARG;;
        D) DEBUG=1;;
        d) DRYRUN=$OPTARG;;
        F) FORCE=1;;
        h) _usage 0;;
        v) VERBOSE=1;;
        :) echo "** Option -$OPTARG requires an argument"
           _usage 1;;
        *) echo "** Unknown option"
           _usage 1;;
    esac
done
shift $((OPTIND - 1))

#
# COUNT must be a whole number; zero (the default) means "no limit"
#
COUNT=${COUNT:-0}
if [[ ! $COUNT =~ ^[0-9]+$ ]]; then
    echo "** Use a numeric argument with -c"
    _usage 1
fi

#
# DRYRUN must be exactly 0 or 1. It is matched as text rather than compared
# arithmetically so that a non-numeric value is rejected cleanly instead of
# being evaluated as an arithmetic expression.
#
DRYRUN=${DRYRUN:-1}
if [[ ! $DRYRUN =~ ^[01]$ ]]; then
    echo "** Use '-d 0' or '-d 1'"
    _usage 1
fi
[[ $DRYRUN -eq 1 ]] && echo "Dry run mode"

FORCE=${FORCE:-0}
[[ $FORCE -eq 1 ]] && echo "Force mode - overwriting existing files"

VERBOSE=${VERBOSE:-0}

DEBUG=${DEBUG:-0}
[[ $DEBUG -eq 1 ]] && echo "Debug mode"

#
# Should have no arguments
#
if [[ $# != 0 ]]; then
    echo "** ${SCRIPT} takes no arguments"
    _usage 1
fi

#
# Declarations
#   seen - IA identifiers already handled by the scan loop
#   dirs - show directories found under UPLOADS, for later clean-up
#   ind  - number of shows counted by the scan loop
#
declare -A seen
declare -a dirs
# lastitem=
ind=0
 | |
| 
 | |
#
# Scan the directory 'UPLOADS' where files for upload to the IA are stored.
#
# See the `find' pipeline at the end of the loop which outputs the last change
# time and the full file path, sorts on the time, then removes it. This
# ensures we process the files in time order rather than alphabetic order of
# their names.
#
# The time is removed with 'cut -f2-' (everything after the first space) and
# the lines are read with an empty IFS, so a path containing spaces or
# leading/trailing blanks reaches the loop intact.
#
while IFS= read -r path; do
    #
    # Extract the path relative to $UPLOADS and the IA item name from the
    # returned path. Here $relpath will be the filename or a sub-directory and
    # filename, and $item will be the IA identifier like 'hpr1192' (the first
    # seven characters of the relative path).
    #
    relpath="${path#"$UPLOADS"/}"
    item="${relpath:0:7}"

    [[ $VERBOSE -eq 1 ]] && echo "Found $path"

    _DEBUG "Path:          $path"
    _DEBUG "Relative path: $relpath"
    _DEBUG "IA item:       $item"

    #
    # Each IA identifier is processed once, when its first file is met. Any
    # later file belonging to an identifier we have seen is skipped.
    #
    if ! exists_in seen "$item"; then
        # shellcheck disable=SC2034
        seen[$item]=1

        #
        # Count this item and stop the loop if we've reached the requested
        # count. We want the value of $ind to be the number of shows
        # processed, so adjust it if we stopped after incrementing it.
        #
        ((ind++))
        if [[ $COUNT -gt 0 ]]; then
            if [[ $ind -gt $COUNT ]]; then
                ((ind--))
                break
            fi
            echo "[ Show #$ind ]"
        fi

        #
        # Look to see if there are any tasks queued for this show. If there
        # are we'll skip it just now.
        #
        tasks=$(queued_tasks "$item")
        if [[ $tasks -gt 0 ]]; then
            echo "** Item $item still has $tasks unfinished" \
                "$(ngettext task tasks "$tasks")"
            echo "** Skipping to the next item"
            continue
        fi

        [[ $VERBOSE -eq 1 ]] && echo "Checking IA for $item"

        #
        # Interrogate the IA for the item we're working on. If it returns True
        # we can proceed with tidying. The file 'TMP1' contains just a simple
        # list of the files on the IA relating to this item.
        #
        if ia list "$item" > "$TMP1"; then
            #
            # Save any directory associated with this item. This means that
            # directories with names that don't conform to the "^hpr[0-9]{4}"
            # pattern will be ignored, but this is *not* expected to happen.
            # Note that directories without corresponding audio will not be
            # cleaned up by this method, but again this is not expected to
            # happen.
            # TODO: be alert to such issues!
            #
            dirpath="$UPLOADS/$item"
            if [[ -d "$dirpath" ]]; then
                echo "Storing directory: $item"
                dirs+=("$item")
            fi

            moves=0

            #
            # Scan the returned list to see if any files we have are online.
            # Move to the ARCHIVE directory when there's a match.
            #
            while IFS= read -r file; do
                frompath="$UPLOADS/$file"
                topath="$ARCHIVE/$file"

                if [[ -e "$frompath" ]]; then
                    #
                    # A file on the IA exists in the upload area. Move the
                    # local one if we're not in dry-run mode, otherwise just
                    # report the move we would do. If FORCE mode is on
                    # overwrite the file.
                    #
                    if [[ $DRYRUN -eq 0 ]]; then
                        movefile "$UPLOADS" "$ARCHIVE" "$file" "$FORCE" && ((moves++))
                    else
                        if [[ $FORCE -eq 0 ]]; then
                            printf 'Would move %s\n\tto %s\n' "$frompath" "$topath"
                        else
                            printf 'Would move %s\n\toverwriting %s\n' "$frompath" "$topath"
                        fi
                    fi
                fi
            done < "$TMP1"

            #
            # Log this item (live mode only)
            #
            [[ $DRYRUN -eq 0 ]] && \
                printf '%s moved %d %s for %s\n' "$(date +%Y%m%d%H%M%S)" \
                    "$moves" "$(ngettext file files "$moves")" "$item" >> "$LOGFILE"

        else
            printf 'Skipping %s; not in the IA\n' "$item"
        fi
    else
        #
        # Ignore all but the first file belonging to an IA identifier
        #
        _DEBUG "Skipped $path - repeated show number"
        continue
    fi

done < <(find "$UPLOADS" -regextype posix-extended -regex '.*hpr[0-9]{4}.*' -printf "%CY%Cm%Cd%CH%CM%CS %p\n" | sort | cut -d' ' -f2-)
 | |
| 
 | |
#
# No shows processed? There was nothing to do. This is not an error, so exit
# with an explicit zero status; a bare 'exit' would pass on the failure status
# of the dry-run test on the line before it.
#
if [[ $ind -eq 0 ]]; then
    [[ $DRYRUN -eq 0 ]] && echo "Nothing to do"
    exit 0
fi

_DEBUG "Number of shows scanned: $ind"

#
# If there are no directories just exit (successfully). The test is true
# only when at least one directory was stored by the scan loop.
#
[[ -v dirs ]] || exit 0

#
# By an (as yet) unknown process we might get duplicates, so remove them here
# by passing the names through the keys of an associative array. The order of
# the resulting list is not significant.
#
declare -A unique
for e in "${dirs[@]}"; do unique[$e]=1; done
dirs=( "${!unique[@]}" )

# "${dirs[@]}" makes 'printf' repeat its format once per directory
_DEBUG "Directories to process (${#dirs[@]}): $(printf '>%s< ' "${dirs[@]}")"

#
# Clean up any empty directories. These may exist because we moved their
# contents one file at a time. We only deal with the directories we've visited
# though.
#
for dir in "${dirs[@]}"; do
    path="$UPLOADS/$dir"

    if [[ $DRYRUN -eq 0 ]]; then
        if is_empty "$path"; then
            # 'is_empty' only looks for files, so empty sub-directories may
            # remain; hence the recursive delete
            if rm -rf -- "$path"; then
                echo "Deleted $path"
                echo "$(date +%Y%m%d%H%M%S) deleted empty directory $path" >> "$LOGFILE"
            else
                echo "Failed to delete: $path"
            fi
        else
            echo "Directory is not empty: $path"
            echo "Not deleted!"
        fi
    else
        echo "Would delete directory $path"
    fi

done

exit
 | |
| 
 | |
| # vim: syntax=sh:ts=8:sw=4:ai:et:tw=78:fo=tcrqn21:fdm=marker
 |