hpr-tools/InternetArchive/delete_ia_item

291 lines
8.6 KiB
Bash
Executable File

#!/bin/bash -
#===============================================================================
#
# FILE: delete_ia_item
#
# USAGE: ./delete_ia_item [-h] [-d {0|1}] episode
#
# DESCRIPTION: Deletes an uploaded item on the IA. The item (identifier)
# can't be deleted entirely but it can be stripped of contents
# and metadata and left in a 'Reserved' state so the slot can be
# reused.
#
# OPTIONS: ---
# REQUIREMENTS: ---
# BUGS: ---
# NOTES: ---
# AUTHOR: Dave Morriss (djm), Dave.Morriss@gmail.com
# VERSION: 0.0.3
# CREATED: 2022-05-08 19:40:37
# REVISION: 2022-08-14 23:09:51
#
#===============================================================================
set -o nounset # Treat unset variables as an error
VERSION="0.0.3"
SCRIPT=${0##*/}
# DIR=${0%/*}
# NOTE: despite the name this is file descriptor 2 (standard error); _usage
# writes its report there.
STDOUT="/dev/fd/2"
#
# Load library functions. Start-up failures are reported on standard error so
# they are not lost when standard output is redirected.
#
LIB="$HOME/bin/function_lib.sh"
[ -e "$LIB" ] || { echo "Unable to source functions" >&2; exit 1; }
# shellcheck source=function_lib.sh
source "$LIB"
#
# Colour codes (define_colours is not defined in this file; presumably it
# comes from the library sourced above)
#
define_colours
#
# Make temporary files and set traps to delete them. The trap text is
# single-quoted so $TMP1 is expanded when the trap fires; it is double-quoted
# inside so a path containing whitespace reaches cleanup_temp as one argument.
#
TMP1=$(mktemp) || { echo "$SCRIPT: creation of temporary file failed!" >&2; exit 1; }
trap 'cleanup_temp "$TMP1"' SIGHUP SIGINT SIGPIPE SIGTERM EXIT
#=== FUNCTION ================================================================
# NAME: _DEBUG
# DESCRIPTION: Writes messages if in DEBUG mode. Each argument is printed on
#              its own line on standard output, prefixed with 'D> '.
# PARAMETERS: List of messages
# GLOBALS: DEBUG (read) - debugging is off when this is unset, empty or '0'
# RETURNS: Nothing
#===============================================================================
_DEBUG () {
    # Keep the loop variable out of the caller's (global) scope
    local msg
    # Default an unset DEBUG to 0 so the test is safe under 'set -o nounset'
    [ "${DEBUG:-0}" == 0 ] && return
    for msg in "$@"; do
        printf 'D> %s\n' "$msg"
    done
}
#=== FUNCTION ================================================================
# NAME: _usage
# DESCRIPTION: Report usage, then exit the script
# PARAMETERS: 1 [optional] exit value (default 0)
# GLOBALS: SCRIPT, VERSION (read) - interpolated into the report
#          STDOUT (read) - file the report is written to
# RETURNS: Nothing (the function ends by calling 'exit')
#===============================================================================
_usage () {
    local -i res="${1:-0}"
    # The here-document body and its terminator are kept at column 0: '<<-'
    # strips leading tabs only, so any other indentation would be printed.
    cat >"$STDOUT" <<-endusage
${SCRIPT} - version: ${VERSION}
Usage: ./${SCRIPT} [-h] [-d {0|1}] episode
Deletes an uploaded item on the IA. The item (identifier) can't be deleted
entirely but it can be stripped of contents and metadata and left in
a 'Reserved' state so the slot can be reused.
Options:
-h Print this help
-d 0|1 Dry run: -d 1 (the default) runs the script in dry-run
mode where nothing is deleted but the actions that
will be taken are reported; -d 0 turns off dry-run
mode and the actions will be carried out.
Arguments:
episode Defines the episode (IA identifier) to be deleted from
archive.org. These identifiers are in the format
'hprNNNN' where 'NNNN' is a number with leading
zeroes, and 'hpr' is mandatory.
The script attempts to reformat incorrect identifiers
before giving up. The missing 'hpr' is added, and
missing leading zeroes are inserted. Thus '42' and
'hpr42' become 'hpr0042'.
Environment variables:
delete_ia_item_DEBUG If set to a non-zero value then the debugging
statements in the script are executed. Otherwise if
set to zero, or if the variable is absent no debug
information is produced. The variable can be set
using the 'export' command or on the same line as the
command calling the script. See the example below.
Examples
./delete_ia_item 3594 # Run in (default) dry-run mode
./delete_ia_item -d1 3594 # Run in (explicit) dry-run mode
./delete_ia_item -d0 3594 # Live mode
delete_ia_item_DEBUG=1 ./delete_ia_item 3594
# Run in dry-run mode with debugging enabled
endusage
    exit "$res"
}
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#
# Choose the working directory according to the host we are running on.
# Hosts that are not listed are refused outright.
#
if [[ $HOSTNAME == hprvps || $HOSTNAME == marvin || $HOSTNAME == borg ]]; then
    BASEDIR="$HOME/IA"
elif [[ $HOSTNAME == i7-desktop ]]; then
    BASEDIR="$HOME/HPR/IA"
else
    echo "Wrong host!"
    exit 1
fi
#
# Everything below runs from inside the chosen directory
#
if ! cd "$BASEDIR"; then
    echo "Can't cd to $BASEDIR"
    exit 1
fi
#
# Directories and files
#
LOGS="$BASEDIR/logs"
LOGFILE="$LOGS/$SCRIPT.log"
#
# Debug mode. Invoke it with: 'delete_ia_item_DEBUG=1 ./delete_ia_item'
# The variable name is built from the script name and read by indirection;
# an absent or empty variable counts as 0.
#
DEBUGVAR="${SCRIPT}_DEBUG"
DEBUG="${!DEBUGVAR:-0}"
# Any value other than '0' switches debugging on, which is the same test
# _DEBUG applies. A string comparison is used so a non-numeric value is not
# evaluated as an arithmetic expression.
[[ $DEBUG != 0 ]] && echo "Debug mode"
#
# File of processed shows (created empty if it does not exist yet)
#
PROCFILE="$BASEDIR/.${SCRIPT}.dat"
[ -e "$PROCFILE" ] || touch "$PROCFILE"
#
# Prerequisites: both external tools must be found, otherwise give up now
#
jq=$(command -v jq)
if [[ -z $jq ]]; then
    echo "Needs the 'jq' JSON filter"
    exit 1
fi
ia=$(command -v ia)
if [[ -z $ia ]]; then
    echo "Needs the 'ia' Internet Archive script"
    exit 1
fi
#
# Process options. The leading ':' in the option string selects getopts'
# silent mode: a missing option argument is reported as ':' and an unknown
# option as '?', with the offending letter in OPTARG.
#
while getopts :d:h opt
do
    case "${opt}" in
        d) DRYRUN=$OPTARG;;
        h) _usage 0;;
        :) echo "** Option -$OPTARG requires an argument"
            _usage 1;;
        *) echo "** Unknown option"
            _usage 1;;
    esac
done
shift $((OPTIND - 1))
#
# Default to dry-run mode.
# NOTE(review): DRYRUN is not initialised before the loop, so when -d is not
# given a DRYRUN value inherited from the environment is used - confirm this
# is intended.
#
DRYRUN=${DRYRUN:-1}
#
# Accept exactly '0' or '1'. The comparison is done on strings because the
# arithmetic operators (-eq/-ne) would evaluate the value as an expression:
# '00' would count as 0 (live mode) and a word would be read as a variable
# name.
#
if [[ $DRYRUN != 0 && $DRYRUN != 1 ]]; then
    echo "** Use '-d 0' or '-d 1'"
    _usage 1
fi
[[ $DRYRUN == 1 ]] && echo "Dry run mode"
#=== FUNCTION ================================================================
# NAME: _normalise_item
# DESCRIPTION: Converts an episode specification into the canonical
#              'hprNNNN' form. The text is lower-cased, surrounding
#              whitespace is removed, a missing 'hpr' prefix is added and
#              the number is zero-padded to four digits.
# PARAMETERS: 1 - episode specification, e.g. '42', 'hpr42' or 'HPR0042'
# RETURNS: 0 with the identifier on standard output if the specification is
#          valid; 1 with no output otherwise
#===============================================================================
_normalise_item () {
    local spec="${1:-}"
    spec="${spec,,}"
    # Trim all leading, then all trailing, whitespace
    spec="${spec#"${spec%%[![:space:]]*}"}"
    spec="${spec%"${spec##*[![:space:]]}"}"
    [[ $spec =~ ^(hpr)?([0-9]{1,4})$ ]] || return 1
    # Force base 10. Given the digits as they stand 'printf' treats a leading
    # zero as an octal prefix, so '0042' would become 34 and '0089' would be
    # rejected as an invalid number.
    printf 'hpr%04d\n' "$((10#${BASH_REMATCH[2]}))"
}
#
# Should have only one argument
#
if [[ $# != 1 ]]; then
    echo "${red}${SCRIPT} takes one argument${reset}"
    _usage 1
fi
#
# Collect the argument and clean and validate it, forcing leading zeroes if
# needed
#
if ! item=$(_normalise_item "${1:-}"); then
    echo "${red}Invalid episode specification: '${1:-}'${reset}"
    echo "${yellow}Use hprNNNN format with leading zeroes${reset}"
    _usage 1
fi
_DEBUG "Dry run: $DRYRUN"
_DEBUG "Item chosen: $item"
#
# Check the item exists on the IA and if it does collect metadata and parse
# out the items we need.
#
_DEBUG "Testing IA for existence of $item"
if ! ia list "$item" > /dev/null 2>&1; then
    echo "${red}The requested item '$item' is not on archive.org${reset}"
    exit 1
fi
#
# The values parsed below are used to build the metadata removal commands,
# so stop here if the metadata can't be fetched rather than carrying on with
# empty values.
#
if ! ia metadata "$item" > "$TMP1"; then
    echo "${red}Unable to collect metadata for '$item'${reset}"
    exit 1
fi
# This one's an array but we want a CSV list.
# TODO: Not sure this works with tags containing spaces
subject="$(jq -r '.metadata.subject | @csv' "$TMP1")"
# Collapse any run of three double quotes into a single one
subject="${subject//\"\"\"/\"}"
creator="$(jq -r '.metadata.creator' "$TMP1")"
date="$(jq -r '.metadata.date' "$TMP1")"
_DEBUG "subject: $subject"
_DEBUG "creator: $creator"
_DEBUG "date: $date"
#
# Either pretend to do stuff in dry-run mode or do it for real, but with
# confirmation first.
#
if [[ $DRYRUN -eq 1 ]]; then
# Dry run: nothing is changed. Only display the commands that the live branch
# below would run, using the metadata values collected earlier.
echo "${yellow}Would have deleted item $item${reset}"
echo "Commands:"
echo "${blue}ia delete $item --all --no-backup${reset}"
echo "${blue}ia metadata $item --modify=title:\"Reserved\"${reset}"
echo "${blue}ia metadata $item --modify=description:\"Reserved\"${reset}"
echo "${blue}ia metadata $item --remove=creator:\"$creator\"${reset}"
echo "${blue}ia metadata $item --remove=date:$date${reset}"
echo "${blue}ia metadata $item --remove=subject:'$subject'${reset}"
echo
echo "${blue}Would have removed any cache entry found${reset}"
else
# Live mode: ask for confirmation before doing anything. 'yes_no' is not
# defined in this file (presumably it comes from the sourced function
# library); the deletion only goes ahead if it returns success.
# NOTE(review): "N" looks like the default answer - confirm against the library.
echo "${red}About to delete item $item.${reset}"
if yes_no "OK to continue? %s " "N"; then
# Not yet tested. Can't be until we have a need! Note that the quoted
# items will not be shown as such using this form of 'echo'.
# echo "Commands are being displayed, not run, until testing is complete"
#
# Now tested, and looking good
#
# Strip the item: delete its contents, overwrite the title and description
# with "Reserved", then remove the creator, date and subject values. The
# exit status of these commands is not checked, so the cache update and the
# log entry below happen even if one of them fails.
ia delete "$item" --all --no-backup
ia metadata "$item" --modify=title:"Reserved"
ia metadata "$item" --modify=description:"Reserved"
ia metadata "$item" --remove=creator:"$creator"
ia metadata "$item" --remove=date:"$date"
# The subject list is passed wrapped in literal single quotes
ia metadata "$item" --remove=subject:"$(printf "'%s'" "$subject")"
#
# Ensure the show is not marked as "processed" in the cache. We need
# 'grep' to determine if there's anything to do since 'sed' can't do
# this apparently.
#
# Both patterns are anchored, so only a line consisting of exactly the
# identifier is matched and deleted.
if grep -q -E '^'"$item"'$' "$PROCFILE"; then
sed -i -e '/^'"$item"'$/d' "$PROCFILE"
echo "${yellow}$item removed from cache${reset}"
else
echo "${yellow}$item not found in cache${reset}"
fi
#
# Log this item
#
# Appends one line with a YYYYmmddHHMMSS timestamp to the log file. Here
# 'date' is the external command, not the 'date' variable holding metadata.
echo "$(date +%Y%m%d%H%M%S) deleted $item" >> "$LOGFILE"
else
echo "${red}Item not deleted. Aborting.${reset}"
fi
fi
# vim: syntax=sh:ts=8:sw=4:ai:et:tw=78:fo=tcrqn21