1
0
forked from HPR/hpr-tools

Moved project directories and files to an empty local repo

This commit is contained in:
Dave Morriss
2024-06-04 16:35:44 +01:00
parent 2d2b937a9b
commit 38abbcdd39
271 changed files with 55348 additions and 0 deletions

View File

@@ -0,0 +1,16 @@
# Version for i7-desktop
# .make_metadata.cfg 2023-07-06 11:54:49
#
max_epno = 9000
#uploads = "/var/IA/uploads" # on the VPS and marvin
uploads = "/home/cendjm/HPR/IA/uploads"
filetemplate = "hpr%04d.%s"
baseURL = "https://hackerpublicradio.org/"
#URLtemplate = "http://hackerpublicradio.org/eps/%s"
#URLtemplate = "https://hackerpublicradio.org/local/%s"
sourceURLtemplate = "https://hackerpublicradio.org/eps/%s/index.html"
IAURLtemplate = "https://archive.org/download/%s/%s"
#iauploadtemplate = "ia upload %s %s --remote-name=%s"
iauploadtemplate = "Upload %s %s '%s' '%s'"
iauploadoptions = "--retries=5 --no-derive -H x-archive-keep-old-version:0"

View File

@@ -0,0 +1,3 @@
uploads: /home/cendjm/HPR/IA/uploads
filetemplate: hpr%04d.%s
URLtemplate: http://hackerpublicradio.org/eps/%s

View File

@@ -0,0 +1,67 @@
#!/bin/bash -
#===============================================================================
#
# FILE: archive_metadata
#
# USAGE: ./archive_metadata
#
# DESCRIPTION: Adds metadata files (metadata_*.csv) to an archive for
# reference
#
# OPTIONS: ---
# REQUIREMENTS: ---
# BUGS: ---
# NOTES: ---
# AUTHOR: Dave Morriss (djm), Dave.Morriss@gmail.com
# VERSION: 0.0.1
# CREATED: 2015-06-14 12:57:04
# REVISION: 2015-06-14 12:59:48
#
#===============================================================================
set -o nounset                              # Treat unset variables as an error

SCRIPT=${0##*/}
DIR=${0%/*}

#
# Age threshold (in days) for archiving files
#
DAYS=120

#
# Name of the uncompressed archive; the compressed form has '.bz2' appended
#
ARCHIVE="meta.tar"

#
# Collect the eligible file names exactly once. The same list is then used
# for counting, archiving and deleting, so a file that crosses the age
# threshold part-way through the run can never be deleted without having been
# archived. NUL separators keep names with unusual characters intact.
#
declare -a files=()
while IFS= read -r -d '' file; do
    files+=("$file")
done < <(find . -name "metadata_*" -mtime +"$DAYS" -print0)

if [[ ${#files[@]} -eq 0 ]]; then
    echo "Nothing to archive"
    exit
fi

#
# Uncompress the archive (if there is one yet)
#
if [[ -e "$ARCHIVE.bz2" ]]; then
    echo "Uncompressing archive..."
    if ! bunzip2 "$ARCHIVE.bz2"; then
        echo "$SCRIPT: failed to uncompress $ARCHIVE.bz2; nothing changed" >&2
        exit 1
    fi
fi

#
# Add a bunch of older metadata files. If this fails put the archive back the
# way we found it and stop before anything is deleted.
#
echo "Adding files to the archive..."
if ! tar --verbose --append --file="$ARCHIVE" "${files[@]}"; then
    echo "$SCRIPT: failed to append to $ARCHIVE; no files deleted" >&2
    [[ -e $ARCHIVE ]] && bzip2 "$ARCHIVE"
    exit 1
fi

#
# Zip up the archive again
#
echo "Compressing the archive..."
if ! bzip2 "$ARCHIVE"; then
    echo "$SCRIPT: failed to compress $ARCHIVE; no files deleted" >&2
    exit 1
fi

#
# Delete all those old files - only the ones that were actually archived
#
echo "Deleting archived files..."
for file in "${files[@]}"; do
    printf '%s\n' "$file"
    rm -f -- "$file"
done

exit
# vim: syntax=sh:ts=8:sw=4:et:tw=78:fo=tcrqn21

62
InternetArchive/check_missing Executable file
View File

@@ -0,0 +1,62 @@
#!/bin/bash -
#===============================================================================
#
# FILE: check_missing
#
# USAGE: ./check_missing [start] [end]
#
# DESCRIPTION: Looks for missing audio files in the upload area and in the
# upload journal on the VPS. The upload area is on the VPS at
# /var/IA/uploads and the journal is (currently) in
# ~dave/IA/ias3uploads.jnl
#
# OPTIONS: ---
# REQUIREMENTS: ---
# BUGS: ---
# NOTES: ---
# AUTHOR: Dave Morriss (djm), Dave.Morriss@gmail.com
# VERSION: 0.0.1
# CREATED: 2015-06-14 13:07:23
# REVISION: 2015-06-14 13:08:07
#
#===============================================================================
set -o nounset                              # Treat unset variables as an error

SCRIPT=${0##*/}
DIR=${0%/*}

#
# Where the tools are
#
BASEDIR="$HOME/IA"

#
# For the moment the files for uploading are in two places
#
UPLOADS="/var/IA/uploads"

#
# Where the journal is
# NOTE(review): the file header calls this 'ias3uploads.jnl' - confirm which
# spelling is the real one
#
JNL="$BASEDIR/ias3upload.jnl"

#
# Default episode range
#
START=${1:-1300}
END=${2:-1799}

#
# The range ends go into an arithmetic context below, so insist on plain
# digits and force base 10 (a leading zero would otherwise mean octal)
#
for arg in "$START" "$END"; do
    if [[ ! $arg =~ ^[0-9]+$ ]]; then
        echo "$SCRIPT: invalid episode number '$arg'" >&2
        exit 1
    fi
done
START=$((10#$START))
END=$((10#$END))

#
# Without a readable journal only the upload area can be checked. Say so once
# rather than letting 'grep' complain for every episode.
#
have_jnl=1
if [[ ! -r $JNL ]]; then
    echo "$SCRIPT: can't read journal $JNL; checking $UPLOADS only" >&2
    have_jnl=0
fi

#
# Go looking for missing stuff. A file is missing if it is neither in the
# upload area nor mentioned in the journal. The name is matched as a fixed
# string so the '.' is not treated as a regular expression wildcard.
#
for (( i = START; i <= END; i++ )); do
    printf -v wav 'hpr%04d.wav' "$i"
    [[ -e "$UPLOADS/$wav" ]] && continue
    if (( have_jnl )) && grep -q -F -- "$wav" "$JNL"; then
        continue
    fi
    echo "Missing $wav"
done

exit

276
InternetArchive/check_week Executable file
View File

@@ -0,0 +1,276 @@
#!/bin/bash -
#===============================================================================
#
# FILE: check_week
#
# USAGE: ./check_week -h -i -v [week_number]
#
# DESCRIPTION: Checks the upcoming week, or any week, to ensure there are shows
# on the IA for that period. It mainly makes sense to look into
# the future, but you can look backwards in the same year if
# required.
#
# OPTIONS: ---
# REQUIREMENTS: ---
# BUGS: ---
# NOTES: ---
# AUTHOR: Dave Morriss (djm), Dave.Morriss@gmail.com
# VERSION: 0.0.2
# CREATED: 2022-01-27 10:45:41
# REVISION: 2022-02-25 22:22:19
#
#===============================================================================
set -o nounset                              # Treat unset variables as an error

SCRIPT=${0##*/}
# DIR=${0%/*}
VERSION="0.0.2"
# Despite the name this is file descriptor 2; it is where _usage writes
STDOUT="/dev/fd/2"

#
# Choose the working directory according to the machine we are running on,
# refusing to run anywhere unexpected
#
case $(hostname) in
    hprvps | marvin | borg)
        BASEDIR="$HOME/IA" ;;
    i7-desktop)
        BASEDIR="$HOME/HPR/InternetArchive" ;;
    *)
        echo "Wrong host!"
        exit 1 ;;
esac
if ! cd "$BASEDIR"; then
    exit 1
fi

#
# Pull in the shared function library; nothing works without it
#
LIB="$HOME/bin/function_lib.sh"
if [ ! -e "$LIB" ]; then
    echo "Unable to source functions"
    exit 1
fi
# shellcheck source=/home/cendjm/bin/function_lib.sh
source "$LIB"

#
# Set up the colour variables (function provided by the library)
#
define_colours
#=== FUNCTION ================================================================
# NAME: _open_tunnel
# DESCRIPTION: Opens the SSH tunnel to the HPR server if necessary. A tunnel
#              is taken to be open already if the current user has a process
#              whose command line matches 'ssh.*hpr@hackerpublicradio.org'.
# PARAMETERS: 1 - path of the command which opens the tunnel
# RETURNS: Nothing; exits the script with status 1 if the command fails
#===============================================================================
_open_tunnel () {
    local open_tunnel="${1}"

    #
    # 'pgrep -c' prints the number of matching processes; zero means there is
    # no tunnel yet. The command is quoted so that a path containing spaces
    # is run as a single word.
    #
    if [[ $(pgrep -u "$USER" -f 'ssh.*hpr@hackerpublicradio.org' -c) -eq 0 ]]; then
        "$open_tunnel" || { echo "Failed to open SSH tunnel"; exit 1; }
    fi
}
#=== FUNCTION ================================================================
# NAME: _DEBUG
# DESCRIPTION: Writes a message if in DEBUG mode, that is when the global
#              variable DEBUG is anything other than 0. Each message is
#              written on its own line with a 'D> ' prefix.
# PARAMETERS: List of messages
# RETURNS: Nothing
#===============================================================================
_DEBUG () {
    # Keep the loop variable private so a caller's 'msg' is never clobbered
    local msg

    [ "$DEBUG" == 0 ] && return
    for msg in "$@"; do
        printf 'D> %s\n' "$msg"
    done
}
#=== FUNCTION ================================================================
# NAME: _usage
# DESCRIPTION: Reports usage; always exits the script after doing so. The
#              text is written to the file named in the global STDOUT and
#              uses the globals SCRIPT and VERSION.
# PARAMETERS: 1 - the integer to pass to the 'exit' command (default 0)
# RETURNS: Nothing
#===============================================================================
_usage () {
    local -i result=${1:-0}

    # The usage line lists every option that the option loop accepts
    cat >"$STDOUT" <<-endusage
${SCRIPT} - version: ${VERSION}
Usage: ./${SCRIPT} [-h] [-i] [-v] [week_no]
Checks a future week to ensure all the shows are on the Internet Archive.
Options:
-h Print this help
-v Enable verbose mode where a little more information is
output. Mainly the dates relating to the chosen week
number.
-i Ignore shows missing from the database during the
chosen week. Normally the script does not proceed if
there are fewer than 5 shows in a week.
Arguments:
week_no (optional, default current week) the week number to be
examined. This is a number in the range 1..52.
Anything else is illegal.
Environment variables
check_week_DEBUG If set to a non-zero value then the debugging
statements in the script are executed. Otherwise if
set to zero, or if the variable is absent no debug
information is produced. The variable can be set
using the 'export' command or on the same line as the
command calling the script. See the example below.
Examples
./check_week # Check the current week
./check_week -i # Check the current week ignoring missing shows
./check_week 6 # Check week 6 of the current year
check_week_DEBUG=1 ./check_week # Run with debugging enabled
endusage
    exit "$result"
}
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#
# Debug mode. Invoke it with: 'check_week_DEBUG=1 ./check_week'
#
#
# The debug setting comes from an environment variable named after the
# script, read here by indirect expansion; absent means 0
#
DEBUGVAR="${SCRIPT}_DEBUG"
DEBUG="${!DEBUGVAR:-0}"

#
# Process options. Asking for help with -h is not an error so it exits with
# status 0; an unknown option still exits with status 1.
#
while getopts hiv opt; do
    case "${opt}" in
        h) _usage 0 ;;
        i) IGNORE=1 ;;
        v) VERBOSE=1 ;;
        *) _usage 1 ;;
    esac
done
shift $((OPTIND - 1))

#
# Options not given on the command line default to 'off'
#
IGNORE=${IGNORE:-0}
VERBOSE=${VERBOSE:-0}
#
# Check arguments
#
#
# At most one argument (the week number) is accepted
#
if [[ $# -gt 1 ]]; then
    _usage 1
    exit 1
fi

#
# Default missing any week number and validate what's found. The value is
# checked to be all digits BEFORE it goes anywhere near an arithmetic
# expansion, otherwise a non-numeric argument causes an arithmetic error (or
# gets evaluated as an expression). Take care that numbers with leading zeroes
# are by default taken to be octal! We coerce to base 10 just in case.
#
weekno="${1:-"$(date +%V)"}"
if [[ ! $weekno =~ ^[0-9]+$ ]]; then
    echo "Invalid week number: $weekno"
    exit 1
fi
weekno=$((10#$weekno))

#
# NOTE(review): 'date +%V' is the ISO week number, which can be 53 in some
# years; the documented range is 1..52 so 53 is rejected here - confirm that
# is what is wanted
#
if [[ $weekno -lt 1 || $weekno -gt 52 ]]; then
    echo "Invalid week number: $weekno"
    exit 1
fi
#
# Check dependencies
#
#
# Both helper tools must be somewhere on the PATH
#
OTUNNEL=$(command -v open_tunnel)
[[ -n $OTUNNEL ]] || {
    echo "Can't find the script 'open_tunnel'"
    exit 1
}

QUERYTOOL=$(command -v query2csv)
[[ -n $QUERYTOOL ]] || {
    echo "Can't find the tool query2csv"
    exit 1
}

#
# Make sure there is an SSH tunnel before going any further
#
_open_tunnel "$OTUNNEL"
#
# Gather and compute date information. Week numbers may start with a zero so
# we have to coerce them into base 10 from what will be presumed to be octal.
#
#
# Current ISO week (forced to base 10) and day of the week (1 = Monday)
#
curweek=$(date +%V)
curweek=$((10#$curweek))
curdow=$(date +%u)

#
# Offset in days from today to the Monday of the chosen week, then the dates
# of that Monday and of the Friday four days later
#
woffset=$((weekno - curweek))
offset="$((woffset * 7 - curdow + 1)) days"
weekstart=$(date -d "$offset" +%F)
weekfinish=$(date -d "$weekstart + 4 days" +%F)

_DEBUG \
    "Current week number: $curweek" \
    "Current day number: $curdow" \
    "Argument: $weekno" \
    "Week offset: $woffset" \
    "Day offset: $offset" \
    "Start of chosen week: $weekstart" \
    "End of chosen week: $weekfinish"

#
# In verbose mode say which week is being looked at
#
if [ $VERBOSE -eq 1 ]; then
    printf 'Processing week %s (%s to %s)\n' "$weekno" "$weekstart" "$weekfinish"
fi
#
# Make SQL
#
#
# Build the query for the show numbers falling in the chosen week
#
printf -v sql "select id from eps where date between '%s' and '%s' order by id" \
    "$weekstart" "$weekfinish"
_DEBUG "SQL: $sql"

#
# Run it, one show number per array element
#
declare -a shows
mapfile -t shows < <(query2csv "$sql")
_DEBUG "shows: ${shows[*]}"

#
# A full week has 5 shows. Anything else is reported, and stops the script
# unless the -i option asked for missing shows to be ignored.
#
if [[ ${#shows[@]} -ne 5 ]]; then
    echo "${red}Didn't find the expected number of shows for this week${reset}"
    [[ $IGNORE -eq 0 ]] && exit 1
fi
#
# Check the shows are on the IA
#
#
# Ask the IA about each show in turn; 'ia list' succeeding is taken to mean
# the item exists there
#
for id in "${shows[@]}"; do
    show="hpr$id"
    if ! ia list "$show" > /dev/null 2>&1; then
        echo "${red}$show has not been uploaded${reset}"
        continue
    fi
    echo "${green}$show has been uploaded${reset}"
done

#
# All done
#
exit
# vim: syntax=sh:ts=8:sw=4:ai:et:tw=78:fo=tcrqn21

107
InternetArchive/collect_show_data Executable file
View File

@@ -0,0 +1,107 @@
#!/bin/bash -
#===============================================================================
#
# FILE: collect_show_data
#
# USAGE: ./collect_show_data fromshow [toshow]
#
# DESCRIPTION: Capture metadata for a range of shows for adding to the ia.db
# database. Do it by show number rather than by date (see
# 'collect_metadata')
#
# OPTIONS: ---
# REQUIREMENTS: ---
# BUGS: ---
# NOTES: ---
# AUTHOR: Dave Morriss (djm), Dave.Morriss@gmail.com
# VERSION: 0.0.5
# CREATED: 2018-01-27 14:13:19
# REVISION: 2023-01-18 12:30:17
#
#===============================================================================
set -o nounset                              # Treat unset variables as an error

VERSION="0.0.5"
SCRIPT=${0##*/}
#DIR=${0%/*}

# Largest number of shows that may be requested in one run
showmax=10

#
# We need a 'from' argument and optionally a 'to' argument as show numbers
#
if [[ $# -ne 1 && $# -ne 2 ]]; then
    echo -e "Usage: $SCRIPT fromshow [toshow]\\n(Version $VERSION)"
    exit 1
fi

#
# Validate the arguments
#
for arg; do
    if [[ ! $arg =~ ^[0-9]{1,4}$ ]]; then
        echo "Invalid show number: $arg"
        echo "Use a plain number"
        exit 1
    fi
done

#
# Save the arguments, forcing base 10 so that a number typed with leading
# zeroes (e.g. 0099) is not taken to be octal by the arithmetic and 'printf'
# below. A missing 'to' defaults to 'from'.
#
from=$((10#$1))
to=$((10#${2:-$1}))

#
# Check the arguments are in the right order
#
if [[ $from -gt $to ]]; then
    echo "First argument must be less than or equal to second"
    exit 1
fi

#
# Check the request wasn't too big
#
if [[ $((to - from + 1)) -gt $showmax ]]; then
    echo "Too many shows requested; limit $showmax"
    exit 1
fi

#
# Make an array of IA identifiers for the range, with leading zeroes. Built
# with a plain loop so there are no stray newlines in the elements and no
# need for 'eval'.
#
declare -a ids=()
for (( n = from; n <= to; n++ )); do
    printf -v id 'hpr%04d' "$n"
    ids+=("$id")
done

#
# Output file - with leading zeroes in numbers
#
printf -v metadata 'ia_metadata_hpr%04d-hpr%04d.json' "$from" "$to"
echo "From $from, to $to -> $metadata"

#
# Make the request. On failure an empty output file is removed, and the
# failure is passed back to the caller through the exit status.
#
ia metadata "${ids[@]}" > "$metadata" 2> /dev/null
RES=$?
if [[ $RES -ne 0 ]]; then
    echo "Data collection has failed"
    if [[ ! -s $metadata ]]; then
        rm -f "$metadata"
    fi
else
    echo "Metadata is in $metadata"
fi

exit "$RES"
# vim: syntax=sh:ts=8:sw=4:ai:et:tw=78:fo=tcrqn21

290
InternetArchive/delete_ia_item Executable file
View File

@@ -0,0 +1,290 @@
#!/bin/bash -
#===============================================================================
#
# FILE: delete_ia_item
#
# USAGE: ./delete_ia_item episode
#
# DESCRIPTION: Deletes an uploaded item on the IA. The item (identifier)
# can't be deleted entirely but it can be stripped of contents
# and metadata and left in a 'Reserved' state so the slot can be
# reused.
#
# OPTIONS: ---
# REQUIREMENTS: ---
# BUGS: ---
# NOTES: ---
# AUTHOR: Dave Morriss (djm), Dave.Morriss@gmail.com
# VERSION: 0.0.3
# CREATED: 2022-05-08 19:40:37
# REVISION: 2022-08-14 23:09:51
#
#===============================================================================
set -o nounset                              # Treat unset variables as an error

VERSION="0.0.3"
SCRIPT=${0##*/}
# DIR=${0%/*}
# Despite the name this is file descriptor 2; it is where _usage writes
STDOUT="/dev/fd/2"

#
# The shared function library is required
#
LIB="$HOME/bin/function_lib.sh"
if [ ! -e "$LIB" ]; then
    echo "Unable to source functions"
    exit 1
fi
# shellcheck source=function_lib.sh
source "$LIB"

#
# Set up the colour variables (function provided by the library)
#
define_colours

#
# One temporary file is needed; the traps hand it to 'cleanup_temp' on exit
# and on the usual terminating signals
#
if ! TMP1=$(mktemp); then
    echo "$SCRIPT: creation of temporary file failed!"
    exit 1
fi
trap 'cleanup_temp $TMP1' SIGHUP SIGINT SIGPIPE SIGTERM EXIT
#=== FUNCTION ================================================================
# NAME: _DEBUG
# DESCRIPTION: Writes messages if in DEBUG mode, that is when the global
#              variable DEBUG is anything other than 0. Each message is
#              written on its own line with a 'D> ' prefix.
# PARAMETERS: List of messages
# RETURNS: Nothing
#===============================================================================
_DEBUG () {
    # Keep the loop variable private so a caller's 'msg' is never clobbered
    local msg

    [ "$DEBUG" == 0 ] && return
    for msg in "$@"; do
        printf 'D> %s\n' "$msg"
    done
}
#=== FUNCTION ================================================================
# NAME: _usage
# DESCRIPTION: Writes the help text to the file named in the global STDOUT
#              and then exits the script
# PARAMETERS: 1 [optional] exit value, default 0
# RETURNS: Nothing
#===============================================================================
_usage () {
    local -i exit_value="${1:-0}"

    cat <<-endusage >$STDOUT
${SCRIPT} - version: ${VERSION}
Usage: ./${SCRIPT} [-h] [-d {0|1}] episode
Deletes an uploaded item on the IA. The item (identifier) can't be deleted
entirely but it can be stripped of contents and metadata and left in
a 'Reserved' state so the slot can be reused.
Options:
-h Print this help
-d 0|1 Dry run: -d 1 (the default) runs the script in dry-run
mode where nothing is moved but the actions that
will be taken are reported; -d 0 turns off dry-run
mode and the actions will be carried out.
Arguments:
episode Defines the episode (IA identifier) to be deleted from
archive.org. These identifiers are in the format
'hprNNNN' where 'NNNN' is a number with leading
zeroes, and 'hpr' is mandatory.
The script attempts to reformat incorrect identifiers
before giving up. The missing 'hpr' is added, and
missing leading zeroes are inserted. Thus '42' and
'hpr42' become 'hpr0042'.
Environment variables:
delete_ia_item_DEBUG If set to a non-zero value then the debugging
statements in the script are executed. Otherwise if
set to zero, or if the variable is absent no debug
information is produced. The variable can be set
using the 'export' command or on the same line as the
command calling the script. See the example below.
Examples
./delete_ia_item 3594 # Run in (default) dry-run mode
./delete_ia_item -d1 3594 # Run in (explicit) dry-run mode
./delete_ia_item -d0 3594 # Live mode
delete_ia_item_DEBUG=1 ./delete_ia_item 3594
# Run in dry-run mode with debugging enabled
endusage
    exit "$exit_value"
}
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#
# Configure depending whether local or on borg
#
#
# The base directory depends on the host; unknown hosts are refused
#
case $HOSTNAME in
    i7-desktop)
        BASEDIR="$HOME/HPR/IA" ;;
    hprvps | marvin | borg)
        BASEDIR="$HOME/IA" ;;
    *)
        echo "Wrong host!"
        exit 1 ;;
esac
if ! cd "$BASEDIR"; then
    echo "Can't cd to $BASEDIR"
    exit 1
fi

#
# Directories and files
#
LOGS="$BASEDIR/logs"
LOGFILE="$LOGS/$SCRIPT.log"

#
# Debug mode. Invoke it with: 'delete_ia_item_DEBUG=1 ./delete_ia_item'
#
DEBUGVAR="${SCRIPT}_DEBUG"
DEBUG="${!DEBUGVAR:-0}"
if [[ $DEBUG -eq 1 ]]; then
    echo "Debug mode"
fi

#
# File of processed shows, created empty if it isn't there yet
#
PROCFILE="$BASEDIR/.${SCRIPT}.dat"
if [ ! -e "$PROCFILE" ]; then
    touch "$PROCFILE"
fi

#
# Prerequisites
#
jq=$(command -v jq)
if [ -z "$jq" ]; then
    echo "Needs the 'jq' JSON filter"
    exit 1
fi
ia=$(command -v ia)
if [ -z "$ia" ]; then
    echo "Needs the 'ia' Internet Archive script"
    exit 1
fi
#
# Process options
#
#
# Anything other than -d (with a value) or -h is reported and ends the run
# through _usage
#
while getopts :d:h opt; do
    case "${opt}" in
        h)
            _usage 0 ;;
        d)
            DRYRUN=$OPTARG ;;
        *)
            echo "** Unknown option"
            _usage 1 ;;
    esac
done
shift $((OPTIND - 1))

#
# Dry-run mode is the default and the only legal values are 0 and 1
#
DRYRUN=${DRYRUN:-1}
if ! [[ $DRYRUN -eq 0 || $DRYRUN -eq 1 ]]; then
    echo "** Use '-d 0' or '-d 1'"
    _usage 1
fi
if [[ $DRYRUN -eq 1 ]]; then
    echo "Dry run mode"
fi

#
# Should have only one argument
#
if [[ $# != 1 ]]; then
    echo "${red}${SCRIPT} takes one argument${reset}"
    _usage 1
fi
#=== FUNCTION ================================================================
# NAME: _normalise_item
# DESCRIPTION: Turns an episode specification into an IA identifier of the
#              form 'hprNNNN'. The input is lower-cased, surrounding
#              whitespace is removed, a missing 'hpr' is added and the
#              number is zero-padded to four digits.
# PARAMETERS: 1 - the episode specification, e.g. '42', 'hpr42', 'hpr0042'
# RETURNS: 0 with the identifier on standard output, or 1 (and no output)
#          if the specification is not valid
#===============================================================================
_normalise_item () {
    local raw="${1:-}"

    raw="${raw,,}"
    # Strip ALL leading then ALL trailing whitespace
    raw="${raw#"${raw%%[![:space:]]*}"}"
    raw="${raw%"${raw##*[![:space:]]}"}"

    [[ $raw =~ ^(hpr)?([0-9]{1,4})$ ]] || return 1

    #
    # Force base 10. Handing '0042' straight to printf would have it read as
    # octal (giving hpr0034) and '0089' would be rejected as invalid octal -
    # in a script which deletes things the identifier has to be exact.
    #
    printf 'hpr%04d\n' "$((10#${BASH_REMATCH[2]}))"
}

#
# Collect the argument and clean and validate it, forcing leading zeroes if
# needed
#
item="${1:-}"
if cleaned=$(_normalise_item "$item"); then
    item="$cleaned"
else
    echo "${red}Invalid episode specification: '$item'${reset}"
    echo "${yellow}Use hprNNNN format with leading zeroes${reset}"
    _usage 1
fi
_DEBUG "Dry run: $DRYRUN"
_DEBUG "Item chosen: $item"
#
# Check the item exists on the IA and if it does collect metadata and parse
# out the items we need.
#
#
# Nothing can be done for an item the IA doesn't have
#
_DEBUG "Testing IA for existence of $item"
if ! ia list "$item" > /dev/null 2>&1; then
    echo "${red}The requested item '$item' is not on archive.org${reset}"
    exit 1
fi

#
# Fetch the item's metadata as JSON and pick out the fields which have to be
# removed later
#
ia metadata "$item" > "$TMP1"

# The subject is an array in the JSON but is wanted as a CSV list; runs of
# three double quotes in jq's CSV output are cut down to one.
# TODO: Not sure this works with tags containing spaces
# shellcheck disable=SC2046
subject="$(jq -r '.metadata.subject | @csv' "$TMP1")"
subject="${subject//\"\"\"/\"}"
creator="$(jq -r '.metadata.creator' "$TMP1")"
date="$(jq -r '.metadata.date' "$TMP1")"

_DEBUG "subject: $subject" "creator: $creator" "date: $date"
#
# Either pretend to do stuff in dry-run mode or do it for real, but with
# confirmation first.
#
#
# Either pretend to do stuff in dry-run mode or do it for real, but with
# confirmation first.
#
if [[ $DRYRUN -eq 1 ]]; then
    echo "${yellow}Would have deleted item $item${reset}"
    echo "Commands:"
    echo "${blue}ia delete $item --all --no-backup${reset}"
    echo "${blue}ia metadata $item --modify=title:\"Reserved\"${reset}"
    echo "${blue}ia metadata $item --modify=description:\"Reserved\"${reset}"
    echo "${blue}ia metadata $item --remove=creator:\"$creator\"${reset}"
    echo "${blue}ia metadata $item --remove=date:$date${reset}"
    echo "${blue}ia metadata $item --remove=subject:'$subject'${reset}"
    echo
    echo "${blue}Would have removed any cache entry found${reset}"
else
    echo "${red}About to delete item $item.${reset}"
    if yes_no "OK to continue? %s " "N"; then
        #
        # Remove the item's files. If that fails stop here: carrying on
        # would strip the metadata from an item which still has its files,
        # and would log a deletion which never happened.
        #
        if ! ia delete "$item" --all --no-backup; then
            echo "${red}Deletion of $item failed. Aborting.${reset}"
            exit 1
        fi

        #
        # Mark the slot as reserved and remove the old metadata
        #
        ia metadata "$item" --modify=title:"Reserved"
        ia metadata "$item" --modify=description:"Reserved"
        ia metadata "$item" --remove=creator:"$creator"
        ia metadata "$item" --remove=date:"$date"
        ia metadata "$item" --remove=subject:"'$subject'"

        #
        # Ensure the show is not marked as "processed" in the cache. We need
        # 'grep' to determine if there's anything to do since 'sed' can't do
        # this apparently.
        #
        if grep -q -E '^'"$item"'$' "$PROCFILE"; then
            sed -i -e '/^'"$item"'$/d' "$PROCFILE"
            echo "${yellow}$item removed from cache${reset}"
        else
            echo "${yellow}$item not found in cache${reset}"
        fi

        #
        # Log this item
        #
        echo "$(date +%Y%m%d%H%M%S) deleted $item" >> "$LOGFILE"
    else
        echo "${red}Item not deleted. Aborting.${reset}"
    fi
fi
# vim: syntax=sh:ts=8:sw=4:ai:et:tw=78:fo=tcrqn21

224
InternetArchive/delete_uploaded Executable file
View File

@@ -0,0 +1,224 @@
#!/bin/bash -
#===============================================================================
#
# FILE: delete_uploaded
#
# USAGE: ./delete_uploaded [-h] [-v] [-d {0|1}]
#
# DESCRIPTION: Deletes HPR audio and other show-related files on the VPS
# after their shows have been uploaded to the Internet Archive
#
# OPTIONS: ---
# REQUIREMENTS: ---
# BUGS: ---
# NOTES: Was 'delete_uploaded_new' while in development. Now replaces
# the original 'delete_uploaded'.
# AUTHOR: Dave Morriss (djm), Dave.Morriss@gmail.com
# VERSION: 0.1.4
# CREATED: 2017-08-12 12:32:13
# REVISION: 2022-03-30 10:57:23
#
#===============================================================================
set -o nounset                              # Treat unset variables as an error

VERSION="0.1.4"
SCRIPT=${0##*/}
#DIR=${0%/*}
# Despite the name this is file descriptor 2; it is where _usage writes
STDOUT="/dev/fd/2"

#
# Configure depending whether local or on the VPS. This is done BEFORE the
# temporary file and its trap are set up: the "Wrong host!" exit happens
# before 'cleanup_temp' has been defined, so with the trap already armed it
# would fail with "command not found" and leave the temporary file behind.
#
case $HOSTNAME in
    hprvps|marvin|borg) UPLOADS="/var/IA/uploads" ;;
    i7-desktop) UPLOADS="$HOME/HPR/IA/uploads" ;;
    *) echo "Wrong host!"; exit 1 ;;
esac

#
# Make temporary files and set traps to delete them
#
TMP1=$(mktemp) || { echo "$SCRIPT: creation of temporary file failed!"; exit 1; }
trap 'cleanup_temp "$TMP1"' SIGHUP SIGINT SIGPIPE SIGTERM EXIT
#=== FUNCTION ================================================================
# NAME: cleanup_temp
# DESCRIPTION: Cleanup temporary files in case of a keyboard interrupt
#              (SIGINT) or a termination signal (SIGTERM) and at script
#              exit
# PARAMETERS: * - names of temporary files to delete
# RETURNS: Nothing; exits the script with the status that was current when
#          the function was called
#===============================================================================
function cleanup_temp {
    #
    # Remember the status on entry. This function runs from the EXIT trap, so
    # exiting with a fixed 0 here would turn every failure ('exit 1',
    # '_usage 1') into success as far as the caller can tell.
    #
    local rc=$?
    local tmp

    for tmp in "$@"; do
        [ -e "$tmp" ] && rm --force "$tmp"
    done

    exit "$rc"
}
#=== FUNCTION ================================================================
# NAME: is_empty
# DESCRIPTION: Reports whether a directory has nothing in it. 'find' is told
#              to print a marker for the first entry below the directory and
#              then stop, so no marker means no entries.
# PARAMETERS: $1 Directory to test
# RETURNS: True if empty, otherwise false
#===============================================================================
is_empty() {
    local first_entry

    first_entry=$(find "$1" -mindepth 1 -printf X -quit)
    [[ -z $first_entry ]]
}
#=== FUNCTION ================================================================
# NAME: _usage
# DESCRIPTION: Writes the help text to the file named in the global STDOUT
#              and then exits the script
# PARAMETERS: 1 [optional] exit value, default 0
# RETURNS: Nothing
#===============================================================================
_usage () {
    local -i exit_value="${1:-0}"

    cat <<-endusage >$STDOUT
${SCRIPT} - version: ${VERSION}
Usage: ./${SCRIPT} [-h] [-v] [-d {0|1}]
Deletes HPR audio and other show-related files on the VPS after their shows
have been uploaded to the Internet Archive.
Options:
-h Print this help
-v Run in verbose mode where more information is reported
-d 0|1 Dry run: -d 1 (the default) runs the script in dry-run
mode where nothing is deleted but the actions that
will be taken are reported; -d 0 turns off dry-run
mode and the actions will be carried out.
endusage
    exit "$exit_value"
}
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#
# Process options
#
#
# Read the options; anything unrecognised ends the run through _usage
#
while getopts :d:hv opt; do
    case "${opt}" in
        h)
            _usage 0 ;;
        v)
            VERBOSE=1 ;;
        d)
            DRYRUN=$OPTARG ;;
        *)
            _usage 1 ;;
    esac
done
shift $((OPTIND - 1))

#
# Dry-run mode is the default and the only legal values are 0 and 1
#
DRYRUN=${DRYRUN:-1}
if ! [[ $DRYRUN -eq 0 || $DRYRUN -eq 1 ]]; then
    echo "** Use '-d 0' or '-d 1'"
    _usage 1
fi
if [[ $DRYRUN -eq 1 ]]; then
    echo "Dry run mode"
fi

#
# Verbose mode is off unless asked for
#
VERBOSE=${VERBOSE:-0}

#
# Should have no arguments
#
if [[ $# -ne 0 ]]; then
    echo "** ${SCRIPT} takes no arguments"
    _usage 1
fi
#
# Declarations
#
#re="^hpr[0-9]{4}"
#
# Main scan. Every path under $UPLOADS containing 'hprNNNN' is read in sorted
# order. The first path seen for each IA identifier triggers one 'ia list'
# call; any local file named in that listing is deleted (or reported in
# dry-run mode). Directories are remembered in 'dirs' for the clean-up which
# follows the loop. The scan stops at the first identifier not on the IA.
#
declare -a dirs
lastitem=
while read -r path; do
#
# Extract the path relative to $UPLOADS and the IA item name from the
# returned path
# NOTE(review): the item is simply the first 7 characters of the relative
# path, so this presumably assumes entries named 'hprNNNN...' sit directly
# under $UPLOADS - confirm
#
relpath="${path#"$UPLOADS"/}"
item="${relpath:0:7}"
[ $VERBOSE -eq 1 ] && echo "Found $path"
#
# Record all directories from the 'find'. Note that this means the
# directory must begin with "^hpr[0-9]{4}", which may give a problem if
# there's a directory that doesn't conform
#
if [[ -d $path ]]; then
dirs+=("$path")
fi
#
# Detect that the item prefix has changed. If it has we're processing
# a new IA identifier, so work on this one
#
if [[ $item != "$lastitem" ]]; then
lastitem=$item
[ $VERBOSE -eq 1 ] && echo "Checking IA for $lastitem"
# The listing is saved in the temporary file for the inner loop to read
if ia list "$lastitem" > "$TMP1"; then
#
# Scan the returned list to see if any files we have are online.
# Delete when there's a match.
#
while read -r file; do
# Names from the listing are taken relative to $UPLOADS
if [[ -e "$UPLOADS/$file" ]]; then
#
# A file on the IA exists in the upload area. Delete the
# local one if we're not in dry-run mode, otherwise just
# report the deletion we would do.
#
if [[ $DRYRUN -eq 0 ]]; then
rm -f "$UPLOADS/$file"
echo "Deleted $UPLOADS/$file"
else
echo "Would delete $UPLOADS/$file"
fi
fi
done < "$TMP1"
else
#
# End the outer 'while' loop because we hit an item not on the IA.
# We rely on the list being sorted for this to be sensible
#
[ $VERBOSE -eq 1 ] && echo "Item not found on IA: $lastitem"
break
fi
else
#
# Ignore all but the first file belonging to an IA identifier
#
[ $VERBOSE -eq 1 ] && echo "Skipped $path"
continue
fi
# The process substitution keeps the loop in the current shell, so 'dirs' and
# 'lastitem' are still set once the loop has finished
done < <(find "$UPLOADS" -regextype posix-extended -regex '.*hpr[0-9]{4}.*' | sort)
#
# Clean up any empty directories
#
#
# The directories were recorded in sorted order, so a parent always comes
# before its children. Work through them backwards (children first) so that a
# parent left holding nothing but empty children is itself found to be empty
# and removed. Walking by index also copes with no directories having been
# recorded at all.
#
for (( idx = ${#dirs[@]} - 1; idx >= 0; idx-- )); do
    dir="${dirs[idx]}"
    if is_empty "$dir"; then
        if [[ $DRYRUN -eq 0 ]]; then
            rmdir "$dir"
            echo "Deleted $dir"
        else
            echo "Would delete $dir"
        fi
    fi
done
exit
# vim: syntax=sh:ts=8:sw=4:ai:et:tw=78:fo=tcrqn21

578
InternetArchive/future_upload Executable file
View File

@@ -0,0 +1,578 @@
#!/bin/bash -
#===============================================================================
#
# FILE: future_upload
#
# USAGE: ./future_upload
#
# DESCRIPTION: Uploads future HPR shows based on what is in the upload area
#
# OPTIONS: ---
# REQUIREMENTS: ---
# BUGS: ---
# NOTES: Contains methods from 'delete_uploaded' and 'weekly_upload' as
# well as 'update_state'
# AUTHOR: Dave Morriss (djm), Dave.Morriss@gmail.com
# VERSION: 0.0.14
# CREATED: 2021-01-07 12:11:02
# REVISION: 2024-03-03 14:12:30
#
#===============================================================================
set -o nounset                              # Treat unset variables as an error

SCRIPT=${0##*/}
# DIR=${0%/*}
# Despite the name this is file descriptor 2; it is where _usage writes
STDOUT="/dev/fd/2"
VERSION="0.0.14"

#
# The shared function library is required
#
LIB="$HOME/bin/function_lib.sh"
if [ ! -e "$LIB" ]; then
    echo "Unable to source functions"
    exit 1
fi
# shellcheck disable=SC1090
source "$LIB"
#=== FUNCTION ================================================================
# NAME: check_uploads
# DESCRIPTION: Determines if the complete set of audio files for a show is
#              present in the directory named by the global UPLOADS
# PARAMETERS: 1 - filename prefix e.g. 'hpr9999'
# RETURNS: True/false
#===============================================================================
check_uploads () {
    local prefix=${1:?Usage: check_uploads prefix}
    local ext

    #
    # Every one of hpr1234.flac, hpr1234.mp3 and so on has to be there (the
    # hpr1234_source.flac file is not checked). Give up at the first one
    # found to be missing.
    #
    for ext in flac mp3 ogg opus spx wav; do
        [[ -e $UPLOADS/$prefix.$ext ]] || return 1
    done

    return 0
}
#=== FUNCTION ================================================================
# NAME: _usage
# DESCRIPTION: Report usage. The text is written to the file named in the
#              global STDOUT and uses the globals SCRIPT, VERSION and
#              DEFLIMIT; the script then exits.
# PARAMETERS: 1 [optional] exit value, default 0
# RETURNS: Nothing
#===============================================================================
_usage () {
    local -i res="${1:-0}"

    # The usage line names the -l argument 'N', matching its description
    cat >"$STDOUT" <<-endusage
${SCRIPT} - version: ${VERSION}
Usage: ./${SCRIPT} [-h] [-v] [-D] [-d {0|1}] [-F] [-r] [-l N]
Uploads HPR shows to the Internet Archive that haven't yet been uploaded. This
is as an alternative to uploading the next 5 shows each week for the coming
week.
Options:
-h Print this help
-v Run in verbose mode where more information is reported
-D Run in debug mode where a lot more information is
reported
-d 0|1 Dry run: -d 1 (the default) runs the script in dry-run
mode where nothing is uploaded but the actions that
will be taken are reported; -d 0 turns off dry-run
mode and the actions will be carried out.
-F Force the upload of a show even if the checks for its
state in the 'reservations' table gives the wrong
state or none at all. This is a rare event which may
come about if an "emergency" show is being deployed or
in some other exceptional circumstance.
-r Run in 'remote' mode, using the live database over an
(already established) SSH tunnel. Default is to run
against the local database.
-l N Control the number of shows that can be uploaded at
once. The range is 1 to $DEFLIMIT.
Notes:
1. When running on 'borg' the method used is to run in faux 'local' mode.
This means we have an open tunnel to the HPR server (mostly left open) and
the default file .hpr_db.cfg points to the live database via this tunnel.
So we do not use the -r option here. This is a bit of a hack! Sorry!
endusage
    exit "$res"
}
#=== FUNCTION ================================================================
# NAME: _DEBUG
# DESCRIPTION: Writes a message if in DEBUG mode, that is when the global
#              variable DEBUG is anything other than 0. Each message is
#              written on its own line with a 'D> ' prefix.
# PARAMETERS: List of messages
# RETURNS: Nothing
#===============================================================================
_DEBUG () {
    # Keep the loop variable private so a caller's 'msg' is never clobbered
    local msg

    [ "$DEBUG" == 0 ] && return
    for msg in "$@"; do
        printf 'D> %s\n' "$msg"
    done
}
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#
# Configure depending whether local or on borg
#
#
# The upload area and base directory depend on the host; unknown hosts are
# refused
#
case $HOSTNAME in
    i7-desktop)
        UPLOADS="$HOME/HPR/IA/uploads"
        BASEDIR="$HOME/HPR/IA"
        ;;
    hprvps | marvin | borg)
        UPLOADS="/data/IA/uploads"
        BASEDIR="$HOME/IA"
        ;;
    *)
        echo "Wrong host!"
        exit 1
        ;;
esac
if ! cd "$BASEDIR"; then
    echo "Can't cd to $BASEDIR"
    exit 1
fi
#
# Prepare for logging
#
#
# Where the log goes
#
LOGS="$BASEDIR/logs"
LOGFILE="$LOGS/$SCRIPT.log"

#
# Tools. The curl settings file is given relative to the current directory,
# which by this point is $BASEDIR.
#
BASECOM='curl -K ./.hpradmin_curlrc -s'
URL="https://hub.hackerpublicradio.org/cms/status.php"
# QUERY1="${BASECOM} ${URL}"
QUERY2="${BASECOM} -o - ${URL}"
UPSTATE="$BASEDIR/update_state"

#
# Fallback URL
#
URL_BAK="http://hub.hackerpublicradio.org/cms/status.php"
QUERY2_BAK="${BASECOM} -o - ${URL_BAK}"

#
# Prerequisites: the 'ia' tool on the PATH, and two companion scripts in the
# base directory
#
# jq=$(command -v jq)
# [ -z "$jq" ] && { echo "Needs the 'jq' JSON filter"; exit 1; }
ia=$(command -v ia)
if [ -z "$ia" ]; then
    echo "Needs the 'ia' Internet Archive script"
    exit 1
fi
if [ ! -e "$BASEDIR/make_metadata" ]; then
    echo "Needs the 'make_metadata' script"
    exit 1
fi
if [ ! -e "$UPSTATE" ]; then
    echo "Needs the 'update_state' script"
    exit 1
fi

#
# File of processed shows, created empty if it isn't there yet
#
PROCFILE="$BASEDIR/.${SCRIPT}.dat"
if [ ! -e "$PROCFILE" ]; then
    touch "$PROCFILE"
fi

#
# Constants
#
RETRIES=5
DEFLIMIT=20
#
# Process options
#
#
# Process options. Only -d and -l take a value. -r is a plain flag, as the
# usage text says, so it must NOT be followed by a colon in the option
# string - with one it swallows whatever comes next on the command line (or
# is rejected if it comes last).
#
while getopts :d:FhvDrl: opt; do
    case "${opt}" in
        d) DRYRUN=$OPTARG ;;
        D) DEBUG=1 ;;
        F) FORCE=1 ;;
        h) _usage 0 ;;
        v) VERBOSE=1 ;;
        r) REMOTE=1 ;;
        l) LIMIT=$OPTARG ;;
        *) _usage 1 ;;
    esac
done
shift $((OPTIND - 1))

#
# Dry-run mode is the default and the only legal values are 0 and 1
#
DRYRUN=${DRYRUN:-1}
if [[ $DRYRUN -ne 0 && $DRYRUN -ne 1 ]]; then
    echo "** Use '-d 0' or '-d 1'"
    _usage 1
fi
[[ $DRYRUN -eq 1 ]] && echo "Dry run mode"

FORCE=${FORCE:-0}

VERBOSE=${VERBOSE:-0}

DEBUG=${DEBUG:-0}

#
# Choose the database configuration file to match the mode
#
REMOTE=${REMOTE:-0}
if [[ $REMOTE -eq 0 ]]; then
    dbconfig="$BASEDIR/.hpr_db.cfg"
    [[ $VERBOSE -eq 1 ]] && echo "Local database mode"
else
    dbconfig="$BASEDIR/.hpr_livedb.cfg"
    [[ $VERBOSE -eq 1 ]] && echo "Remote database mode"
fi

#
# The limit has to be a plain number in the range 1 to $DEFLIMIT. It is
# checked to be all digits, and forced to base 10, before being compared so
# that something like '-l abc' or '-l 08' gets the proper message.
#
LIMIT=${LIMIT:-$DEFLIMIT}
if [[ ! $LIMIT =~ ^[0-9]+$ ]]; then
    echo "** Use '-l 1' up to '-l $DEFLIMIT' or omit the option"
    _usage 1
fi
LIMIT=$((10#$LIMIT))
if [[ $LIMIT -lt 1 || $LIMIT -gt $DEFLIMIT ]]; then
    echo "** Use '-l 1' up to '-l $DEFLIMIT' or omit the option"
    _usage 1
fi

#
# Should have no arguments
#
if [[ $# != 0 ]]; then
    echo "** ${SCRIPT} takes no arguments"
    _usage 1
fi
#
# Declarations
#
#
# Associative arrays keyed by show, and scalars which start out empty
#
declare -A processed
declare -A ready
declare -A uploads
minshow=
maxshow=
lastitem=

#
# Load array of processed shows from the cache file, one show per line. Blank
# lines are skipped (an empty key is not a legal subscript) and a final line
# with no newline after it is still picked up.
#
while read -r item || [[ -n $item ]]; do
    [[ -n $item ]] || continue
    processed[$item]=1
done < "$PROCFILE"
[ "$VERBOSE" -eq 1 ] && echo "Number of shows in cache: ${#processed[@]}"
#
# TODO: Create the associative array 'ready' containing the numbers of shows
# ready for upload. This is a way to ensure that we don't try and upload shows
# in transit to the upload area.
#
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Proposed code. Not sure what the actual URL will be nor what will be
# returned if nothing is ready for upload yet
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#
# json=$(curl http://hackerpublicradio.org/queue.php -s -o -)
# while read -r showno; do
# ready+=([$showno]=1)
# done < <(echo "${json}" | jq '.READY_FOR_IA_UPLOAD[] | tonumber')
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Change of plan. Now we have a list of CSV values, so we need to do something
# like this:
#
# reservations=$($BASECOM -o - $URL)
# while read -r line; do
# if [[ $line =~ ^([^,]+),([^,]+),([^,]+),([^,]+),([^,]+),.*$ ]]; then
# state="${BASH_REMATCH[5]}"
# show="${BASH_REMATCH[2]}"
# fi
# if [[ $state = 'MEDIA_TRANSCODED' ]]; then
# ready+=([$show]=1)
# fi
# done <<< $reservations
#
# At the end of this the associative array 'ready' will contain the keys of
# shows that are ready for upload (presumably) so we can look in this array to
# double check.
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
if [[ $FORCE -eq 0 ]]; then
    #
    # Collect the current table of shows requiring work. We expect something like:
    # timestamp_epoc,ep_num,ep_date,key,status,email
    # 1651286617,3617,2022-06-14,fda088e0e3bd5d0353ea6b7569e93b87626ca25976a0a,UPLOADED_TO_IA,lurkingprion@gmail.com
    # 1651648589,3619,2022-06-16,e7d3810afa098863d81663418d8640276272284de68f1,UPLOADED_TO_IA,monochromec@gmail.com
    # TODO: Check for a failure in the query?
    # NOTE: Problem encountered 2022-09-23 because the SSL certificate has expired
    #
    reservations=$($QUERY2) || {
        # echo "Problem querying $URL"
        # echo "Falling back to $URL_BAK"
        # reservations=$($QUERY2_BAK) || {
        #     echo "Failed with fallback URL - aborting"
        #     exit 1
        # }
        echo "Failed to query $URL - aborting"
        exit 1
    }
    _DEBUG "reservations = $reservations"

    #
    # The query above might fail in a way that just returns an empty string,
    # so check for that
    #
    if [[ -z $reservations ]]; then
        echo "No reply from $URL - can't continue"
        exit 1
    fi

    #
    # The query returns the bare number, but we're using 'hprxxxx' as the key in
    # the 'ready' array.
    #
    # Lines that don't look like a CSV row are skipped outright, so that the
    # show and state from an earlier line (or unset variables, on the first
    # line) are never tested by mistake.
    #
    while read -r line; do
        [[ $line =~ ^([^,]+),([^,]+),([^,]+),([^,]+),([^,]+),.*$ ]] || continue
        show="${BASH_REMATCH[2]}"
        state="${BASH_REMATCH[5]}"
        if [[ $state = 'MEDIA_TRANSCODED' ]]; then
            ready+=([hpr$show]=1)
        fi
    done <<< "$reservations"
    _DEBUG "ready = ${!ready[*]}"
else
    [ "$VERBOSE" -eq 1 ] && {
        echo "V: Not checking reservations table; force option used"
    }
fi
#
# Process files. Several files share the same 'hprNNNN' prefix, so the sorted
# list from 'find' is scanned and work is only done when the prefix changes.
#
while read -r path; do
    #
    # Strip "$UPLOADS/" from the front of the path and take the first seven
    # characters as the IA item name. Names are assumed to be in 'hpr9999'
    # format (with leading zeroes if appropriate).
    #
    relpath="${path#"$UPLOADS"/}"
    item="${relpath:0:7}"

    _DEBUG "Found path $path"
    _DEBUG "Relative path $relpath"
    _DEBUG "Item $item"

    #
    # Same prefix as before: just another file of an item already handled
    #
    [[ $item == "$lastitem" ]] && continue

    #
    # A new IA identifier, so work on this one
    #
    lastitem=$item
    echo -n "$lastitem "

    #
    # Recorded in the cache as having been seen
    #
    if [[ -v "processed[$lastitem]" ]]; then
        #[ $VERBOSE -eq 1 ] && echo "V: Already processed $lastitem"
        echo "- processed"
        continue
    fi

    #
    # Not in the cache, though it might have been processed by some other
    # means, so ask the IA
    #
    if [[ $VERBOSE -eq 1 ]]; then
        echo "V: Not yet processed (or not cached) $lastitem"
        echo "V: Checking IA for $lastitem"
    fi

    if ia list "$lastitem" > /dev/null 2>&1; then
        #
        # We can't tell with 'ia list' whether the show is a 'reserved' one.
        # Need to look deeper.
        #
        # It's on the IA already, save in the array
        #
        #[ $VERBOSE -eq 1 ] && echo "V: Already on IA - $lastitem"
        echo "- cached"
        processed+=([$lastitem]=1)
        continue
    fi

    #
    # Not on the IA. Unless forcing, the show must be flagged as ready
    #
    if [[ $FORCE -eq 0 && ! -v "ready[$lastitem]" ]]; then
        echo "- not ready"
        continue
    fi

    #
    # Need to upload this one; the key is the bare show number
    #
    #[ $VERBOSE -eq 1 ] && echo "V: To be uploaded to IA - $lastitem"
    echo "- to upload"
    uploads+=([${lastitem:3}]=1)

    #
    # The first show found that's not on the IA should be the lowest number,
    # and the most recent one should be the highest
    #
    [[ -n $minshow ]] || minshow="${lastitem:3}"
    maxshow="${lastitem:3}"

    #
    # Stop the loop if we have reached the limiting number
    #
    if [[ ${#uploads[@]} -eq $LIMIT ]]; then
        echo "Upload limit ($LIMIT) reached"
        break
    fi
done < <(find "$UPLOADS" -regextype posix-extended -regex '.*hpr[0-9]{4}.*' | sort)
#
# Write the processed array to the cache file unless in dry-run mode.
#
# An empty array is handled separately: 'printf' given no arguments still
# prints one empty line, which would put a blank (invalid) entry in the cache.
# In that case the file is simply emptied.
#
# [ $DEBUG -eq 1 ] && { echo -n 'D> '; declare -p processed; }
_DEBUG "processed = ${!processed[*]}"
[ "$VERBOSE" -eq 1 ] && echo "Number of shows in cache: ${#processed[@]}"
if [[ $DRYRUN -ne 1 ]]; then
    if [[ ${#processed[@]} -gt 0 ]]; then
        printf '%s\n' "${!processed[@]}" | sort -u > "$PROCFILE"
    else
        : > "$PROCFILE"
    fi
fi
#
# Generate the list of uploads for the 'make_metadata' option '-list=1,2,3'.
# Order is unimportant because make_metadata sorts internally.
#
_DEBUG "uploads = ${!uploads[*]}"
[ "$VERBOSE" -eq 1 ] && echo "Number of shows for upload: ${#uploads[@]}"
printf -v list '%s,' "${!uploads[@]}"
list="${list:0:-1}"             # drop the trailing comma

#
# If there are no uploads to do we can stop.
#
# The element count is tested rather than using '-v uploads[@]': from Bash 5.2
# that form, applied to an associative array, looks for an element whose key
# is '@' and so reports "nothing to do" even when shows have been selected.
#
if [[ ${#uploads[@]} -eq 0 ]]; then
    echo "Nothing to do!"
    exit
fi
#
# Walk the selected shows in sorted order, logging each one and confirming
# with 'check_uploads' that its files are present. The first show that fails
# the check stops the whole run.
#
while read -r show; do
    echo "$(date +%Y%m%d%H%M%S) preparing to upload hpr$show" >> "$LOGFILE"

    check_uploads "hpr$show" && continue

    echo "Missing files for show $show. Aborted!"
    echo "$(date +%Y%m%d%H%M%S) aborting on hpr$show - missing files" >> "$LOGFILE"
    exit 1
done < <(printf '%s\n' "${!uploads[@]}" | sort)

#
# Name the output files. A single show gets just its number in the names;
# more than one gets the 'min-max' range (make_metadata does this too).
#
case ${#uploads[@]} in
    1)
        metadata="metadata_${minshow}.csv"
        script="script_${minshow}.sh"
        ;;
    *)
        metadata="metadata_${minshow}-${maxshow}.csv"
        script="script_${minshow}-${maxshow}.sh"
        ;;
esac
#
# Perform the uploads or report what would be done
#

#
# Dry-run mode: describe the actions, log the fact and finish
#
if [[ $DRYRUN -eq 1 ]]; then
    echo "Dry run: Would have uploaded list '$list'"
    echo "Dry run: Would have created $metadata and $script"
    echo "Dry run: Would have uploaded $metadata and run $script"
    echo "Dry run: Would have used $dbconfig"
    echo "$BASEDIR/make_metadata -dbconf=${dbconfig} -list=\"$list\" -verb -out -script"
    echo "$(date +%Y%m%d%H%M%S) no uploads done - dry-run mode" >> "$LOGFILE"
    exit
fi

#
# Live mode: get confirmation before doing anything
#
echo "Uploading the list '$list'"
if ! yes_no "OK to continue? %s " "N"; then
    echo "Not uploaded, as requested"
    echo "$(date +%Y%m%d%H%M%S) uploads aborted by user" >> "$LOGFILE"
    exit
fi

# shellcheck disable=2086
{
    #
    # Make the metadata
    #
    $BASEDIR/make_metadata -dbconf=${dbconfig} -list="$list" -verb -out -script
    RES=$?
    if [[ $RES -ne 0 ]]; then
        echo "Upload aborted due to errors"
        echo "$(date +%Y%m%d%H%M%S) uploads aborted due to errors" >> "$LOGFILE"
        exit 1
    fi

    #
    # Upload in spreadsheet mode
    #
    ia upload --retries=$RETRIES --spreadsheet=${metadata} \
        -n -H x-archive-keep-old-version:0
    RES=$?
    if [[ $RES -ne 0 ]]; then
        echo "Failed to upload to IA; aborting"
        echo "$(date +%Y%m%d%H%M%S) IA uploads aborted due to errors" >> "$LOGFILE"
        exit 1
    fi

    #
    # Upload worked. Run the generated script if there is one
    #
    [[ -e $script ]] && ./${script}

    #
    # Append the sorted show details to the cache, turning the list '1,2,3'
    # into one 'hprNNNN' per line
    #
    echo "$list" |\
        sed -e 's/\([0-9]\{4\}\)/hpr\1/g; s/,/\n/g' | sort >> "$PROCFILE"

    echo "Uploaded ${#uploads[@]} shows"
    echo "$(date +%Y%m%d%H%M%S) ${#uploads[@]} uploads completed" >> "$LOGFILE"

    #
    # Update the state in the HPR database, unless we're using FORCE. Pass the
    # limit used here to this script so it can stop looking for work
    # unnecessarily
    #
    if [[ $FORCE -eq 0 ]]; then
        $UPSTATE -l$LIMIT
        RES=$?
        if [[ $RES -ne 0 ]]; then
            echo "Problem updating database state"
            exit 1
        fi
    else
        echo "Not updating the database, FORCE mode is on"
    fi
}

exit
# vim: syntax=sh:ts=8:sw=4:ai:et:tw=78:fo=tcrqn21

View File

@@ -0,0 +1,182 @@
#!/bin/bash -
#===============================================================================
#
# FILE: generate_asset_list
#
# USAGE: ./generate_asset_list episode [episode [episode ...]]
#
# DESCRIPTION: Produces SQL for uploading assets for a show without them.
# This is very rough and ready and can only be run on 'borg'
# with access to the 'uploads' and 'done' directories. It's
# based on Ken Fallon's suggested way of doing this, but has
# been adjusted somewhat.
#
# 2023-08-17: Modified to accept multiple episodes per run.
#
# OPTIONS: ---
# REQUIREMENTS: ---
# BUGS: ---
# NOTES: ---
# AUTHOR: Dave Morriss (djm), Dave.Morriss@gmail.com
# VERSION: 0.0.4
# CREATED: 2023-06-26 21:50:14
# REVISION: 2023-08-18 00:00:13
#
#===============================================================================
set -o nounset                              # Treat unset variables as an error

SCRIPT=${0##*/}
# DIR=${0%/*}

# VERSION="0.0.4"

#===  FUNCTION  ================================================================
#          NAME:  cleanup_temp
#   DESCRIPTION:  Cleanup temporary files in case of a keyboard interrupt
#                 (SIGINT) or a termination signal (SIGTERM) and at script
#                 exit. Defined before the trap that uses it so that it exists
#                 whichever exit path is taken.
#    PARAMETERS:  * - names of temporary files to delete
#       RETURNS:  Nothing; exits the script with the status that was current
#                 when the function was called, so a failure status isn't
#                 replaced by zero.
#===============================================================================
function cleanup_temp {
    local status=$?
    local tmp

    for tmp; do
        [ -e "$tmp" ] && rm --force "$tmp"
    done
    exit "$status"
}

#
# Needs an argument
#
if [[ $# -eq 0 ]]; then
    echo "Usage: $SCRIPT episode [episode [episode ...]]"
    exit 1
fi

#
# Make temporary files and set traps to delete them
#
TMP1=$(mktemp) || { echo "$SCRIPT: creation of temporary file failed!"; exit 1; }
trap 'cleanup_temp "$TMP1"' SIGHUP SIGINT SIGPIPE SIGTERM EXIT

#
# Configure depending whether local or on borg
#
case $HOSTNAME in
    borg)
        BASEDIR="$HOME/IA"
        UPLOADS="/data/IA/uploads"
        DONE="/data/IA/done"
        ;;
    i7-desktop)
        BASEDIR="$HOME/HPR/IA"
        UPLOADS="$BASEDIR/uploads"
        DONE="$BASEDIR/done"
        ;;
    *)
        echo "Wrong host!"
        exit 1
        ;;
esac

#
# It's easiest to work here
#
cd "$BASEDIR" || { echo "Failed to cd to $BASEDIR"; exit 1; }
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#
# Count number of arguments processed
#
argcount=0

#===  FUNCTION  ================================================================
#          NAME:  parse_episode_arg
#   DESCRIPTION:  Validates an episode argument, which must be a bare number
#                 of 1-4 digits or 'hpr' followed by such a number, with
#                 nothing before or after it.
#    PARAMETERS:  1 - the argument to check
#       RETURNS:  0 if valid, setting the globals 'show' (e.g. 'hpr0099') and
#                 'episode_id' (e.g. '99'); 1 otherwise, leaving them alone
#===============================================================================
parse_episode_arg () {
    local arg="${1:-}"
    local -i num

    [[ $arg =~ ^(hpr)?([0-9]{1,4})$ ]] || return 1

    # Force base 10; with a leading zero '0099' would be taken as (invalid)
    # octal by printf
    num=$((10#${BASH_REMATCH[2]}))
    printf -v show 'hpr%04d' "$num"
    printf -v episode_id '%d' "$num"
}

#===  FUNCTION  ================================================================
#          NAME:  sql_escape
#   DESCRIPTION:  Doubles any single quotes in a string so it can be placed
#                 inside a single-quoted SQL string literal
#    PARAMETERS:  1 - the string to escape
#       RETURNS:  Nothing; the escaped string is written to standard output
#===============================================================================
sql_escape () {
    local quote="'"
    printf '%s' "${1//$quote/$quote$quote}"
}

#
# Loop through arguments
#
for arg; do
    #
    # Accept a bare number or 'hpr<number>'. Parse out the pieces we'll need later
    #
    if ! parse_episode_arg "$arg"; then
        echo "Argument '$arg' must be '<number>' or hpr<number>'"
        echo "Skipping this argument"
        continue
    fi

    #
    # Argument is OK, so prepare for the inner loop. The SQL for this show is
    # built up in a variable and only written to the output file once every
    # audio file has been found, so a failure can't leave a partial statement
    # behind.
    #
    failed=0
    sql='INSERT INTO assets (episode_id,filename,extension,size,sha1sum,mime_type,file_type) VALUES'$'\n'

    #
    # Loop through the audio types. Keep in this order since the 'wav'
    # extension needs to be the last one.
    #
    for extension in mp3 ogg spx flac opus wav; do
        f1="$UPLOADS/${show}.${extension}"
        f2="$DONE/${show}.${extension}"

        #
        # Find the file in either of the two directories.
        # If any file type in the list is missing abort this show.
        #
        if [[ -f "$f1" ]]; then
            filename="$f1"
        elif [[ -f "$f2" ]]; then
            filename="$f2"
        else
            echo "Can't find $f1 or $f2"
            echo "Aborting the processing of $show"
            failed=1
            break
        fi

        #
        # Collect file statistics and attributes
        #
        size="$( stat --printf='%s' "${filename}" )"
        sha1sum="$( sha1sum "${filename}" | cut -f1 -d' ' )"
        mime_type=$( file --dereference --brief --mime "${filename}" )
        file_type=$( file --dereference --brief "${filename}" )

        #
        # Generate the next list of values to insert. The free-text fields
        # from 'file' are escaped in case they contain a single quote.
        #
        printf -v row "(%s,'%s','%s',%s,'%s','%s','%s')" \
            "${episode_id}" "${show}.${extension}" "${extension}" "${size}" \
            "${sha1sum}" "$(sql_escape "$mime_type")" "$(sql_escape "$file_type")"

        #
        # End with a comma or a semicolon
        #
        if [[ $extension == 'wav' ]]; then
            terminator=';'
        else
            terminator=','
        fi
        sql+="${row}${terminator}"$'\n'
    done

    #
    # Write the INSERT and add the `UPDATE` too, unless we failed to find
    # a file and aborted.
    #
    if [[ $failed -eq 0 ]]; then
        printf '%sUPDATE eps SET valid = 1 WHERE id = %s;\n' "$sql" "${episode_id}" >> "$TMP1"
        ((argcount++))
    fi
done

if [[ $argcount -gt 0 ]]; then
    cat "$TMP1"
fi
# vim: syntax=sh:ts=8:sw=4:ai:et:tw=78:fo=tcrqn21

BIN
InternetArchive/ia.db Normal file

Binary file not shown.

187
InternetArchive/ia_db.sql Normal file
View File

@@ -0,0 +1,187 @@
/*
* ia_db.sql
* =========
*
* Schema for SQLite database 'ia.db' used to hold IA upload information
* Last updated: 2022-06-16
*
*/
/*
* Table: episodes
*
* One row per HPR show, recording its upload state on the Internet Archive.
*
* id show number from HPR
* rdate release date from HPR
* title title from HPR
* summary summary from HPR
* uploaded Boolean (0,1) showing if an episode has been uploaded
* has_files Boolean (0,1) showing that there are assets for this
* show
* with_files Boolean (0,1) showing that the assets have been uploaded
* with_derived Boolean (0,1) true if we did our own "deriving" of audio
* (*.spx, *.opus, etc), preserving the tags, and these are
* on the IA rather than having been derived by the IA
* software (and having had their tags removed!)
* with_source integer flag defaulting to 0; not described in the original
* notes - presumably a Boolean (0,1) showing that the source
* audio was uploaded (TODO: confirm)
* archive_date date uploaded to the IA
* item_last_updated epoch date of last update (from the IA software)
* IA_URL URL of the uploaded show
* notes any notes about the upload
*
*/
CREATE TABLE episodes (
id integer PRIMARY KEY,
rdate date NOT NULL,
title varchar(100) NOT NULL,
summary varchar(100) NOT NULL,
uploaded integer default 0,
has_files integer default 0,
with_files integer default 0,
with_derived integer default 0,
with_source integer default 0,
archive_date date,
item_last_updated integer default 0,
IA_URL text,
notes text
);
/*
* Table: assets
*
* One row per asset (a file linked from a show), tied to its episode.
*
* id primary key
* episode_id link to episodes.id (show number the link was found in)
* URL URL of the asset
* filename filename (or path) component (after percent decoding)
* uploaded Boolean (0,1) showing if an asset has been uploaded
*
*/
CREATE TABLE assets (
id integer PRIMARY KEY,
episode_id integer REFERENCES episodes(id),
URL text NOT NULL,
filename text NOT NULL,
uploaded integer default 0
);
/*
* Index: assets_filename_idx
*
* Attempt to constrain duplicates in the assets table: a given filename may
* appear only once per episode.
*
*/
CREATE UNIQUE INDEX assets_filename_idx ON assets (episode_id, filename);
/*
* Table: dirlist
*
* A list of file names, one per row.
*
* id primary key
* filename filename or path under directory 'eps'
*
*/
CREATE TABLE dirlist (
id integer PRIMARY KEY,
filename text NOT NULL
);
/*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 * View: episodes_view
 *
 * One row per episode, in id order, with the episode's own columns, the
 * 'item_last_updated' epoch value shown as a date and time (null when it is
 * zero), plus a count and a comma-separated list of the episode's assets.
 *
 *~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/
DROP VIEW IF EXISTS episodes_view;
CREATE VIEW episodes_view AS
    SELECT
        e.id,
        e.rdate,
        e.title,
        e.summary,
        e.uploaded,
        e.has_files,
        e.with_files,
        e.with_derived,
        e.archive_date,
        e.IA_URL,
        CASE
            WHEN e.item_last_updated = 0 THEN NULL
            ELSE datetime(e.item_last_updated, 'unixepoch')
        END AS item_last_updated,
        e.notes,
        count(a.URL) AS asset_count,
        group_concat(a.filename) AS assets
    FROM episodes AS e
    LEFT JOIN assets AS a
        ON a.episode_id = e.id
    GROUP BY e.id
    ORDER BY e.id;
/*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 * View: episodes_this_month (based on episodes_view)
 *
 * Selects the rows from 'episodes_view' with a release date from the first
 * to the last day of the current month
 *
 *~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/
DROP VIEW IF EXISTS episodes_this_month;
CREATE VIEW episodes_this_month AS
    SELECT *
    FROM episodes_view
    WHERE rdate >= date('now', 'start of month')
      AND rdate <= date('now', 'start of month', '+1 month', '-1 day');
/*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 * View: episodes_last_month
 *
 * Selects the rows from 'episodes_view' with a release date from the first
 * to the last day of the previous month
 *
 *~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/
DROP VIEW IF EXISTS episodes_last_month;
CREATE VIEW episodes_last_month AS
    SELECT *
    FROM episodes_view
    WHERE rdate >= date('now', 'start of month', '-1 month')
      AND rdate <= date('now', 'start of month', '-1 day');
/*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 * View: episodes_next_month
 *
 * Selects the rows from 'episodes_view' with a release date from the first
 * to the last day of the next month
 *
 *~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/
DROP VIEW IF EXISTS episodes_next_month;
CREATE VIEW episodes_next_month AS
    SELECT *
    FROM episodes_view
    WHERE rdate >= date('now', 'start of month', '+1 month')
      AND rdate <= date('now', 'start of month', '+2 month', '-1 day');
/*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 * View: re_upload
 *
 * Selects the rows from 'episodes_view' which need to be re-uploaded. Shows the
 * last 5 uploads to give context.
 *
 * The range starts 5 below the lowest show number between 871 and 2429 that
 * has been uploaded without our own derived audio, and ends at 2429. The
 * subquery is a single aggregate (no GROUP BY) so that it returns exactly one
 * value, the true minimum, rather than one row per show.
 *
 *~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/
DROP VIEW IF EXISTS re_upload;
CREATE VIEW re_upload AS
    SELECT
        *
    FROM episodes_view
    WHERE id
        BETWEEN
            (SELECT min(id)-5
                FROM episodes_view
                WHERE id BETWEEN 871 AND 2429
                    AND uploaded = 1
                    AND with_derived = 0)
        AND 2429;
/*
 * CREATE VIEW re_upload AS
 *     SELECT
 *         *
 *     FROM episodes_view
 *     WHERE id BETWEEN 871 AND 2429
 *     AND uploaded = 1
 *     AND with_derived = 0;
 *
 */
-- vim: syntax=sql:ts=8:sw=4:ai:tw=80:et:fo=tcrqn21:fdm=marker:comments+=b\:--

View File

@@ -0,0 +1,29 @@
/* licenses.sql
*
* [American spelling, for consistency with the rest of the database]
*/
/*
* Table: licenses
*
* Rebuilt from scratch each time this file is run: any existing table is
* dropped, recreated and loaded with the seven licenses listed below.
*
* id primary key (auto-incremented)
* short_name abbreviated license name
* long_name descriptive license name
* url URL of the license text
*/
DROP TABLE IF EXISTS licenses;
CREATE TABLE IF NOT EXISTS licenses (
id int(5) AUTO_INCREMENT PRIMARY KEY,
short_name varchar(11) NOT NULL,
long_name varchar(40) NOT NULL,
url varchar(80) NOT NULL
) CHARACTER SET utf8 COLLATE utf8_general_ci;
/* Seed data; 'id' is left to AUTO_INCREMENT */
INSERT INTO licenses (short_name, long_name, url) VALUES
('CC-0', 'Public Domain Dedication', 'http://creativecommons.org/publicdomain/zero/1.0/'),
('CC-BY', 'Attribution', 'http://creativecommons.org/licenses/by/4.0'),
('CC-BY-SA', 'Attribution-ShareAlike', 'http://creativecommons.org/licenses/by-sa/3.0'),
('CC-BY-ND', 'Attribution-NoDerivs', 'http://creativecommons.org/licenses/by-nd/4.0'),
('CC-BY-NC', 'Attribution-NonCommercial', 'http://creativecommons.org/licenses/by-nc/4.0'),
('CC-BY-NC-SA', 'Attribution-NonCommercial-ShareAlike', 'http://creativecommons.org/licenses/by-nc-sa/4.0'),
('CC-BY-NC-ND', 'Attribution-NonCommercial-NoDerivs', 'http://creativecommons.org/licenses/by-nc-nd/4.0')
;
/*
vim: syntax=sql:ts=8:sw=4:et:ai:tw=75:
*/

2800
InternetArchive/make_metadata Executable file

File diff suppressed because it is too large Load Diff

41
InternetArchive/parse_ia.awk Executable file
View File

@@ -0,0 +1,41 @@
#!/usr/bin/awk -f
#-------------------------------------------------------------------------------
# Process tab-delimited data from the Internet Archive with a field name
# header, reporting particular fields. The algorithm is general though this
# instance is specific.
#
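# This script is meant to be used thus:
#   $ ia list -va hpr2450 | ./parse_ia.awk
#   hpr2450.opus              original
#   hpr2450.spx               original
#   hpr2450.wav               original
#
# Only files whose 'source' field contains 'original' are reported, so files
# marked 'derivative' (hpr2450.flac, hpr2450.mp3 and hpr2450.ogg in this
# example item) do not appear in the output.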
#
#-------------------------------------------------------------------------------
# The input is tab-delimited
BEGIN { FS = "\t" }

#
# Header line: remember the column number of every field name, so that
# fld["name"] gives the position of the 'name' column and so on.
#
NR == 1 {
    col = 1
    while (col <= NF) {
        fld[$col] = col
        col++
    }
}

#
# Data lines: report the 'name' and 'source' fields, but only for lines whose
# 'source' field contains 'original'
#
NR > 1 {
    if ($(fld["source"]) ~ /original/)
        printf "%-25s %s\n", $(fld["name"]), $(fld["source"])
}
# vim: syntax=awk:ts=8:sw=4:ai:et:tw=78:nu:rnu:

View File

@@ -0,0 +1,43 @@
#!/usr/bin/awk -f
#-------------------------------------------------------------------------------
# Process tab-delimited data from the Internet Archive with a field name
# header, reporting particular fields. The algorithm is general though this
# instance is specific.
#
# In this case we extract only the audio files
#
# This script is meant to be used thus:
# $ ia list -va hpr2450 | ./parse_ia_audio.awk
# hpr2450.flac derivative
# hpr2450.mp3 derivative
# hpr2450.ogg derivative
# hpr2450.opus original
# hpr2450.spx original
# hpr2450.wav original
#
#-------------------------------------------------------------------------------
# The input is tab-delimited
BEGIN {
    FS = "\t"
}

#
# Read the header line and collect the fields into an array such that a search
# by field name returns the field number.
#
NR == 1 {
    for (i = 1; i <= NF; i++) {
        fld[$i] = i
    }
}

#
# Read the rest of the data, reporting only the lines relating to audio files
# and print the fields 'name' and 'source'. The extension must be at the very
# end of the name, so a file which merely contains an audio extension
# somewhere in its name is not mistaken for audio.
#
NR > 1 && $(fld["name"]) ~ /[^.]\.(flac|mp3|ogg|opus|spx|wav)$/ {
    printf "%-15s %s\n",$(fld["name"]),$(fld["source"])
}
# vim: syntax=awk:ts=8:sw=4:ai:et:tw=78:nu:rnu:

479
InternetArchive/past_upload Executable file
View File

@@ -0,0 +1,479 @@
#!/bin/bash -
#===============================================================================
#
# FILE: past_upload
#
# USAGE: ./past_upload [-h] [-r] [-v] [-d {0|1}] [-F] [-m] [-Y] start [count]
#
# DESCRIPTION: Run the commands necessary to upload a batch of older HPR
# shows to archive.org
#
# OPTIONS: ---
# REQUIREMENTS: ---
# BUGS: ---
# NOTES: ---
# AUTHOR: Dave Morriss (djm), Dave.Morriss@gmail.com
# VERSION: 0.0.12
# CREATED: 2021-04-17 22:14:16
# REVISION: 2022-07-07 16:17:41
#
#===============================================================================
set -o nounset                              # Treat unset variables as an error

SCRIPT=${0##*/}
# DIR=${0%/*}

VERSION="0.0.12"

# Where the usage message is written. Despite the name this is file
# descriptor 2 (standard error).
STDOUT="/dev/fd/2"

#
# Select the appropriate working directory
#
case $(hostname) in
    i7-desktop)
        BASEDIR="$HOME/HPR/InternetArchive"
        UPLOAD="$BASEDIR/uploads"
        ;;
    borg)
        BASEDIR="$HOME/IA"
        UPLOAD="/data/IA/uploads"
        ;;
    *)
        echo "Wrong host!"
        exit 1
        ;;
esac

cd "$BASEDIR" || exit 1

#
# Load library functions. A missing library is a failure, so exit non-zero
# (a bare 'exit' here would return the status of the 'echo', which is 0).
#
LIB="$HOME/bin/function_lib.sh"
[ -e "$LIB" ] || { echo "Unable to source functions"; exit 1; }
# shellcheck disable=SC1090
source "$LIB"

#
# Log file
#
LOGS="$BASEDIR/logs"
LOGFILE="$LOGS/$SCRIPT.log"

#
# Make temporary files and set traps to delete them
#
TMP1=$(mktemp) || { echo "$SCRIPT: creation of temporary file failed!"; exit 1; }
trap 'cleanup_temp $TMP1' SIGHUP SIGINT SIGPIPE SIGTERM EXIT
#===  FUNCTION  ================================================================
#          NAME:  _verbose
#   DESCRIPTION:  Prints a message, but only when the global VERBOSE is 1
#    PARAMETERS:  $1      message (an empty string if omitted)
#       RETURNS:  The status of the 'echo' when the message is printed,
#                 otherwise 1
#===============================================================================
_verbose () {
    local text="${1:-}"

    [[ $VERBOSE -eq 1 ]] || return 1
    echo "$text"
}
#===  FUNCTION  ================================================================
#          NAME:  _usage
#   DESCRIPTION:  Reports usage; always exits the script after doing so. The
#                 text is written to the file named in the global STDOUT and
#                 uses the globals SCRIPT and VERSION.
#    PARAMETERS:  1 - the integer to pass to the 'exit' command (default 0)
#       RETURNS:  Nothing
#===============================================================================
_usage () {
    local -i result=${1:-0}

    # The synopsis lists every option accepted by the option parser,
    # including -F, -m and -Y which are described under 'Options' below.
    cat >$STDOUT <<-endusage
${SCRIPT} - version: ${VERSION}

Usage: ./${SCRIPT} [-h] [-r] [-v] [-d {0|1}] [-F] [-m] [-Y] start [count]

Generates the necessary metadata and script and uses them to upload HPR audio
and other show-related files held on the VPS to the Internet Archive. This
script is similar to 'weekly_upload' but it's for dealing with older shows
where we only have the MP3 audio.

Options:
  -h                    Print this help
  -v                    Run in verbose mode where more information is reported
  -d 0|1                Dry run: -d 1 (the default) runs the script in dry-run
                        mode where nothing is changed but the actions that
                        will be taken are reported; -d 0 turns off dry-run
                        mode and the actions will be carried out.
  -F                    Force an upload even if the items are already on the
                        IA. Use with *GREAT* caution!
  -m                    Update the item's metadata from the file generated
                        for (re-)uploads. This ensures that any changes to the
                        notes, summary, tags, etc are propagated. This does
                        not happen by default, but shows with assets are
                        always updated this way.
  -r                    Run in 'remote' mode, using the live database over an
                        (already established) SSH tunnel. Default is to run
                        against the local database.
  -Y                    Answer 'Y' to the confirmation question (really don't
                        ask at all)

Arguments:
    start               the starting show number to be uploaded
    count               (optional, default 1) the number of shows to be
                        uploaded; not allowed to exceed 20

Notes:

1. When running on 'borg' the method used is to run in faux 'local' mode.
   This means we have an open tunnel to the HPR server (mostly left open) and
   the default file .hpr_db.cfg points to the live database via this tunnel.
   So we do not use the -r option here. This is a bit of a hack! Sorry!
   TODO: Needs fix!

2. There are potential problems when a show has no tags which haven't been
   fully resolved. The make_metadata script fails in default mode when it
   finds such a show, but this (${SCRIPT}) script can continue on and run
   the generated script which uploads the source audio files. This can mean
   the IA items end up as books! In this mode the description is not stored
   and so there are no show notes.

endusage
    exit "$result"
}
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#
# Prerequisites: four commands that must be on the PATH and two scripts that
# must exist in $BASEDIR. The first one found to be missing stops the script.
#
jq=$(command -v jq)
if [[ -z $jq ]]; then
    echo "Needs the 'jq' JSON filter"
    exit 1
fi

ia=$(command -v ia)
if [[ -z $ia ]]; then
    echo "Needs the 'ia' Internet Archive script"
    exit 1
fi

transfer_tags=$(command -v transfer_tags)
if [[ -z $transfer_tags ]]; then
    echo "Needs the 'transfer_tags' script"
    exit 1
fi

tunnel_is_open=$(command -v tunnel_is_open)
if [[ -z $tunnel_is_open ]]; then
    echo "Needs the 'tunnel_is_open' script"
    exit 1
fi

if [[ ! -e "$BASEDIR/transcode" ]]; then
    echo "Needs the 'transcode' script"
    exit 1
fi

if [[ ! -e "$BASEDIR/make_metadata" ]]; then
    echo "Needs the 'make_metadata' script"
    exit 1
fi

#
# Constant
#
RETRIES=5

#
# Check the tunnel is open
#
if ! tunnel_is_open; then
    echo "Open the tunnel before running this script (open_tunnel)"
    exit 1
fi
#-------------------------------------------------------------------------------
# Process options
#-------------------------------------------------------------------------------
while getopts :d:FhmrvY opt
do
    case "${opt}" in
        d) DRYRUN=$OPTARG;;
        F) FORCE=1;;
        h) _usage 0;;           # asking for help is not an error
        m) METADATA=1;;
        r) REMOTE=1;;
        v) VERBOSE=1;;
        Y) YES=1;;
        *) _usage 1;;
    esac
done
shift $((OPTIND - 1))

#
# Check choices and set defaults. The dry-run value is validated as text; a
# numeric comparison on a non-numeric value would be evaluated as a variable
# name, which fails under 'nounset'.
#
DRYRUN=${DRYRUN:-1}
if [[ ! $DRYRUN =~ ^[01]$ ]]; then
    echo "** Use '-d 0' or '-d 1'"
    _usage 1
fi

FORCE=${FORCE:-0}

METADATA=${METADATA:-0}

YES=${YES:-0}

VERBOSE=${VERBOSE:-0}

#
# Choose the database configuration file according to the mode
#
REMOTE=${REMOTE:-0}
if [[ $REMOTE -eq 0 ]]; then
    dbconfig="$BASEDIR/.hpr_db.cfg"
    _verbose "Local database mode"
else
    dbconfig="$BASEDIR/.hpr_livedb.cfg"
    _verbose "Remote database mode"
fi

#
# Check argument count
#
if [[ ! ( $# -eq 1 || $# -eq 2 ) ]]; then
    echo "Wrong number of arguments"
    _usage 1
fi
#
# Validate arguments
#
for arg; do
    if [[ ! $arg =~ ^[0-9]{1,4}$ ]]; then
        echo "Invalid number: $arg"
        echo "Use a plain number"
        exit 1
    fi
done

#
# Set variables for the range of shows. The numbers are forced to base 10
# because the arguments may have leading zeroes ('0099'), which arithmetic
# and 'printf' would otherwise treat as octal.
#
start=$((10#$1))
count=$((10#${2:-1}))

#
# A count of zero would make the end lower than the start, which the range
# expansion below would treat as a descending range of two shows
#
if [[ $count -lt 1 ]]; then
    echo "Must process at least 1 show"
    exit 1
fi
if [[ $count -gt 20 ]]; then
    echo "Can't process more than 20 shows at a time"
    exit 1
fi

end=$((start + count - 1))

[[ $DRYRUN -eq 1 ]] && _verbose "Dry run mode"

if [[ $VERBOSE -eq 1 ]]; then
    echo "Processing $count $(ngettext show shows "$count") from $start"
else
    echo "${start}..${end}"
fi
#
# Log the start of this run
#
[[ $DRYRUN -eq 0 ]] && \
    echo "$(date +%Y%m%d%H%M%S) Processing ${start}..${end} (v$VERSION)" >> "$LOGFILE"

#
# Store the zero-padded show numbers in an array. A counting loop is used
# rather than brace expansion, which would need 'eval' to accept variables.
#
declare -a shows=()
for ((n = 10#$start; n <= end; n++)); do
    printf -v padded '%04d' "$n"
    shows+=("$padded")
done

#
# Walk the array and delete elements that are already on the IA
#
if [[ $FORCE -eq 1 ]]; then
    _verbose 'Not checking for shows on archive.org; forcing!'
    [[ $DRYRUN -eq 0 ]] && echo "$(date +%Y%m%d%H%M%S) Forcing an update (-F)" >> "$LOGFILE"
else
    _verbose 'Checking for shows on archive.org'
    [[ $DRYRUN -eq 0 ]] && echo "$(date +%Y%m%d%H%M%S) Checking archive.org" >> "$LOGFILE"

    for i in "${!shows[@]}"; do
        if ia list "hpr${shows[$i]}" > /dev/null 2>&1; then
            _verbose "Found hpr${shows[$i]} on archive.org"
            unset "shows[$i]"
        fi
    done
fi

#
# Stop if there's nothing to do
#
if [[ ${#shows[@]} -eq 0 ]]; then
    echo "Nothing to do; nominated show(s) are currently on archive.org"
    [[ $DRYRUN -eq 0 ]] && echo "$(date +%Y%m%d%H%M%S) Nothing to do" >> "$LOGFILE"
    exit 1
else
    _verbose "There $(ngettext 'is 1 show' "are ${#shows[@]} shows" "${#shows[@]}") to process"
fi
#
# Find which audio needs to be downloaded and go get it
#
_verbose "Downloading missing audio..."
if [[ $DRYRUN -eq 1 ]]; then
    echo "Would have attempted to download ${#shows[@]} $(ngettext show shows "${#shows[@]}") (dry run)"
else
    for item in "${shows[@]}"; do
        if [[ ! -e $UPLOAD/hpr$item.mp3 ]]; then
            echo "Downloading hpr$item.mp3"
            #
            # 'wget -O' creates the output file even when the download fails.
            # Remove it in that case, otherwise the empty file would be
            # transcoded now and taken as "already downloaded" on a later run.
            #
            if ! wget -q "http://hackerpublicradio.org/local/hpr$item.mp3" \
                -O "$UPLOAD/hpr$item.mp3"; then
                rm -f -- "$UPLOAD/hpr$item.mp3"
                echo "Failed to download hpr$item.mp3; aborting"
                echo "$(date +%Y%m%d%H%M%S) Download of hpr$item.mp3 failed" >> "$LOGFILE"
                exit 1
            fi
            _verbose "Downloaded $UPLOAD/hpr$item.mp3"
        else
            _verbose "$UPLOAD/hpr$item.mp3 already exists"
        fi
    done
fi

#
# Transcode the audio as needed
#
_verbose "Transcoding missing audio..."
if [[ $DRYRUN -eq 1 ]]; then
    echo "Would have transcoded ${#shows[@]} $(ngettext show shows "${#shows[@]}") (dry run)"
else
    echo "$(date +%Y%m%d%H%M%S) Transcoding ${#shows[@]} $(ngettext show shows "${#shows[@]}")" >> "$LOGFILE"

    for item in "${shows[@]}"; do
        if [[ $VERBOSE -eq 1 ]]; then
            ./transcode -v "$UPLOAD/hpr$item.mp3"
        else
            ./transcode "$UPLOAD/hpr$item.mp3"
        fi
    done
fi
#
# The shows remaining in the list are the ones to be uploaded, so report that
# the upload stage has been reached
#
_verbose "Uploading $(ngettext show shows "${#shows[@]}")..."

#
# Build the names of the files make_metadata will write. A single show is
# named with just its number rather than as a range like '1-1', which is
# purely for aesthetic reasons.
#
if [[ $start -eq $end ]]; then
    metadata=$(printf 'metadata_%04d.csv' "$start")
    script=$(printf 'script_%04d.sh' "$start")
else
    metadata=$(printf 'metadata_%04d-%04d.csv' "$start" "$end")
    script=$(printf 'script_%04d-%04d.sh' "$start" "$end")
fi
#
# Check on the dry-run choice
#
if [[ $DRYRUN -eq 1 ]]; then
    echo "Dry run: Would have uploaded $count $(ngettext show shows "$count") from $start"
    echo "Dry run: Would have created $metadata and $script"
    echo "Dry run: Would have uploaded $metadata and run $script"
    echo "Dry run: Would have used $dbconfig"
    echo -n "Dry run: Would have done metadata updates for "
    if [[ $METADATA -eq 0 ]]; then
        echo "shows with assets"
    else
        echo "all shows"
    fi
else
    #
    # Really do the upload
    #
    if [[ $start -eq $end ]]; then
        echo "Uploading $start"
    else
        echo "Uploading $start to $end inclusive"
    fi

    #
    # Implement the -Y (override) option
    #
    if [[ $YES -eq 1 ]]; then
        confirmed=1
    else
        echo "$(date +%Y%m%d%H%M%S) Waiting for confirmation" >> "$LOGFILE"
        if yes_no "OK to continue? %s " "N"; then
            confirmed=1
        else
            confirmed=0
        fi
    fi

    #---------------------------------------------------------------------------
    # Do the work
    #---------------------------------------------------------------------------
    if [[ $confirmed -eq 1 ]]; then
        # shellcheck disable=2086
        {
            #
            # Make the metadata
            #
            _verbose "Running make_metadata"
            $BASEDIR/make_metadata -dbconf=${dbconfig} \
                -from=$start -count=$count \
                -verb -out -script -a_count=$TMP1
            RES=$?
            if [[ $RES -ne 0 ]]; then
                echo "Upload aborted due to errors"
                echo "$(date +%Y%m%d%H%M%S) Upload failed due to errors" >> "$LOGFILE"
                exit 1
            fi

            #
            # Upload in spreadsheet mode. A failure stops the run here so that
            # it is never logged as a successful upload.
            #
            _verbose "Uploading audio and any assets"
            if ! ia upload --retries=$RETRIES --spreadsheet=${metadata} \
                -H x-archive-keep-old-version:0; then
                echo "Failed to upload to IA; aborting"
                echo "$(date +%Y%m%d%H%M%S) Upload to IA failed" >> "$LOGFILE"
                exit 1
            fi

            #
            # Run the generated script if there is one. Problems are reported
            # and logged but don't stop the run.
            #
            if [ -e $script ]; then
                if ! ./${script}; then
                    echo "Warning: $script reported errors"
                    echo "$(date +%Y%m%d%H%M%S) $script reported errors" >> "$LOGFILE"
                fi
            fi
            echo "$(date +%Y%m%d%H%M%S) Uploaded shows" >> "$LOGFILE"

            #
            # Update metadata for all shows if requested
            #
            if [[ $METADATA -eq 1 ]]; then
                _verbose "Uploading changed metadata"
                if ia metadata --spreadsheet=${metadata}; then
                    echo "$(date +%Y%m%d%H%M%S) Metadata uploaded for all shows" >> "$LOGFILE"
                else
                    echo "Metadata update aborted due to errors"
                    echo "$(date +%Y%m%d%H%M%S) Metadata upload failed due to errors" >> "$LOGFILE"
                    exit 1
                fi
            else
                #
                # We aren't updating metadata for all, but if any shows had
                # assets we need to do metadata updates. The show details are
                # in the temporary file $TMP1
                #
                if [[ -s $TMP1 ]]; then
                    _verbose "Refreshing metadata for shows with assets"
                    declare -a mshows
                    mapfile -t mshows < <(cut -f1 -d' ' $TMP1 | sed -e 's/^hpr//' | sort)

                    #
                    # Join all of the show numbers with commas
                    #
                    mlist=$(IFS=,; echo "${mshows[*]}")

                    #
                    # The numbers have leading zeroes, so force base 10 to
                    # stop 'printf' reading them as octal
                    #
                    if [[ ${#mshows[@]} -eq 1 ]]; then
                        printf -v metadata 'meta_metadata_%04d.csv' "$((10#${mshows[0]}))"
                    else
                        printf -v metadata 'meta_metadata_%04d-%04d.csv' \
                            "$((10#${mshows[0]}))" "$((10#${mshows[-1]}))"
                    fi

                    _verbose "Regenerating metadata"
                    $BASEDIR/make_metadata -dbconf=${dbconfig} -list="${mlist}" \
                        -out=${metadata} -meta -noassets -verb
                    RES=$?
                    if [[ $RES -eq 0 ]]; then
                        _verbose "Uploading new metadata"
                        if ia metadata --spreadsheet=${metadata}; then
                            echo "$(date +%Y%m%d%H%M%S) Metadata uploaded for eligible shows" >> "$LOGFILE"
                        else
                            echo "Metadata update aborted due to errors"
                            echo "$(date +%Y%m%d%H%M%S) Metadata upload failed due to errors" >> "$LOGFILE"
                            exit 1
                        fi
                    else
                        echo "Metadata update aborted due to errors"
                        echo "$(date +%Y%m%d%H%M%S) Metadata upload failed due to errors" >> "$LOGFILE"
                        exit 1
                    fi
                fi
            fi
        }
    else
        echo "Not uploaded"
        echo "$(date +%Y%m%d%H%M%S) Upload aborted" >> "$LOGFILE"
    fi
fi
# vim: syntax=sh:ts=8:sw=4:ai:et:tw=78:fo=tcrqn21

453
InternetArchive/repair_item Executable file
View File

@@ -0,0 +1,453 @@
#!/bin/bash -
#===============================================================================
#
# FILE: repair_item
#
# USAGE: ./repair_item [-h] [-v] [-d {0|1}] [-D] [-l N] itemname
#
# DESCRIPTION: Repairs an IA "item" (HPR show) if something has failed during
# the upload.
#
# The most common failures are caused by the file upload
# processes timing out and being aborted (by the 'ia' tool which
# performs the item creation and the uploads). This failure
# means that a show being processed on 'borg' does not get all
# of the components loaded to the IA.
#
# This script looks at the files belonging to the show (stored
# temporarily on 'borg') and determines which have not been
# uploaded, then takes steps to perform the uploads.
#
# OPTIONS: ---
# REQUIREMENTS: ---
# BUGS: ---
# NOTES: ---
# AUTHOR: Dave Morriss (djm), Dave.Morriss@gmail.com
# VERSION: 0.0.6
# CREATED: 2020-01-05 22:42:46
# REVISION: 2024-05-10 12:39:52
#
#===============================================================================
#set -o nounset # Treat unset variables as an error
VERSION="0.0.6"
SCRIPT=${0##*/}
# DIR=${0%/*}

# NOTE: despite the name this is file descriptor 2 (stderr); '_usage' writes
# its help text here.
STDOUT="/dev/fd/2"

#
# Select the appropriate working directory for the host
#
case $(hostname) in
    i7-desktop)
        BASEDIR="$HOME/HPR/InternetArchive"
        UPLOADS="$HOME/HPR/IA/uploads"
        ;;
    borg)
        BASEDIR="$HOME/IA"
        UPLOADS="/data/IA/uploads"
        ;;
    *)
        echo "Wrong host!"
        exit 1
        ;;
esac

cd "$BASEDIR" || { echo "Failed to cd to $BASEDIR"; exit 1; }

#
# Load library functions. A missing library is fatal, so exit with a failure
# status (a bare 'exit' here would report success because the preceding
# 'echo' succeeded).
#
LIB="$HOME/bin/function_lib.sh"
[ -e "$LIB" ] || { echo "Unable to source functions"; exit 1; }
# shellcheck disable=SC1090
source "$LIB"

#
# Enable coloured messages
#
define_colours

#
# Sanity checks: the external tools used by this script must be available
#
JQ=$(command -v jq)
[ -n "$JQ" ] || { echo "Program 'jq' was not found"; exit 1; }
IA=$(command -v ia)
[ -n "$IA" ] || { echo "Program 'ia' was not found"; exit 1; }

#
# Make temporary files and set traps to delete them
#
TMP1=$(mktemp) || { echo "$SCRIPT: creation of temporary file failed!"; exit 1; }
trap 'cleanup_temp $TMP1' SIGHUP SIGINT SIGPIPE SIGTERM EXIT
# {{{ -- Functions -- Upload, exists_in, queued_tasks, _DEBUG, _usage
#=== FUNCTION ================================================================
# NAME: Upload
# DESCRIPTION: Upload a file to the Internet Archive with various options
# PARAMETERS: 1 - the item id (e.g. 'hpr1234')
# 2 - the path to the file for upload
# 3 - (optional) the path to the file on the IA
# 4 - (optional) list of options for 'ia upload' enclosed as
# a string; it is split on whitespace into separate options
# RETURNS: The exit status of 'ia upload'. If the local file does not
# exist a message is printed and the status is 0.
#===============================================================================
Upload () {
    local id=${1}
    local file=${2}
    local remote=${3:-}
    local options=${4:-}
    local -a args

    if [[ ! -e $file ]]; then
        echo "File missing: $file"
        return 0
    fi

    #
    # Collect the fixed arguments in an array so that ids, paths and remote
    # names containing spaces or glob characters reach 'ia' as single words
    #
    args=("$id" "$file")
    if [[ -n $remote ]]; then
        args+=("--remote-name=$remote")
    fi

    #
    # The option string is deliberately left unquoted: it holds several
    # options which must be split into separate words
    #
    # shellcheck disable=SC2086
    ia upload "${args[@]}" ${options}
}
#=== FUNCTION ================================================================
# NAME: exists_in
# DESCRIPTION: Checks the existence of a key in an associative array
# PARAMETERS: $1 array name
# $2 key value
# RETURNS: True if the key exists (even with an empty value), False
# otherwise
#
# Originally modified from
# https://stackoverflow.com/questions/13219634/easiest-way-to-check-for-an-index-or-a-key-in-an-array
#===============================================================================
exists_in () {
    #
    # A nameref reaches the caller's array without splicing the array name
    # into code for 'eval'. The long local name avoids clashing with the name
    # of the array being referenced.
    #
    local -n __exists_in_array=$1

    #
    # The '+' expansion yields text only when the element is set
    #
    [[ -n ${__exists_in_array[$2]+set} ]]
}
#=== FUNCTION ================================================================
# NAME: queued_tasks
# DESCRIPTION: Queries the IA for any queued or running tasks for an item.
# Writes the number to STDOUT so it can be captured.
# PARAMETERS: $1 IA item (like hpr1192)
# RETURNS: Nothing
#===============================================================================
queued_tasks () {
    local item="${1:?Usage: queued_tasks item}"
    # jq program: slurp the task records, keep the status of each one whose
    # category is "catalog" and count them
    local filter='[.[] | if .category == "catalog" then .status else empty end] | length'
    # Integer attribute: an empty result from the pipeline is stored as 0
    local -i pending=0

    pending=$(ia tasks "$item" | jq -s "$filter")

    # The count goes to standard output for the caller to capture
    printf '%s\n' "$pending"
}
#=== FUNCTION ================================================================
# NAME: _DEBUG
# DESCRIPTION: Writes each message on its own line, prefixed with 'D> ', if
# in DEBUG mode (global DEBUG is anything other than 0). An
# unset or empty DEBUG is treated as 0.
# PARAMETERS: List of messages
# RETURNS: Nothing
#===============================================================================
_DEBUG () {
    # Keep the loop variable local so a caller's 'msg' is not overwritten
    local msg

    [[ ${DEBUG:-0} == 0 ]] && return 0

    for msg in "$@"; do
        printf 'D> %s\n' "$msg"
    done
}
#=== FUNCTION ================================================================
# NAME: _usage
# DESCRIPTION: Reports usage; always exits the script after doing so. The
# text is written to the file named by the global STDOUT and
# interpolates the globals SCRIPT, VERSION and DEFLIMIT, so
# these must be set before the function is called.
# PARAMETERS: 1 - the integer to pass to the 'exit' command (default 0)
# RETURNS: Nothing (the script exits)
#===============================================================================
_usage () {
# Exit status requested by the caller; 0 when omitted
local -i result=${1:-0}
# The delimiter is unquoted so variables in the text are expanded; '<<-'
# removes leading tabs (only) from the text and the delimiter line
cat >$STDOUT <<-endusage
${SCRIPT} - version: ${VERSION}
Usage: ./${SCRIPT} [-h] [-v] [-d {0|1}] [-D] [-l N] item
Attempts to repair an IA item where the upload has failed for some reason.
Options:
-h Print this help
-v Run in verbose mode where more information is
reported. Default is off.
-d 0|1 Dry run: -d 1 (the default) runs the script in dry-run
mode where nothing is changed but the actions that
will be taken are reported; -d 0 turns off dry-run
mode and the actions will be carried out.
-D Run in debug mode where a lot more information is
reported
-l N Control the number of shows that can be uploaded at
once. The range is 1 to $DEFLIMIT.
Arguments:
item The item in the form 'hpr1234'
endusage
exit "$result"
}
# }}}
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#
# Directories and files
#
LOGS="$BASEDIR/logs"
LOGFILE="$LOGS/$SCRIPT.log"

#
# Constants
#
DEFLIMIT=20

#
# Process options. The leading ':' selects silent error reporting, so an
# unknown option or a missing option argument arrives in the '*' branch.
#
while getopts :d:Dhl:v opt
do
    case "${opt}" in
        D) DEBUG=1;;
        d) DRYRUN=$OPTARG;;
        h) _usage 0;;
        l) LIMIT=$OPTARG;;
        v) VERBOSE=1;;
        *) echo "** Unknown option"
            _usage 1;;
    esac
done
shift $((OPTIND - 1))

#
# Set option defaults and check their values
#
VERBOSE=${VERBOSE:-0}

#
# The dry-run value is matched as text. A numeric test would evaluate
# a non-numeric argument (e.g. '-d yes') as 0 and silently turn dry-run mode
# off.
#
DRYRUN=${DRYRUN:-1}
if [[ ! $DRYRUN =~ ^[01]$ ]]; then
    echo "** Use '-d 0' or '-d 1'"
    _usage 1
fi
[[ $VERBOSE -eq 1 && $DRYRUN -eq 1 ]] && echo "Dry run mode"

DEBUG=${DEBUG:-0}
[[ $DEBUG -eq 1 ]] && coloured 'yellow' "Debug mode"

#
# The limit must be a plain decimal number in the range 1..DEFLIMIT. The
# '10#' prefix forces base 10 so that values like '08' are not rejected as
# invalid octal, and the stored value is normalised for later comparisons.
#
LIMIT=${LIMIT:-$DEFLIMIT}
if [[ ! $LIMIT =~ ^[0-9]+$ ]] || (( 10#$LIMIT < 1 || 10#$LIMIT > DEFLIMIT )); then
    echo "** Use '-l 1' up to '-l $DEFLIMIT' or omit the option"
    _usage 1
fi
LIMIT=$((10#$LIMIT))

#
# Should have one argument
#
if [[ $# != 1 ]]; then
    coloured 'red' "Missing argument"
    _usage 1
fi
item="${1}"

#
# Ensure item spec is correctly formatted. The show number is converted
# with an explicit base of 10 because 'printf %d' treats a leading zero as
# octal: without it 'hpr0089' would be rejected as an invalid number and
# 'hpr0012' would become 'hpr0010'.
#
if [[ $item =~ hpr([0-9]+) ]]; then
    printf -v item 'hpr%04d' "$((10#${BASH_REMATCH[1]}))"
else
    coloured 'red' "Incorrect show specification: $item"
    coloured 'yellow' "Use 'hpr9999' format"
    exit 1
fi
_DEBUG "Parsed item: $item"
#
# Declarations
#
declare -A fcache
declare -A iacache
declare -a missed

#
# Scan the directory 'UPLOADS' where files for upload to the IA are stored and
# collect every file (not directory) whose path contains the item name.
#
# The paths are used as keys of an associative array, so the order in which
# they are found is irrelevant. 'find' writes them NUL-terminated so that
# paths containing spaces are read intact.
#
# The item name derived from each path is only reported for debugging; the
# variable 'item' (the show given on the command line) must not be changed
# here because the rest of the script relies on it.
#
while IFS= read -r -d '' path; do
    relpath="${path#"$UPLOADS"/}"

    [[ $VERBOSE -eq 1 ]] && echo "Found $path"
    _DEBUG "Path: $path"
    _DEBUG "Relative path: $relpath"
    _DEBUG "IA item: ${relpath:0:7}"

    # shellcheck disable=SC2034
    fcache[$relpath]=1
done < <(find "$UPLOADS" -type f -regextype posix-extended \
    -regex ".*$item.*" -print0)

#
# Did we find anything?
#
if [[ ${#fcache[@]} -eq 0 ]]; then
    coloured 'red' "No files found for item $item in $UPLOADS"
    coloured 'red' "Can't continue"
    exit 1
fi
#
# Look to see if there are any tasks queued for this show on the IA servers.
# If there are we can't continue.
#
# TODO: This could be a loop waiting for tasks to complete rather than
# aborting and asking to be rerun.
#
tasks=$(queued_tasks "$item")
if (( tasks > 0 )); then
    coloured 'red' \
        "Item $item still has $tasks unfinished $(ngettext task tasks "$tasks")"
    coloured 'red' "Allow time for task(s) to finish and try again later"
    exit 1
fi

#
# Ask the IA for the list of files it holds for this item; the list is
# written to the file 'TMP1'. If the request fails we can't proceed.
#
if ! ia list "$item" > "$TMP1"; then
    coloured 'red' "Item $item is not in the IA"
    coloured 'red' "Can't continue"
    exit 1
fi

#
# Record every file name returned by the IA as a key of 'iacache'
#
while read -r iafile; do
    # shellcheck disable=SC2034
    iacache[$iafile]=1
done < "$TMP1"

#
# Any local file which is not among the IA's files has been missed
#
for path in "${!fcache[@]}"; do
    exists_in iacache "$path" || missed+=("$path")
done
#
# Counters and defaults for the loop
#
retry_threshold=5
sleeptime=20
failures=0
upload_count=0

#
# Options passed to 'ia upload' (through the Upload function) for every file
#
upload_options='--retries=5 --no-derive -H x-archive-keep-old-version:0'

#
# If there are missed files we can report what we'd be doing or do it,
# otherwise we have nothing to do.
#
if [[ ${#missed[@]} -eq 0 ]]; then
    coloured 'green' "All expected files for item $item are on the IA"
else
    mcount="${#missed[@]}"
    coloured 'red' "There $(ngettext "is 1 missing file" "are $mcount missing files" "$mcount"):"

    [[ $DRYRUN -eq 1 ]] && {
        coloured 'blue' "Dry run: Would have run the following command(s):"
    }

    for file in "${missed[@]}"; do
        if [[ $DRYRUN -eq 1 ]]; then
            #
            # This string is for display only; it is never executed
            #
            coloured 'yellow' \
                "Upload $item $UPLOADS/$file '$file' '$upload_options'"
        else
            retries=0

            printf 'Uploading %s\n' "$file"

            #
            # Call Upload directly (not through 'eval' on a built-up string)
            # so that file names containing spaces or quotes are passed
            # intact. If it succeeds then write to the log and loop for the
            # next missing file. If it fails enter the 'until' loop and
            # report the problem. Count the number of times this is done, so
            # it doesn't loop forever. If we have reached the limit count
            # this as a failure and continue the parent loop (with the next
            # missing file). If we haven't retried enough yet, sleep for
            # a while and try again. The intention is to catch the case when
            # an upload times out. The 'ia' command is performing its own
            # retries per upload when the system is overloaded, but these are
            # non-fatal.
            #
            until Upload "$item" "$UPLOADS/$file" "$file" "$upload_options"; do
                coloured 'red' "Failure when invoking the Upload command!"
                ((retries++))

                printf '%s Failed to upload %s to the IA [%d]\n' \
                    "$(date +%Y%m%d%H%M%S)" "$file" "$retries" >> "$LOGFILE"

                [ "$retries" -eq "$retry_threshold" ] && {
                    ((failures++))
                    continue 2
                }

                sleep $sleeptime
            done # until Upload ...

            echo "$(date +%Y%m%d%H%M%S) Uploaded $file to the IA" >> "$LOGFILE"
        fi

        #
        # Count actual uploads and dry-run ones the same
        #
        ((upload_count++))

        #
        # Stop the missed file loop if we have reached the limiting number, in
        # dry-run and live mode
        #
        [[ $upload_count -eq $LIMIT ]] && {
            coloured 'blue' "Upload limit ($LIMIT) reached"
            break
        }
    done # for file in ...
fi

#
# Summarise how many upload failures were detected
#
if [[ $failures -gt 0 ]]; then
    coloured 'red' \
        "There $(ngettext "was $failures upload failure" "were $failures upload failures" "$failures")"
    coloured 'yellow' 'Run this script again to repeat the repair attempt'
fi
# vim: syntax=sh:ts=8:sw=4:ai:et:tw=78:fo=tcrqn21:fdm=marker

247
InternetArchive/replace_derived Executable file
View File

@@ -0,0 +1,247 @@
#!/bin/bash -
#===============================================================================
#
# FILE: replace_derived
#
# USAGE: ./replace_derived show_number
#
# DESCRIPTION: Given a show that has already been uploaded to the IA, upload
# a locally derived set of files to replace those generated by
# the IA themselves. The IA's process of deriving files does not
# propagate the tags, which we really want to do.
#
# OPTIONS: ---
# REQUIREMENTS: ---
# BUGS: ---
# NOTES: This is to be run on the HPR VPS
# AUTHOR: Dave Morriss (djm), Dave.Morriss@gmail.com
# VERSION: 0.0.5
# CREATED: 2018-01-05 19:08:32
# REVISION: 2019-01-27 15:55:58
#
#===============================================================================
set -o nounset                  # Treat unset variables as an error

VERSION="0.0.5"
SCRIPT=${0##*/}
# DIR=${0%/*}
STDOUT="/dev/fd/2"

#
# Choose the working directory according to the host we are running on; any
# other host is refused
#
case $(hostname) in
    i7-desktop)
        BASEDIR="$HOME/HPR/InternetArchive"
        ;;
    hprvps|marvin)
        BASEDIR="$HOME/IA"
        ;;
    *)
        echo "Wrong host!"
        exit 1
        ;;
esac

if ! cd "$BASEDIR"; then
    exit 1
fi
#=== FUNCTION ================================================================
# NAME: yes_no
# DESCRIPTION: Read a Yes or No response from STDIN (only these values are
# accepted) and return a suitable numeric value.
# PARAMETERS: 1 - Prompt string for the read. If it contains '%s' this is
# replaced by '[Y/n]' or '[y/N]' according to the default,
# which is then required.
# 2 - Default value (optional): anything starting with 'Y'
# or 'N' in either case. It is used when the reply is empty.
# RETURNS: 0 for a response of Y, YE or YES, 1 otherwise (including end
# of file on STDIN). Exits the script with status 1 if the
# default is invalid, or missing when the prompt needs one.
#===============================================================================
yes_no () {
    local prompt="${1:?Usage: ${FUNCNAME[0]} prompt [default]}"
    #
    # The default is optional. Take it with a fallback before converting to
    # upper case, otherwise omitting it is fatal under 'set -o nounset'.
    #
    local default="${2:-}"
    local ans res

    default="${default^^}"

    if [[ $prompt =~ %s ]]; then
        if [[ -n $default ]]; then
            default=${default:0:1}
            # shellcheck disable=SC2059
            # {
            case "$default" in
                Y) printf -v prompt "$prompt" "[Y/n]" ;;
                N) printf -v prompt "$prompt" "[y/N]" ;;
                *) echo "Error: ${FUNCNAME[0]} @ line ${BASH_LINENO[0]}: Default must be 'Y' or 'N'"
                    exit 1
                    ;;
            esac
            # }
        else
            echo "Error: ${FUNCNAME[0]} @ line ${BASH_LINENO[0]}: Default required"
            exit 1
        fi
    fi

    #
    # Loop until a valid input is received
    #
    while true; do
        #
        # Read and handle CTRL-D (EOF)
        #
        read -r -e -p "$prompt" ans
        res="$?"
        if [[ $res -ne 0 ]]; then
            echo "Read aborted"
            return 1
        fi

        [ -z "$ans" ] && ans="$default"

        #
        # Look for valid replies and return appropriate values. Print an error
        # message otherwise and loop around for another go
        #
        if [[ ${ans^^} =~ ^Y(E|ES)?$ ]]; then
            return 0
        elif [[ ${ans^^} =~ ^NO?$ ]]; then
            return 1
        else
            echo "Invalid reply; please use 'Y' or 'N'"
        fi
    done
}
#=== FUNCTION ================================================================
# NAME: _usage
# DESCRIPTION: Report usage; always exits the script after doing so. The
# text is written to the file named by the global STDOUT and
# interpolates the globals SCRIPT and VERSION.
# PARAMETERS: 1 - [optional] the value to pass to 'exit' (default 0)
# RETURNS: Nothing (the script exits)
#===============================================================================
_usage () {
# Exit status requested by the caller; 0 when omitted
local exitcode=${1:-0}
# The delimiter is unquoted so variables in the text are expanded; '<<-'
# removes leading tabs (only) from the text and the delimiter line
cat >$STDOUT <<-endusage
${SCRIPT} - version: ${VERSION}
Usage: ./${SCRIPT} show_number
For a show already on the archive it determines if the files are those derived
by the IA software and if not collects the WAV file (if necessary) and
generates all the derivatives (if necessary), then it uploads these
replacements. This is necessary because the IA's derivation process does not
copy the audio tags across to the derived files whereas we do.
endusage
exit "$exitcode"
}
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#
# Make temporary files and set traps to delete them
#
# TMP1=$(mktemp) || { echo "$SCRIPT: creation of temporary file failed!"; exit 1; }
# trap 'cleanup_temp $TMP1' SIGHUP SIGINT SIGPIPE SIGTERM EXIT
UPLOADS="/var/IA/uploads"

#
# Don't run on the wrong system. HOSTNAME is normally set by Bash; fall back
# to the 'hostname' command if it is not.
#
HOST=${HOSTNAME:-$(hostname)}
if [[ $HOST != 'hprvps' ]]; then
    echo "This should be run on the HPR VPS"
    exit 1
fi

#
# We need a show number argument
#
if [[ $# -ne 1 ]]; then
    _usage 1
fi
show_number=$1

#
# Tools we need: the Awk script 'parse_ia_audio.awk', and the script
# 'transcode'
#
PARSE_IA_AUDIO="$BASEDIR/parse_ia_audio.awk"
[ -e "$PARSE_IA_AUDIO" ] || { echo "$SCRIPT: Unable to find $PARSE_IA_AUDIO"; exit 1; }
TRANSCODE="$BASEDIR/transcode"
[ -e "$TRANSCODE" ] || { echo "$SCRIPT: Unable to find $TRANSCODE"; exit 1; }

#
# Build the file path and the identifier we're looking for
#
WAV="$UPLOADS/hpr${show_number}.wav"
IDENTIFIER="hpr${show_number}"

#
# Look for the WAV file in the upload area
#
if [[ -e $WAV ]]; then
    echo "The WAV file already exists"
else
    #
    # Not found. Check it's on the archive
    #
    if ia metadata --exists "$IDENTIFIER" > /dev/null 2>&1; then
        #
        # It's there. Report the status of original and derived
        #
        echo "Need to download ${WAV##*/}"
        ia list -va "$IDENTIFIER" | "$PARSE_IA_AUDIO"

        #
        # Download the WAV
        #
        if yes_no "OK to download? %s " N; then
            echo "Downloading"
            if ! ia download "$IDENTIFIER" "${WAV##*/}" --stdout > "$WAV"; then
                #
                # The redirection creates the file even when the download
                # fails. Remove it, otherwise the next run would find
                # a "WAV file" and skip the download.
                #
                rm -f -- "$WAV"
                echo "Download failed"
                exit 1
            fi
        else
            echo "Download cancelled"
            exit 1
        fi
    else
        echo "This episode is not in the archive"
        exit 1
    fi
fi

#
# Did we already transcode this one?
#
tally=0
for fmt in flac mp3 ogg opus spx; do
    if [[ -e "$UPLOADS/hpr${show_number}.${fmt}" ]]; then
        ((tally++))
    fi
done

#
# We have a WAV file so transcode it unless we found transcoded files
#
if [[ $tally -gt 0 ]]; then
    echo "There are already $tally derived files for this show"
    echo "Not transcoding"
else
    if yes_no "OK to transcode? %s " N; then
        if ! "$TRANSCODE" "$WAV"; then
            echo "Transcode failed"
            exit 1
        fi
    else
        echo "Transcode cancelled"
        exit 1
    fi
fi

#
# Upload the derived files. Stop if the upload fails so that the local files
# are not offered for deletion before they are safely on the archive.
#
if yes_no "OK to upload? %s " N; then
    if ! ia upload "$IDENTIFIER" "${WAV%wav}"{flac,mp3,ogg,opus,spx}; then
        echo "Upload failed"
        exit 1
    fi
else
    echo "Upload cancelled"
    exit 1
fi

if yes_no "OK to delete WAV and derived files? %s " N; then
    rm -f -- "${WAV%wav}"*
else
    echo "Deletion cancelled"
    exit 1
fi

exit
# vim: syntax=sh:ts=8:sw=4:ai:et:tw=78:fo=tcrqn21

71
InternetArchive/show_metadata Executable file
View File

@@ -0,0 +1,71 @@
#!/usr/bin/env perl
#===============================================================================
#
# FILE: show_metadata
#
# USAGE: ./show_metadata
#
# DESCRIPTION: Script to display a file of metadata in a readable format
#
# OPTIONS: ---
# REQUIREMENTS: ---
# BUGS: ---
# NOTES: ---
# AUTHOR: Dave Morriss (djm), Dave.Morriss@gmail.com
# VERSION: 0.0.2
# CREATED: 2014-06-09 16:17:11
# REVISION: 2014-06-15 12:26:58
#
#===============================================================================
use 5.010;
use strict;
use warnings;
use utf8;
use List::Util qw{max};
use Text::CSV_XS;
#
# Version number (manually incremented)
#
our $VERSION = '0.0.2';

#
# Script name
#
( my $PROG = $0 ) =~ s|.*/||mx;

#
# Enable Unicode mode
#
binmode STDOUT, ":encoding(UTF-8)";
binmode STDERR, ":encoding(UTF-8)";

#
# The only argument is the name of the CSV file to display
#
my $mdata = shift;
die "Usage: $PROG filename\n" unless $mdata;
die "File $mdata does not exist\n" unless -e $mdata;

my $csv = Text::CSV_XS->new({ binary => 1 });

open( my $fh, "<:encoding(utf8)", $mdata )
    or die "Unable to open $mdata: $!\n";

#
# The first line holds the column names. An empty file returns nothing here,
# so report that rather than failing on an undefined array reference.
#
my $header = $csv->getline($fh);
die "File $mdata is empty or its header line cannot be parsed\n"
    unless $header && @{$header};

my @cols = @{$header};

#
# Width of the longest column name, used to right-align the names
#
my $max = max map { length($_) } @cols;

#
# Bind the fields of each row to the hash elements named after the columns
#
my $row = {};
$csv->bind_columns( \@{$row}{@cols} );

#
# Print every row as 'name: value' lines followed by a separator
#
while ( $csv->getline($fh) ) {
    foreach my $key (@cols) {
        printf "%*s: %s\n", $max, $key, $row->{$key} // '';
    }
    print '-' x 80, "\n";
}

#
# The loop also ends when a line cannot be parsed; make that visible
#
unless ( $csv->eof ) {
    warn "Stopped reading $mdata early: " . $csv->error_diag . "\n";
}

close($fh);

exit;
# vim: syntax=perl:ts=8:sw=4:et:ai:tw=78:fo=tcrqn21:fdm=marker

454
InternetArchive/tidy_uploaded Executable file
View File

@@ -0,0 +1,454 @@
#!/bin/bash -
#===============================================================================
#
# FILE: tidy_uploaded
#
# USAGE: ./tidy_uploaded [-h] [-v] [-d {0|1}] [-c COUNT]
#
# DESCRIPTION: Relocates HPR audio and other show-related files on 'borg'
# after their shows have been uploaded to the Internet Archive
#
# OPTIONS: ---
# REQUIREMENTS: ---
# BUGS: ---
# NOTES: ---
# AUTHOR: Dave Morriss (djm), Dave.Morriss@gmail.com
# VERSION: 0.0.10
# CREATED: 2022-03-30 17:38:01
# REVISION: 2022-07-30 14:30:43
#
#===============================================================================
set -o nounset                  # Treat unset variables as an error

VERSION="0.0.10"
SCRIPT=${0##*/}
# DIR=${0%/*}
STDOUT="/dev/fd/2"

#
# The function library is required; stop if it is not present
#
LIB="$HOME/bin/function_lib.sh"
if [[ ! -e $LIB ]]; then
    echo "Unable to source functions"
    exit 1
fi
# shellcheck disable=SC1090
source "$LIB"

#
# Create the temporary file and arrange for it to be removed on exit or on
# the listed signals
#
if ! TMP1=$(mktemp); then
    echo "$SCRIPT: creation of temporary file failed!"
    exit 1
fi
trap 'cleanup_temp $TMP1' SIGHUP SIGINT SIGPIPE SIGTERM EXIT

#
# Directory locations depend on the host; any other host is refused
#
case $HOSTNAME in
    borg)
        BASEDIR="$HOME/InternetArchive"
        UPLOADS="/data/IA/uploads"
        ARCHIVE="/data/IA/done"
        ;;
    i7-desktop)
        BASEDIR="$HOME/HPR/InternetArchive"
        UPLOADS="$HOME/HPR/IA/uploads"
        ARCHIVE="$HOME/HPR/IA/done"
        ;;
    *)
        echo "Wrong host!"
        exit 1
        ;;
esac
#=== FUNCTION ================================================================
# NAME: exists_in
# DESCRIPTION: Checks the existence of a key in an associative array
# PARAMETERS: $1 array name
# $2 key value
# RETURNS: True if the key exists, False otherwise
#
# Modified from
# https://stackoverflow.com/questions/13219634/easiest-way-to-check-for-an-index-or-a-key-in-an-array
#===============================================================================
exists_in () {
# shellcheck disable=SC2086
eval '[ ${'$1'[$2]+muahaha} ]'
}
#=== FUNCTION ================================================================
# NAME: queued_tasks
# DESCRIPTION: Queries the IA for any queued or running tasks for an item.
# Writes the number to STDOUT so it can be captured.
# PARAMETERS: $1 IA item (like hpr1192)
# RETURNS: Nothing
#===============================================================================
queued_tasks () {
    local item="${1:?Usage: queued_tasks item}"
    # jq program: slurp the task records, keep the status of each one whose
    # category is "catalog" and count them
    local filter='[.[] | if .category == "catalog" then .status else empty end] | length'
    # Integer attribute: an empty result from the pipeline is stored as 0
    local -i pending=0

    pending=$(ia tasks "$item" | jq -s "$filter")

    # The count goes to standard output for the caller to capture
    printf '%s\n' "$pending"
}
#=== FUNCTION ================================================================
# NAME: movefile
# DESCRIPTION: Moves a file to a new place, catering for any directories in
# the path
# PARAMETERS: $1 directory to move from
# $2 directory to move to
# $3 file (or sub-path) to move, relative to both directories
# RETURNS: True if a move was done, otherwise False (the file already
# exists at the destination, or the directory could not be
# created, or the move failed)
#===============================================================================
movefile () {
    local fromdir="${1:?Usage: movefile fromdir todir path}"
    local todir="${2:?Usage: movefile fromdir todir path}"
    local path="${3:?Usage: movefile fromdir todir path}"
    local dir=''

    #
    # Take the directory part of the path, if it has one. Testing for the
    # '/' directly also copes with paths like 'x/x', where the directory and
    # file names are the same.
    #
    if [[ $path == */* ]]; then
        dir="${path%/*}"
    fi

    #
    # If we have a directory in the path check it exists in the 'to' directory
    # (not relative to the current directory) and create it if not
    #
    if [[ -n $dir && ! -d $todir/$dir ]]; then
        mkdir -p -- "$todir/$dir" || return 1
    fi

    #
    # Does the file exist already?
    # TODO: Compare the two files?
    #
    if [[ -e $todir/$path ]]; then
        echo "File already exists: $todir/$path"
        return 1
    fi

    #
    # Only report (and count) the move if it actually happened
    #
    if mv -- "$fromdir/$path" "$todir/$path"; then
        echo "Moved $fromdir/$path"
        return 0
    fi

    echo "Failed to move $fromdir/$path" >&2
    return 1
}
#=== FUNCTION ================================================================
# NAME: is_empty
# DESCRIPTION: Reports whether a directory tree holds no regular files.
# Sub-directories do not count, so a directory containing only
# (empty) directories is regarded as empty.
# PARAMETERS: $1 Directory to test
# RETURNS: True if no file was found, otherwise false
#===============================================================================
is_empty() {
    local found

    #
    # 'find' prints a single 'X' for the first file it meets and then stops
    #
    found=$(find "$1" -mindepth 1 -type f -printf X -quit)

    [[ -z $found ]]
}
#=== FUNCTION ================================================================
# NAME: _DEBUG
# DESCRIPTION: Writes each message on its own line, prefixed with 'D> ', if
# in DEBUG mode (global DEBUG is anything other than 0). An
# unset or empty DEBUG is treated as 0, which also keeps the
# function safe under 'set -o nounset'.
# PARAMETERS: List of messages
# RETURNS: Nothing
#===============================================================================
_DEBUG () {
    # Keep the loop variable local so a caller's 'msg' is not overwritten
    local msg

    [[ ${DEBUG:-0} == 0 ]] && return 0

    for msg in "$@"; do
        printf 'D> %s\n' "$msg"
    done
}
#=== FUNCTION ================================================================
# NAME: _usage
# DESCRIPTION: Report usage; always exits the script after doing so. The
# text is written to the file named by the global STDOUT and
# interpolates the globals SCRIPT, VERSION, UPLOADS and
# ARCHIVE, so these must be set before the function is called.
# PARAMETERS: 1 [optional] exit value (default 0)
# RETURNS: Nothing (the script exits)
#===============================================================================
_usage () {
# Exit status requested by the caller; 0 when omitted
local -i res="${1:-0}"
# The delimiter is unquoted so variables in the text are expanded; '<<-'
# removes leading tabs (only) from the text and the delimiter line
cat >$STDOUT <<-endusage
${SCRIPT} - version: ${VERSION}
Usage: ./${SCRIPT} [-h] [-v] [-c COUNT] [-d {0|1}] [-D]
Moves HPR audio and other show-related files on 'borg' after their shows
have been uploaded to the Internet Archive. Files to be uploaded are in the
directory ${UPLOADS} and they are moved to the directory ${ARCHIVE}.
Options:
-h Print this help
-v Run in verbose mode where more information is reported
-d 0|1 Dry run: -d 1 (the default) runs the script in dry-run
mode where nothing is moved but the actions that
will be taken are reported; -d 0 turns off dry-run
mode and the actions will be carried out.
-c COUNT Count of shows to process. If omitted or zero then all
shows will be processed, otherwise this is the number
to stop at.
-D Run in debug mode where a lot more information is
reported
Examples
./tidy_uploaded # Run in (default) dry-run mode
./tidy_uploaded -v # Dry-run mode with verbose messages
./tidy_uploaded -d0 # Live mode (without verbose messages)
./tidy_uploaded -c1 # Process 1 show in dry-run mode
./tidy_uploaded -D # Run with debugging enabled
endusage
exit "$res"
}
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#
# Directories and files
#
LOGS="$BASEDIR/logs"
LOGFILE="$LOGS/$SCRIPT.log"

#
# Process options. The leading ':' selects silent error reporting, so an
# unknown option or a missing option argument arrives in the '*' branch.
#
while getopts :c:d:Dhv opt
do
    case "${opt}" in
        c) COUNT=$OPTARG;;
        D) DEBUG=1;;
        d) DRYRUN=$OPTARG;;
        h) _usage 0;;
        v) VERBOSE=1;;
        *) echo "** Unknown option"
            _usage 1;;
    esac
done
shift $((OPTIND - 1))

#
# The count must be a plain decimal number. It is normalised with an explicit
# base of 10 so that a value such as '08' is not later rejected as invalid
# octal by the numeric tests.
#
COUNT=${COUNT:-0}
if [[ ! $COUNT =~ ^[0-9]+$ ]]; then
    echo "** Use a numeric argument with -c"
    _usage 1
fi
COUNT=$((10#$COUNT))

#
# The dry-run value is matched as text. A numeric test would evaluate
# a non-numeric argument (e.g. '-d yes') as 0 and silently turn dry-run mode
# off.
#
DRYRUN=${DRYRUN:-1}
if [[ ! $DRYRUN =~ ^[01]$ ]]; then
    echo "** Use '-d 0' or '-d 1'"
    _usage 1
fi
[[ $DRYRUN -eq 1 ]] && echo "Dry run mode"

VERBOSE=${VERBOSE:-0}

DEBUG=${DEBUG:-0}
[[ $DEBUG -eq 1 ]] && echo "Debug mode"

#
# Should have no arguments
#
if [[ $# != 0 ]]; then
    echo "** ${SCRIPT} takes no arguments"
    _usage 1
fi
#
# Declarations
#
# 'seen' records each IA identifier already handled (as keys); 'dirs'
# collects the identifiers which have a directory in UPLOADS; 'ind' counts
# the identifiers handled
declare -A seen
declare -a dirs
# lastitem=
ind=0
#
# Scan the directory 'UPLOADS' where files for upload to the IA are stored.
#
# See the `find' pipeline at the end of the loop which outputs the last change
# time and the full file path, sorts on the time, then removes it. This
# ensures we process the files in time order rather than alphabetic order of
# their names.
#
while read -r path; do
#
# Extract the path relative to $UPLOADS and the IA item name from the
# returned path. Here $relpath will be the filename or a sub-directory and
# filename, and $item will be the IA identifier like 'hpr1192' (the first
# seven characters of $relpath).
#
relpath="${path#"$UPLOADS"/}"
item="${relpath:0:7}"
[[ $VERBOSE -eq 1 ]] && echo "Found $path"
_DEBUG "Path: $path"
_DEBUG "Relative path: $relpath"
_DEBUG "IA item: $item"
#
# Each IA identifier is processed once only, when the first path belonging
# to it is met. If we have seen this item before we don't need to process
# it again, so the 'else' branch at the end just skips the loop iteration.
#
#
# Never seen before, so process it
#
if ! exists_in seen "$item"; then
# shellcheck disable=SC2034
seen[$item]=1
#
# Count this item and stop the loop if we've reached the requested
# count. We want the value of $ind to be the number of shows
# processed, so adjust it if we stopped after incrementing it.
#
((ind++))
if [[ $COUNT -gt 0 ]]; then
if [[ $ind -gt $COUNT ]]; then
((ind--))
break
fi
echo "[ Show #$ind ]"
fi
#
# Look to see if there are any tasks queued for this show. If there
# are we'll skip it just now.
#
tasks=$(queued_tasks "$item")
if [[ $tasks -gt 0 ]]; then
echo "** Item $item still has $tasks unfinished " \
"$(ngettext task tasks "$tasks")"
echo "** Skipping to the next item"
continue
fi
[[ $VERBOSE -eq 1 ]] && echo "Checking IA for $item"
#
# Interrogate the IA for the item we're working on. If it returns True
# we can proceed with tidying. The file 'TMP1' contains just a simple
# list of the files on the IA relating to this item.
#
if ia list "$item" > "$TMP1"; then
#
# Save any directory associated with this item. This means that
# directories with names that don't conform to the "^hpr[0-9]{4}"
# pattern will be ignored, but this is *not* expected to happen.
# Note that directories without corresponding audio will not be
# cleaned up by this method, but again this is not expected to
# happen.
# TODO: be alert to such issues!
#
dirpath="$UPLOADS/$item"
if [[ -d "$dirpath" ]]; then
echo "Storing directory: $item"
dirs+=("$item")
fi
# Number of files actually moved for this item (live mode only)
moves=0
#
# Scan the returned list to see if any files we have are online.
# Move to the ARCHIVE directory when there's a match.
#
while read -r file; do
frompath="$UPLOADS/$file"
topath="$ARCHIVE/$file"
if [[ -e "$frompath" ]]; then
#
# A file on the IA exists in the upload area. Move the
# local one if we're not in dry-run mode, otherwise just
# report the move we would do. Only successful moves are
# counted.
#
if [[ $DRYRUN -eq 0 ]]; then
movefile "$UPLOADS" "$ARCHIVE" "$file" && ((moves++))
else
printf 'Would move %s\n\tto %s\n' "$frompath" "$topath"
fi
fi
done < "$TMP1"
#
# Log this item (in live mode only)
#
[[ $DRYRUN -eq 0 ]] && \
printf '%s moved %d %s for %s\n' "$(date +%Y%m%d%H%M%S)" \
"$moves" "$(ngettext file files "$moves")" "$item" >> "$LOGFILE"
else
printf 'Skipping %s; not in the IA\n' "$item"
fi
else
#
# Ignore all but the first file belonging to an IA identifier
#
_DEBUG "Skipped $path - repeated show number"
continue
fi
done < <(find "$UPLOADS" -regextype posix-extended -regex '.*hpr[0-9]{4}.*' -printf "%CY%Cm%Cd%CH%CM%CS %p\n" | sort | cut -f2 -d' ')
# Old 'find' used:
# done < <(find "$UPLOADS" -regextype posix-extended -regex '.*hpr[0-9]{4}.*' | sort)
#
# No shows processed? There was nothing to do
#
if [[ $ind -eq 0 ]]; then
    [[ $DRYRUN -eq 0 ]] && echo "Nothing to do"
    #
    # This is a normal end. Give the status explicitly: a bare 'exit' would
    # pass on the status of the test above, which is 1 in dry-run mode.
    #
    exit 0
fi

_DEBUG "Number of shows scanned: $ind"
# _DEBUG "Accumulated directories (${#dirs[*]}): $(printf '/%s/ ' "${dirs[*]}")"

#
# If there are no directories just exit. This is also a normal end, so the
# status is given explicitly rather than inherited from the failed test.
#
[[ -v dirs ]] || exit 0

#
# By an (as yet) unknown process we might get duplicates, so remove them here.
#
# mapfile -t dirs < <(printf "%s\n" "${dirs[*]}" | uniq)
declare -A unique
for e in "${dirs[@]}"; do unique[$e]=1; done
dirs=( "${!unique[@]}" )
# mapfile -t dirs < <(printf '%s\n' "${!unique[@]}")

#
# Use '[@]' so that each directory is shown inside its own '>...<' markers
#
_DEBUG "Directories to process (${#dirs[*]}): $(printf '>%s< ' "${dirs[@]}")"

#
# Clean up any empty directories. These may exist because we moved their
# contents one file at a time. We only deal with the directories we've visited
# though.
#
for dir in "${dirs[@]}"; do
    path="$UPLOADS/$dir"
    if [[ $DRYRUN -eq 0 ]]; then
        if is_empty "$path"; then
            rm -rf -- "$path"
            RES=$?
            if [[ $RES -eq 0 ]]; then
                echo "Deleted $path"
                echo "$(date +%Y%m%d%H%M%S) deleted empty directory $path" >> "$LOGFILE"
            else
                echo "Failed to delete: $path"
            fi
        else
            echo "Directory is not empty: $path"
            echo "Not deleted!"
        fi
    else
        echo "Would delete directory $path"
    fi
done

exit
# vim: syntax=sh:ts=8:sw=4:ai:et:tw=78:fo=tcrqn21:fdm=marker

213
InternetArchive/transcode Executable file
View File

@@ -0,0 +1,213 @@
#!/bin/bash -
#===============================================================================
#
# FILE: transcode
#
# USAGE: ./transcode path_to_file
#
# DESCRIPTION: Given an audio file generated for archive.org transcode it to
# other audio formats: wav, opus, flac, mp3, ogg, spx. Having
# done so propagate the tags from the first file to the others.
#
# OPTIONS: ---
# REQUIREMENTS: ---
# BUGS: ---
# NOTES: ---
# AUTHOR: Dave Morriss (djm), Dave.Morriss@gmail.com
# VERSION: 0.0.2
# CREATED: 2018-01-05 12:52:49
# REVISION: 2021-04-23 20:27:35
#
#===============================================================================
set -o nounset # Treat unset variables as an error
# Name of this script without any leading directories; used in messages
SCRIPT=${0##*/}
# DIR=${0%/*}
VERSION="0.0.2"
# NOTE: despite the name this is file descriptor 2 (stderr); '_usage' writes
# its help text here
STDOUT="/dev/fd/2"
#=== FUNCTION ================================================================
# NAME: _usage
# DESCRIPTION: Reports usage; always exits the script after doing so. The
# text is written to the file named by the global STDOUT and
# interpolates the globals SCRIPT and VERSION.
# PARAMETERS: 1 - the integer to pass to the 'exit' command (default 0)
# RETURNS: Nothing (the script exits)
#===============================================================================
_usage () {
# Exit status requested by the caller; 0 when omitted
local -i result=${1:-0}
# The delimiter is unquoted so variables in the text are expanded; '<<-'
# removes leading tabs (only) from the text and the delimiter line
cat >$STDOUT <<-endusage
${SCRIPT} - version: ${VERSION}
Usage: ./${SCRIPT} [-h] [-v] path_to_file
Performs a "transcode" action on the given file.
This means a variety of audio types are generated from the 'wav' format. The
formats required are 'wav' 'opus' 'flac' 'mp3' 'ogg' 'spx'. If the file
presented is not 'wav' format then this format is generated from the file
using ffmpeg.
Options:
-h Print this help
-v Run in verbose mode where more information is reported
Arguments:
path_to_file The primary file to be processed. The replica files
generated from it are written to the same path. The
file is expected to be an HPR audio file with a name
such as 'hpr1234.mp3'.
endusage
exit "$result"
}
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#
# Options
#
# -v turns on verbose reporting. -h, like any unrecognised option, shows the
# usage text and ends the script with status 1.
#
while getopts ':hv' opt; do
    case "${opt}" in
        v)
            VERBOSE=1
            ;;
        h)
            _usage 1
            ;;
        *)
            _usage 1
            ;;
    esac
done
shift $((OPTIND - 1))

#
# Run quietly unless verbose mode has been requested
#
: "${VERBOSE:=0}"
#
# Check for the presence of the required tools; the first one that cannot be
# found ends the script
#
for f in ffmpeg ffprobe opusenc sox speexenc transfer_tags; do
    TOOL=$(command -v "$f")
    [[ -n $TOOL ]] && continue

    echo "$SCRIPT: Unable to find required audio tool '$f'; aborted"
    exit 1
done
#
# We need an audio file argument
#
if [[ $# -ne 1 ]]; then
    _usage 1
fi

PRIMARY=$1
FILE=${PRIMARY##*/}

#
# Work out the directory holding the primary file. A bare file name (one with
# no '/' in it) lives in the current directory; stripping '/*' from such
# a name would leave the file name itself, and every path built from WORKDIR
# would then be wrong.
#
if [[ $PRIMARY == */* ]]; then
    WORKDIR=${PRIMARY%/*}
else
    WORKDIR='.'
fi

# echo "PRIMARY=$PRIMARY"
# echo "WORKDIR=$WORKDIR"
# echo "FILE=$FILE"

#
# The audio formats we deal with, in the order they are processed
#
declare -a audiotypes=(wav opus flac mp3 ogg spx)

#
# We expect an HPR-format filename and one of a list of audio formats.
# TODO: review the use of spx!
#
# The regular expression is 'hpr', 1-4 digits, a dot and one of the audio
# types (joined into an alternation with '|'). The episode number and the
# extension are captured.
#
RE=$(IFS='|'; printf '%s' "${audiotypes[*]}")
RE="^hpr([0-9]{1,4})\.($RE)$"

if [[ $FILE =~ $RE ]]; then
    ep_num=${BASH_REMATCH[1]}
    ext=${BASH_REMATCH[2]}
else
    echo "$SCRIPT: Expecting an HPR audio file, got $FILE"
    exit 1
fi
#
# Check the primary file exists
#
[ -e "$PRIMARY" ] || { echo "$SCRIPT: Unable to find the file: $PRIMARY"; exit 1; }

#
# Is the primary file a wav file? If not, convert it
# TODO: Can this be done with spx?
#
# The wav file is the source for every other format, so a failed conversion
# is fatal. Any partial output is removed so that a later run does not mistake
# it for a finished wav file.
#
CHANNELS=1
WAV="$WORKDIR/hpr${ep_num}.wav"
if [[ ! -e $WAV && $ext != 'wav' ]]; then
    [[ $VERBOSE -eq 1 ]] && echo "Making a wav file from $ext"
    if ! ffmpeg -i "${PRIMARY}" -ar 44100 -ac "$CHANNELS" "$WAV" > /dev/null 2>&1; then
        rm -f -- "$WAV"
        echo "$SCRIPT: Unable to make $WAV from $PRIMARY; aborted"
        exit 1
    fi
fi
TEMP_DIR='/tmp'

#=== FUNCTION ================================================================
# NAME: _encode
# DESCRIPTION: Runs the encoder which turns the wav file into one other
# audio format. Output from the tools is not redirected here.
# PARAMETERS: 1 - the format to make (opus, flac, mp3, ogg or spx)
# 2 - path of the wav file to read
# 3 - path of the file to write
# RETURNS: The status of the encoder (the last command in the pipeline
# for spx); zero for a format which is not handled
#===============================================================================
_encode () {
    local fmt="$1" wav="$2" target="$3"

    case "$fmt" in
        opus)
            opusenc "$wav" "$target"
            ;;
        flac|mp3|ogg)
            sox --temp "${TEMP_DIR}" -S "$wav" "$target"
            ;;
        spx)
            sox --temp "${TEMP_DIR}" -S "$wav" -c 1 -r 16000 -t wav - |\
                speexenc - "$target"
            ;;
    esac
}

#=== FUNCTION ================================================================
# NAME: _make_replica
# DESCRIPTION: Makes one replica file, reporting what is being done in
# verbose mode and discarding all output of the tools otherwise.
# The redirection is applied to the whole of '_encode' so that
# both ends of the spx pipeline are silenced, not just the last
# command.
# PARAMETERS: 1 - the format to make
# 2 - path of the wav file to read
# 3 - path of the file to write
# 4 - 1 for verbose mode, otherwise 0 (the default)
# RETURNS: The status returned by '_encode'
#===============================================================================
_make_replica () {
    local fmt="$1" wav="$2" target="$3"
    local -i verbose=${4:-0}

    if [[ $verbose -eq 1 ]]; then
        echo "Make format $fmt"
        _encode "$fmt" "$wav" "$target"
    else
        _encode "$fmt" "$wav" "$target" > /dev/null 2>&1
    fi
}

#
# Make the audio variants. The wav file itself needs no work and any replica
# which already exists is left untouched.
#
[[ $VERBOSE -eq 1 ]] && echo "Generating replica files..."
for fmt in "${audiotypes[@]}"; do
    target="$WORKDIR/hpr${ep_num}.${fmt}"

    if [[ $fmt == 'wav' ]]; then
        [[ $VERBOSE -eq 1 ]] && echo "** Nothing to do for $fmt"
        continue
    fi

    if [[ -e "$target" ]]; then
        [[ $VERBOSE -eq 1 ]] && echo "** $target already exists"
        continue
    fi

    _make_replica "$fmt" "$WAV" "$target" "$VERBOSE"
done
#
# Propagate the tags from the primary file to the replicas, passing our
# verbosity setting on to 'transfer_tags'. The script finishes with the
# status of that command.
#
if [[ $VERBOSE -eq 1 ]]; then
    echo "transfer_tags $PRIMARY"
    transfer_tags -verbose "$PRIMARY"
else
    transfer_tags -noverbose "$PRIMARY"
fi

exit
# vim: syntax=sh:ts=8:sw=4:ai:et:tw=78:fo=tcrqn21

932
InternetArchive/transfer_tags Executable file
View File

@@ -0,0 +1,932 @@
#!/usr/bin/perl
#===============================================================================
#
# FILE: transfer_tags
#
# USAGE: ./transfer_tags [-h] [-debug=N] [-[no]dry-run] [-[no]verbose]
# primaryfile
#
# DESCRIPTION: Transfer ID3 (or equivalent) tags from a base file to
# whichever of FLAC, OPUS, MP3, OGG, SPX and WAV versions are found.
#
# OPTIONS: ---
# REQUIREMENTS: ---
# BUGS: ---
# NOTES: ---
# AUTHOR: Dave Morriss (djm), Dave.Morriss@gmail.com
# VERSION: 1.5.1
# CREATED: 2013-03-31 14:18:55
# REVISION: 2021-04-23 20:44:46
#
#===============================================================================
use 5.010;
use strict;
use warnings;
use Getopt::Long;
use Pod::Usage;
use LWP::UserAgent;
use File::Basename;
use File::Find::Rule;
use Audio::TagLib;
use Data::Dumper;
#
# Version number (manually incremented)
#
our $VERSION = '1.5.1';
#
# Declarations
#
# $ref, $tag and $tags receive the three values returned by 'collectTags' and
# 'checkShowDetails'; %primarytags and %replicatags are copies of the tag
# hashes for the primary file and the replica being processed; $changed
# counts the tag changes made to a replica.
#
my ( $ref, $tag, $tags, %primarytags, %replicatags, $changed );
# The three components 'fileparse' splits the primary file path into
my ( $directories, $filename, $suffix );
#
# The extensions the various codings are expected to have (it looks both for
# the lower- and upper-case versions, but only the lower-case ones are
# required here)
#
my @exts = qw{ flac m4a mp3 ogg opus spx wav };
#
# The additional name variants we'll accept
#
my @variants = qw{ _mez };
#
# Used to test tag conformity. The key is the tagname, the value is a hashref
# containing a regex (key 're') for detecting conformity and the correct value
# (key 'ok').
# To be passed to subroutine 'checkConformity', but kept here for ease of
# maintenance.
#
#my %tag_control = (
# album => { re => qr{^Hacker Public Radio$}, ok => 'Hacker Public Radio' },
# comment => {
# re => qr{^http://hackerpublicradio\.org/?},
# ok => 'http://hackerpublicradio.org'
# },
# genre => { re => qr{(?i)^Podcast$}, ok => 'Podcast' },
#);
#
# The Audio::TagLib methods to call for each tag manipulated by the script.
# The number after the method name is 1 if the value being set is a string,
# and zero otherwise.
#
my %tagmethods = (
album => [ 'setAlbum', 1 ],
artist => [ 'setArtist', 1 ],
comment => [ 'setComment', 1 ],
genre => [ 'setGenre', 1 ],
title => [ 'setTitle', 1 ],
track => [ 'setTrack', 0 ],
year => [ 'setYear', 0 ],
);
#
# Because Audio::TagLib::FileRef does not seem to commit the tag update until
# very late (during the DESTROY?) it's very difficult to update file times
# _after_ the tags have been written. The solution is to save all of the tag
# hashes (which contain the file path and times) and process them in the END{}
# block. Dirty, but very Perl-ish.
#
my @tag_stash;
#
# Script name (the path of the script with everything up to the last '/'
# removed)
#
( my $PROG = $0 ) =~ s|.*/||mx;
#
# Ensure STDOUT is in UTF8 mode
#
binmode( STDOUT, ":encoding(utf8)" );
#-------------------------------------------------------------------------------
# Options and arguments
#-------------------------------------------------------------------------------
#
# Option defaults
#
my $DEFDEBUG = 0;

my %options;
Options( \%options );

#
# Default help shows minimal information
#
if ( $options{'help'} ) {
    pod2usage(
        -msg     => "$PROG version $VERSION\n",
        -exitval => 1,
        -verbose => 0
    );
}

#
# The -documentation or -man option shows the full POD documentation through
# a pager for convenience
#
if ( $options{'documentation'} ) {
    pod2usage(
        -msg     => "$PROG version $VERSION\n",
        -exitval => 1,
        -verbose => 2
    );
}

#
# Collect options, falling back to the defaults for any not given
#
my $DEBUG   = $options{debug}   // $DEFDEBUG;
my $verbose = $options{verbose} // 0;

#
# Get the argument, the "primary" file
#
my $primaryfile = shift(@ARGV);
unless ($primaryfile) {
    pod2usage(
        -msg     => "Missing argument\n\nVersion $VERSION\n",
        -exitval => 1
    );
}

#
# Check the file exists
#
-e $primaryfile
    or die "$primaryfile does not exist\n";
#
# Assume there are other versions of the same file with suffixes from the
# list in @exts, so build these names from the primary file name. Start by
# parsing the filename into its directories, filename and suffix. Remove the
# leading dot from the suffix.
#
($filename,$directories,$suffix) = fileparse($primaryfile,qr/\.[^.]*/);
$suffix =~ s/^\.//;

#
# Get the show number from the filename
#
my ( $id ) = ( $filename =~ /^hpr(\d+).*$/ );

_debug ($DEBUG > 1, "path = $directories");
_debug ($DEBUG > 1, "filename = $filename");
_debug ($DEBUG > 1, "suffix = $suffix");
_debug ($DEBUG > 1, "episode = $id");

#
# Reject the file if it doesn't have an expected suffix (we're not
# case-sensitive here). The whole suffix is compared with each extension;
# using the suffix as a pattern would let a fragment such as 'a', or an empty
# suffix, match.
#
die "$primaryfile does not have a recognised suffix (expecting "
    . join( ", ", @exts ) . ")\n"
    unless ( grep { lc($suffix) eq $_ } @exts );

#
# Use File::Find::Rule to find all the files that match a regular expression.
# This is built from the parsed file, with an optional list of variants and
# all of the extensions (case insensitively). The literal parts are escaped
# with 'quotemeta' and the dot before the extension is a real dot; in
# a double-quoted string '\.' would lose its backslash and match any
# character. We then remove the primary file from the list (again escaping
# its name) and we're done.
#
my $re
    = '^' . quotemeta($filename) . '('
    . join( '|', map { quotemeta($_) } @variants ) . ')?\.(?i:'
    . join( '|', map { quotemeta($_) } @exts ) . ')$';

my @files = grep { !/^\Q$primaryfile\E$/ }
    File::Find::Rule->file()->name(qr{$re})->in($directories);
#
# Log the file
#
print $primaryfile, "\n";

#
# Collect the tags from the primary file (& stash them for later too)
#
( $ref, $tag, $tags ) = collectTags($primaryfile);
%primarytags = %{$tags};
push @tag_stash, $tags;

#
# Report the tags found in the primary file
#
reportTags( \%primarytags );

#
# Check for mandatory tags for which there is no default
#
#if ($primarytags{artist} =~ /^\s*$/) {
# die "Missing Artist tag\n";
#}
#if ($primarytags{title} =~ /^\s*$/) {
# die "Missing Title tag\n";
#}
#
# Check that the primary file conforms to the HPR standards, ensuring that any
# changes are returned from the routine
#
#( $ref, $tag, $tags ) = checkConformity( $ref, $tag, $tags, \%tag_control );
#%primarytags = %$tags;

#
# We know what the tags should be because the 'say.php' service tells us, so
# all we need to do is synchronise the file's tags with what's expected. If
# the details cannot be collected the tags are left as they are.
#
my %showDetails;
if ( getShowDetails(
        "http://hackerpublicradio.org/say.php?id=$id",
        \%showDetails
    ) )
{
    ( $ref, $tag, $tags )
        = checkShowDetails( $ref, $tag, $tags, \%showDetails, $verbose );
    %primarytags = %{$tags};
}

print q{=} x 80, "\n";
#
# Now process the "replica" files
#
foreach my $file (@files) {
    #
    # Skip a "replica" file we cannot read, otherwise report its name
    #
    unless ( -r $file ) {
        warn "$file is not readable\n";
        next;
    }
    print "$file\n";

    #
    # Get the "replica" file's tags (& keep a copy in the stash)
    #
    ( $ref, $tag, $tags ) = collectTags($file);
    %replicatags = %{$tags};
    push @tag_stash, $tags;

    #
    # Report the tags
    #
    if ($verbose) {
        reportTags( \%replicatags );
    }

    #
    # Change the tags to match the "primary" file's tags. Keys starting with
    # '_' are file details rather than tags so are not visited.
    #
    $changed = 0;
    my @tagnames = sort grep { !/^_/ } keys %primarytags;
    for my $t (@tagnames) {
        $changed += changeTag(
            $tag, $t, $replicatags{$t}, $primarytags{$t},
            @{ $tagmethods{$t} }, $verbose
        );
    }

    if ($verbose) {
        print q{-} x 80, "\n";
    }

    #
    # Save any changes
    #
    $ref->save() if $changed;
}

exit;
#-------------------------------------------------------------------------------
# Post-processing of file times
#
# Process all files we've visited (whether their tags were changed or not)
# and force the 'atime' and 'mtime' back to their starting values. This will
# be done after the rest of the script runs, when we know that all of the
# Audio::TagLib::FileRef objects have been destroyed and have done their lazy
# updates.
#-------------------------------------------------------------------------------
END {
    # Put back the saved times of every file whose details were stashed,
    # warning about any file for which this fails
    foreach my $stashed (@tag_stash) {
        next if restoreTimes($stashed);
        warn "Time restoration failed on $stashed->{_path}\n";
    }
}
#=== FUNCTION ================================================================
# NAME: decodeTag
# PURPOSE: Given an Audio::TagLib::Tag object containing multiple tags,
# extract a tag which may be an encoded string
# (Audio::TagLib::String) or a plain integer. Return a string
# with the plain or decoded value.
# PARAMETERS: $attag The Audio::TagLib::Tag value
# $method The method name to call on the object, as
# a string
# RETURNS: The value from the method as a string
# DESCRIPTION: We call $method on the $attag object to return the tag. This
# may need decoding or not depending on what it is. We can do
# this without much work, though doing things this way is a lot
# less obscure.
# THROWS: No exceptions
# COMMENTS: None
# SEE ALSO: N/A
#===============================================================================
sub decodeTag {
    my ( $attag, $method ) = @_;

    #
    # Call the desired method on the Audio::TagLib::Tag object; an undefined
    # result is handed straight back
    #
    my $value = $attag->$method();
    return $value unless defined($value);

    #
    # An Audio::TagLib::String object needs decoding, anything else is
    # returned as it is
    #
    return ref($value) eq 'Audio::TagLib::String'
        ? $value->toCString()
        : $value;
}
#=== FUNCTION ================================================================
# NAME: collectTags
# PURPOSE: Collects tags from a media file
# PARAMETERS: $filepath Path to the file (a string)
# RETURNS: A list containing an Audio::TagLib::FileRef object, an
# Audio::TagLib::Tag object (containing the actual tags) and
# a hashref containing the converted tag values (along with
# a few other attributes).
# DESCRIPTION: Collects the tags and the timestamps from the file. Then the
# various tags and other attributes are placed in a hash which
# will be returned to the caller. The non-tag keys begin with
# '_' to differentiate them.
# THROWS: No exceptions
# COMMENTS: Could use decodeTag
# SEE ALSO:
#===============================================================================
sub collectTags {
    my ($filepath) = @_;

    # Note the file's access and modification times before it is opened
    my ( $atime, $mtime ) = ( stat $filepath )[ 8, 9 ];

    my $fileref = Audio::TagLib::FileRef->new($filepath);
    my $ftag    = $fileref->tag();

    # The text tags have 'toCString' called on them; 'track' and 'year' are
    # used as returned. Keys beginning with '_' are file details, not tags.
    my %tags = (
        ( map { ( $_ => $ftag->$_()->toCString() ) }
                qw{ album artist comment genre title } ),
        ( map { ( $_ => $ftag->$_() ) } qw{ track year } ),
        _path  => $filepath,
        _atime => $atime,
        _mtime => $mtime,
    );

    return ( $fileref, $ftag, \%tags );
}
#=== FUNCTION ================================================================
# NAME: reportTags
# PURPOSE: Print the tags in a hash
# PARAMETERS: $tags Hashref keyed by tagname and containing tag
# contents from a media file
# RETURNS: Nothing
# DESCRIPTION: Just prints all the "proper" tags held in the hash argument in
# alphabetical order of the keys. Note that the "secret" keys,
# those beginning with '_', are skipped. See 'collectTags' for
# what they are.
# THROWS: No exceptions
# COMMENTS: None
# SEE ALSO:
#===============================================================================
sub reportTags {
    my ($tags) = @_;

    # Walk the keys in alphabetical order, leaving out the file details
    # (the keys which begin with '_')
    foreach my $name ( sort keys %{$tags} ) {
        next if $name =~ /^_/;
        printf "%-10s: %s\n", $name, $tags->{$name};
    }

    return;
}
#=== FUNCTION ================================================================
# NAME: changeTag
# PURPOSE: Changes a tag to a new value if appropriate
# PARAMETERS: $tag Audio::TagLib::Tag object
# $tagname Name of tag
# $oldValue Current value of tag
# $newValue New value of tag or undefined
# $setFunc String containing the name of the 'set'
# function
# $isString True if the value being set is a string
# RETURNS: 1 if a change has been made, 0 otherwise
# DESCRIPTION: Performs some argument checks, returning on a missing new
# value, or if the old and new values are the same. The old and
# new values may be encoded integers, so we look for this
# eventuality. After all of this we know there's a change to be
# made and perform the appropriate steps to make it.
# THROWS: No exceptions
# COMMENTS: None
# SEE ALSO:
#===============================================================================
sub changeTag {
    my ( $tag, $tagname, $oldValue, $newValue, $setFunc, $isString, $verbose ) = @_;

    # No new value, or the same text as before: nothing to change
    return 0 if !defined($newValue) || $oldValue eq $newValue;

    # Values which are not strings are also compared as integers, so that
    # different spellings of the same number count as equal
    $isString //= 0;
    return 0 if !$isString && int($oldValue) == int($newValue);

    if ($verbose) {
        print ">> Changing $tagname to '$newValue'\n";
    }

    # Strings are wrapped in an Audio::TagLib::String, other values are
    # passed to the 'set' method as they are
    $tag->$setFunc(
        $isString ? Audio::TagLib::String->new($newValue) : $newValue );

    return 1;
}
#=== FUNCTION ================================================================
# NAME: restoreTimes
# PURPOSE: Restore the original times to a file which has had its tags
# changed
# PARAMETERS: $tags Hashref keyed by tagname and containing tag
# contents (and file attributes) from a media
# file. The file details have keys beginning
# with '_'.
# RETURNS: Number of files changed (see 'utime')
# DESCRIPTION: Uses the Perl 'utime' function to change the file's access
# time and modification time to whatever is in the hash. These
# are expected to be the times the file had when it was first
# encountered.
# THROWS: No exceptions
# COMMENTS: None
# SEE ALSO:
#===============================================================================
sub restoreTimes {
    my ($tags) = @_;

    # Pull out the saved file details and hand them to 'utime', whose result
    # (the number of files changed) is returned
    my ( $atime, $mtime, $path ) = @{$tags}{qw{ _atime _mtime _path }};

    return utime( $atime, $mtime, $path );
}
#=== FUNCTION ================================================================
# NAME: getShowDetails
# PURPOSE: Collects show details from an URL (on the HPR server)
# PARAMETERS: $url HPR web server address for the PHP function
# $details Hashref to contain the collected details
# RETURNS: Boolean: 1 for success, otherwise 0
# DESCRIPTION: The URL for the particular HPR show is given as a parameter.
# It's queried and the result parsed into a hash, returned via
# the hashref provided as a parameter.
# THROWS: No exceptions
# COMMENTS: None
# SEE ALSO: N/A
#===============================================================================
sub getShowDetails {
    my ( $url, $details ) = @_;

    my $ua       = LWP::UserAgent->new( timeout => 10 );
    my $response = $ua->get($url);

    # A failed request leaves the hash untouched
    return 0 unless $response->is_success;

    #
    # Each line of the reply is expected to look like 'key: value'. Lines
    # which don't (blank lines for instance) are skipped; storing them would
    # put an undefined key and value in the hash.
    #
    foreach my $line ( split( /\n/, $response->decoded_content // '' ) ) {
        next unless $line =~ /^([^:]+):\s*(.*)$/;
        $details->{$1} = $2;
    }

    return 1;
}
#=== FUNCTION ================================================================
# NAME: checkShowDetails
# PURPOSE: Checks the collected details against the tags and makes
# necessary changes
# PARAMETERS: $ref Audio::TagLib::FileRef relating to the
# primary file; it's how we make changes to the
# file tags in the file
# $tag Audio::TagLib::Tag containing the tags of the
# primary file
# $tags Hashref containing the converted tags (and
# a few other odds and sods)
# $details Hashref containing the definitive values from
# the website
# RETURNS: A list containing $ref, $tag and $tags as described above
# DESCRIPTION: Compares various tags with the values returned from the
# website and makes any necessary changes
# THROWS: No exceptions
# COMMENTS: None
# SEE ALSO: N/A
#===============================================================================
sub checkShowDetails {
    my ( $ref, $tag, $tags, $details, $verbose ) = @_;

    my $changed = 0;

    foreach my $t (qw{album artist comment genre title track year}) {
        #
        # A tag the website did not tell us about cannot be checked, so
        # leave it as it is (comparing with an undefined value would only
        # generate warnings)
        #
        my $wanted = $details->{"HPR_$t"};
        next unless defined($wanted);

        # Already as expected
        next if $tags->{$t} eq $wanted;

        #
        # Request the change, then refresh our copy of the tag from the
        # Audio::TagLib::Tag object
        #
        $changed += changeTag( $tag, $t, $tags->{$t},
            $wanted, @{ $tagmethods{$t} }, $verbose );
        $tags->{$t} = decodeTag( $tag, $t );
    }

    #
    # Save any changes
    #
    $ref->save() if $changed;

    #
    # Return tag-related stuff so the caller can get the benefit
    #
    return ( $ref, $tag, $tags );
}
#=== FUNCTION ================================================================
# NAME: checkConformity
# PURPOSE: Check that the primary file has conforming tags, fixing them
# if not
# PARAMETERS: $ref Audio::TagLib::FileRef relating to the
# primary file
# $tag Audio::TagLib::Tag containing the tags of the
# primary file
# $tags Hashref containing the converted tags (and
# a few other odds and sods)
# $kosher Hashref containing the checking values (see
# %tag_control in the main program)
# RETURNS: A list containing $ref, $tag and $tags as described above
# DESCRIPTION: Implements a number of complex rules. Firstly the 'genre' tag
# is expected to contain 'Podcast'. Secondly the 'album' tag
# must contain 'Hacker Public Radio'. If it does not then the
# value is stored for later then replaced. Finally the 'comment'
# tag must begin with 'http://hackerpublicradio.org'. If it
# does not its current contents are stored and replaced with the
# required URL. However, the comment tag will also contain the
# saved album tag (if any) and the saved comment, and these will
# be placed at the end.
# THROWS: No exceptions
# COMMENTS: This code is ugly and difficult to extend and maintain.
# TODO look into ways of improving it!
# SEE ALSO:
#===============================================================================
#sub checkConformity {
# my ( $ref, $tag, $tags, $kosher ) = @_;
#
# my $changed = 0;
# my %saved;
# my ( $t, $commentOK, $newval );
#
# #
# # The 'genre' tag
# #
# $t = 'genre';
# unless ( $tags->{$t} =~ /$kosher->{$t}->{re}/ ) {
# $changed += changeTag(
# $tag, $t, $tags->{$t},
# $kosher->{$t}->{ok},
# @{ $tagmethods{$t} }
# );
# $tags->{genre} = $tag->genre()->toCString();
# }
#
# #
# # The 'album' tag. We save this one for adding to the comment
# #
# $t = 'album';
# unless ( $tags->{$t} =~ /$kosher->{$t}->{re}/ ) {
# ( $saved{$t} = $tags->{$t} ) =~ s/(^\s+|\s+$)//g;
# $changed += changeTag(
# $tag, $t, $tags->{$t},
# $kosher->{$t}->{ok},
# @{ $tagmethods{$t} }
# );
# $tags->{album} = $tag->album()->toCString();
# }
#
# #
# # If the 'comment' is non-standard *or* if the 'album' was changed we want
# # to do stuff here. We make sure the 'comment' is good and append the
# # original 'album' and 'comment' as appropriate.
# #
# $t = 'comment';
# $commentOK = $tags->{$t} =~ /$kosher->{$t}->{re}/;
# unless ( !$changed && $commentOK ) {
# ( $saved{$t} = $tags->{$t} ) =~ s/(^\s+|\s+$)//g;
#
# if ($changed) {
# if ($commentOK) {
# # Album had errors, comment is OK
# $newval = concat( ", ", $saved{comment}, $saved{album} );
# }
# else {
# # Album had errors, comment also
# $newval = concat( ", ", $kosher->{$t}->{ok},
# $saved{album}, $saved{comment} );
# }
# }
# else {
# # Comment had errors, album OK
# $newval = concat( ", ", $kosher->{$t}->{ok}, $saved{comment} );
# }
#
# $changed += changeTag( $tag, $t, $tags->{$t},
# $newval, @{ $tagmethods{$t} } );
# $tags->{comment} = $tag->comment()->toCString();
# }
#
# #
# # Save any changes
# #
# if ($changed) {
# $ref->save();
# }
#
# #
# # Return tag-related stuff so the caller can get the benefit
# #
# return ( $ref, $tag, $tags );
#
#}
#=== FUNCTION ================================================================
# NAME: concat
# PURPOSE: Reimplementation of join but with any undefined or empty
# arguments removed
# PARAMETERS: $sep The string to be used to separate elements in
# the result
# [variable args] Any number of arguments to be joined together
# with the separator
# RETURNS: The concatenated arguments
# DESCRIPTION: Giving 'join' an array that may contain undefined elements will
# result in empty results in the output string and error
# messages as the undefined elements are processed. Giving it
# empty string elements will result in dangling separators in
# the output. This routine removes the undefined and empty
# elements before joining the rest.
# THROWS: No exceptions
# COMMENTS: None
# SEE ALSO:
#===============================================================================
sub concat {
    my ( $sep, @items ) = @_;

    # Keep only the arguments which are defined and not empty
    my @kept;
    foreach my $item (@items) {
        next unless defined($item);
        next unless length($item) > 0;
        push @kept, $item;
    }

    return join( $sep, @kept );
}
#=== FUNCTION ================================================================
# NAME: _debug
# PURPOSE: Prints debug reports
# PARAMETERS: $active Boolean: 1 for print, 0 for no print
# $message Message to print
# RETURNS: Nothing
# DESCRIPTION: Outputs a message if $active is true. It removes any trailing
# newline and then adds one in the 'print' so the caller doesn't
# have to bother. Prepends the message with 'D> ' to show it's
# a debug message.
# THROWS: No exceptions
# COMMENTS: None
# SEE ALSO: N/A
#===============================================================================
sub _debug {
    my $active  = shift;
    my $message = shift;

    # Drop any trailing newline; the 'print' supplies its own
    chomp $message;

    # Only say anything when debugging is switched on
    $active and print "D> $message\n";
}
#=== FUNCTION ================================================================
# NAME: Options
# PURPOSE: Processes command-line options
# PARAMETERS: $optref Hash reference to hold the options
# RETURNS: Undef
# DESCRIPTION: Process the options we want to offer. See the documentation
# for details
# THROWS: no exceptions
# COMMENTS: none
# SEE ALSO: n/a
#===============================================================================
sub Options {
    my ($optref) = @_;

    # The options we accept, in Getopt::Long notation
    my @options = (
        "help", "documentation|man", "debug=i", "dry-run!", "verbose!"
    );

    # A problem with the options gets the brief usage message and ends the
    # script
    GetOptions( $optref, @options )
        or pod2usage(
        -msg     => "$PROG version $VERSION\n",
        -exitval => 1,
        -verbose => 0
        );

    return;
}
__END__
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Application Documentation
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
#{{{
=head1 NAME
transfer_tags - standardise and transfer tags between HPR audio files
=head1 VERSION
This documentation refers to I<transfer_tags> version 1.5.1
=head1 USAGE
transfer_tags [-help] [-documentation]
transfer_tags [-debug=N] [-[no]dry-run] [-[no]verbose] primaryfile
Examples:
transfer_tags /var/IA/uploads/hpr0869.mp3
=head1 OPTIONS
=over 8
=item B<-help>
Reports brief information about how to use the script and exits. To see the
full documentation use the option B<-documentation> or B<-man>. Alternatively,
to generate a PDF version use the I<pod2pdf> tool from
I<http://search.cpan.org/~jonallen/pod2pdf-0.42/bin/pod2pdf>. This can be
installed with the cpan tool as App::pod2pdf.
=item B<-documentation> or B<-man>
Reports full information about how to use the script and exits. Alternatively,
to generate a PDF version use the I<pod2pdf> tool from
I<http://search.cpan.org/~jonallen/pod2pdf-0.42/bin/pod2pdf>. This can be
installed with the cpan tool as App::pod2pdf.
=item B<-debug=N>
Run in debug mode at the level specified by I<N>. Possible values are:
=over 4
=item B<0>
No debugging (the default).
=item B<1>
TBA
=item B<2>
TBA
=item B<3>
TBA
=back
=back
=head1 REQUIRED ARGUMENTS
=over 4
=item B<primaryfile>
This is the name of the audio file, which contains the definitive tags which
are to be copied to all of the other files of the same name but different
extensions.
=back
=head1 DESCRIPTION
The script transfers ID3 (or equivalent) tags from a base file to whichever of
FLAC, M4A, MP3, OGG, OPUS, SPX and WAV versions are found. The tags copied
are: B<album>, B<artist>, B<comment>, B<genre>, B<title>, B<track> and
B<year>. The target
files are determined by taking the name of the B<primary file> without its
extension and appending all of the remaining extensions in the list. Files
with the string "B<_mez>" between the filename and the extension are also
included.
For example: if the B<primary file> is called B<hpr1234.flac> and FLAC, MP3,
OGG, OPUS, SPX and WAV versions exist, the tags found in the file
B<hpr1234.flac> are copied to B<hpr1234.mp3>, B<hpr1234.ogg>,
B<hpr1234.opus>, B<hpr1234.spx>
and B<hpr1234.wav>. If B<hpr1234_mez.mp3> or any other variant existed it
would also receive a copy of the tags.
A certain amount of manipulation is performed before the tags are propagated.
The changes made conform to certain rules, which are:
=over 4
=item .
The B<genre> tag must contain the string "I<Podcast>".
=item .
The B<album> tag must contain the string "I<Hacker Public Radio>". If it does
not then the existing value is stored for later and is then replaced.
=item .
The B<comment> tag must begin with the string
"I<http://hackerpublicradio.org>". If it does not its current contents are
stored and replaced with the required URL. However, the comment tag will also
contain the saved album tag (if any) and the saved comment (if any), and these
will be placed at the end, separated by commas.
=back
The script saves the access time and modification time of all of the media
files it processes. It then restores these times at the end of its run. This
prevents any external processes which depend on these file times from being
confused by the tag changes.
=head1 DIAGNOSTICS
=over 4
=item B<Missing argument>
This error is produced if the script is called without the mandatory argument.
The error is fatal.
=item B<... does not exist>
The primary file specified as the argument does not exist. The error is fatal.
=item B<... does not have a recognised suffix (expecting ...)>
The primary file specified as the argument does not have one of the expected
extensions (flac, m4a, mp3, ogg, opus, spx, wav). The error is fatal.
=item B<... is not readable>
One of the target files was found not to be readable (probably due to file
permissions). The script will ignore this file.
=item B<Time restoration failed on ...>
The primary file or one of the target files could not have its time restored.
The script will ignore this file.
=back
=head1 DEPENDENCIES
Audio::TagLib
Data::Dumper
File::Basename
File::Find::Rule
Getopt::Long
LWP::UserAgent
Pod::Usage
=head1 BUGS AND LIMITATIONS
There are no known bugs in this module.
Please report problems to Dave Morriss (Dave.Morriss@gmail.com).
Patches are welcome.
=head1 AUTHOR
Dave Morriss (Dave.Morriss@gmail.com) 2013
=head1 LICENCE AND COPYRIGHT
Copyright (c) 2013 Dave Morriss (Dave.Morriss@gmail.com). All rights reserved.
This program is free software; you can redistribute it and/or
modify it under the same terms as Perl itself. See perldoc perlartistic.
=cut
#}}}
# [zo to open fold, zc to close]
# vim: syntax=perl:ts=8:sw=4:et:ai:tw=78:fo=tcrqn21:fdm=marker

311
InternetArchive/update_state Executable file
View File

@@ -0,0 +1,311 @@
#!/bin/bash -
#===============================================================================
#
# FILE: update_state
#
# USAGE: ./update_state
#
# DESCRIPTION: A script to update the state of shows which have been sent to
# the IA. It looks at the current state of the 'reservations'
# table on the HPR database and selects all shows which are in
# the state 'MEDIA_TRANSCODED'. It checks each one to see if it is
# known to the IA and if so changes state to 'UPLOADED_TO_IA'.
#
# The IA check can be overridden using the '-F' option, but care
# should be taken not to do this unless it is known all eligible
# shows are uploaded.
#
# Note that the algorithm described here does not work for
# reserved shows like the Community News episodes since they are
# not submitted as such and have no entry in the 'reservations'
# table.
#
# OPTIONS: ---
# REQUIREMENTS: ---
# BUGS: ---
# NOTES: ---
# AUTHOR: Dave Morriss (djm), Dave.Morriss@gmail.com
# VERSION: 0.0.9
# CREATED: 2022-04-19 12:50:52
# REVISION: 2023-10-21 22:51:42
#
#===============================================================================
set -o nounset                              # Treat unset variables as an error

#
# Script identity: the name is the invocation path with its directory removed
#
SCRIPT="${0##*/}"
# DIR=${0%/*}
# shellcheck disable=SC2034
VERSION='0.0.9'

#
# Destination used by _usage for its text. Despite the name this is file
# descriptor 2 (standard error).
#
STDOUT='/dev/fd/2'

#
# The shared function library must exist before it is sourced; give up with a
# failure status if it is not there
#
LIB="${HOME}/bin/function_lib.sh"
if [[ ! -e $LIB ]]; then
    echo "$SCRIPT: Unable to source functions"
    exit 1
fi
# shellcheck source=/home/cendjm/bin/function_lib.sh
source "$LIB"

#
# Set up the colour codes (define_colours is not defined in this script, so
# it is presumably supplied by the library sourced above)
#
define_colours
#=== FUNCTION ================================================================
# NAME: _usage
# DESCRIPTION: Writes the usage message to the file named by $STDOUT and then
#              exits the script
# PARAMETERS: 1 - (optional) the exit status to finish with; default 0
# RETURNS: Nothing (the function never returns; it always exits)
#===============================================================================
_usage () {
    local -i res="${1:-0}"

    #
    # The here-document text and its terminator are kept in column 0 so that
    # the output does not depend on tab-stripping by '<<-'. $SCRIPT and
    # $VERSION are expanded when the message is written.
    #
    cat >"$STDOUT" <<-endusage
Usage: ./${SCRIPT} [-h] [-D] [-d] [-F] [-l N] [-m]
Version: $VERSION
Script to update the status in the 'reservations' table after a show has been
processed.
Options:
-h Print this help
-D Enable DEBUG mode where a lot of information about the workings
of the script is displayed
-d Dry-run mode. Reports what it will do but doesn't do it
-F Force the update(s) without checking the state of the show on
the IA
-l N Limit the number of shows processed to N
-m Monochrome mode - no colours
Examples
./${SCRIPT} -h
./${SCRIPT} -m
./${SCRIPT} -d
./${SCRIPT} -dm
./${SCRIPT} -Dd
./${SCRIPT} -l1
./${SCRIPT}
endusage
    exit "$res"
}
#=== FUNCTION ================================================================
# NAME: _DEBUG
# DESCRIPTION: Writes each message on its own line, prefixed with 'D> ', but
#              only when the global DEBUG is not 0
# PARAMETERS: List of messages
# RETURNS: Nothing (status 0 whether or not anything was written)
#===============================================================================
_DEBUG () {
    # Keep the loop variable private so a caller's 'msg' is never overwritten
    local msg

    [[ "$DEBUG" == 0 ]] && return 0

    for msg in "$@"; do
        printf 'D> %s\n' "$msg"
    done
}
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#
# Pick the working directory that belongs to the machine we are running on;
# any host not listed here is refused
#
case "$HOSTNAME" in
    i7-desktop)
        # UPLOADS="$HOME/HPR/IA/uploads"
        BASEDIR="${HOME}/HPR/IA"
        ;;
    hprvps | marvin | borg)
        # UPLOADS="/data/IA/uploads"
        BASEDIR="${HOME}/IA"
        ;;
    *)
        echo "Wrong host!"
        exit 1
        ;;
esac

#
# Everything that follows uses paths relative to this directory
#
if ! cd "$BASEDIR"; then
    echo "Can't cd to $BASEDIR"
    exit 1
fi
#
# Tools: the curl invocation shared by every query, and the primary CMS
# status URL. QUERY1 is the plain form; QUERY2 adds '-o -'. These are plain
# strings that are later run unquoted, so they must not contain anything that
# needs shell quoting.
#
BASECOM='curl -K ./.hpradmin_curlrc -s'
URL='https://hub.hackerpublicradio.org/cms/status.php'
QUERY1="$BASECOM $URL"
QUERY2="$BASECOM -o - $URL"

#
# Fallback URL and commands, tried when the primary query fails.
# NOTE(review): the fallback is plain HTTP, so whatever the curl config file
# supplies would be sent unencrypted - confirm this is acceptable.
#
URL_BAK='http://hub.hackerpublicradio.org/cms/status.php'
QUERY1_BAK="$BASECOM $URL_BAK"
QUERY2_BAK="$BASECOM -o - $URL_BAK"

#
# Number of attempts made per show before giving up on it
#
RETRIES=3

#
# Option defaults
#
DEBUG=0
FORCE=0
DRYRUN=0                # live mode by default
COLOUR=1                # use colours by default
DEFLIMIT=20
#
# Process options. The leading ':' in the option string puts getopts into
# silent mode: a missing option argument is reported as ':' and an unknown
# option as '?', with the offending option letter in OPTARG.
#
while getopts :hdDFl:m opt
do
    case "${opt}" in
        h) _usage;;
        d) DRYRUN=1;;
        D) DEBUG=1;;
        F) FORCE=1;;
        l) LIMIT=$OPTARG;;
        m) COLOUR=0;;
        :) echo "$SCRIPT: Option -$OPTARG needs an argument; aborting"; exit 1;;
        *) echo "$SCRIPT: Invalid option; aborting"; exit 1;;
    esac
done
shift $((OPTIND - 1))

#
# Cancel colours if requested
#
if [[ $COLOUR -eq 0 ]]; then
    undefine_colours
fi

#
# Validate the limit. It must be checked to be all digits before any numeric
# comparison, because the shell would otherwise evaluate arbitrary text as an
# arithmetic expression. '10#' forces base 10 so that values such as '08' are
# not rejected as bad octal; the value is then stored in canonical form for
# the comparisons made later.
#
LIMIT=${LIMIT:-$DEFLIMIT}
if [[ ! $LIMIT =~ ^[0-9]+$ ]] || (( 10#$LIMIT < 1 || 10#$LIMIT > DEFLIMIT )); then
    echo "** Use '-l 1' up to '-l $DEFLIMIT' or omit the option"
    _usage 1
fi
LIMIT=$((10#$LIMIT))

if [[ $FORCE -eq 1 ]]; then
    coloured 'yellow' "Forcing updates without checking the IA state"
fi

#
# Check the argument count after any options
#
if [[ $# -ne 0 ]]; then
    coloured 'red' "** ${SCRIPT} takes no arguments"
    _usage 1
fi
#
# Fetch the current table of shows requiring work. The reply is expected to
# look something like:
# timestamp_epoc,ep_num,ep_date,key,status,email
# 1651286617,3617,2022-06-14,fda088e0e3bd5d0353ea6b7569e93b87626ca25976a0a,UPLOADED_TO_IA,lurkingprion@gmail.com
# 1651648589,3619,2022-06-16,e7d3810afa098863d81663418d8640276272284de68f1,UPLOADED_TO_IA,monochromec@gmail.com
#
# If the primary query command fails the fallback URL is tried, and the script
# stops if that fails too.
# NOTE: Problem encountered 2022-09-23 because the SSL certificate has expired
# NOTE(review): curl is run without '--fail', so an HTTP error page probably
# still counts as success here - confirm.
#
if ! reservations=$($QUERY2); then
    coloured 'red' "Problem querying $URL"
    coloured 'yellow' "Falling back to $URL_BAK"
    if ! reservations=$($QUERY2_BAK); then
        coloured 'red' "Failed with fallback URL - aborting"
        exit 1
    fi
fi

_DEBUG "reservations = $reservations"
#
# Check which shows are on the IA and can be flagged as such. We get the work
# "queue" from the variable 'reservations' which contains lines returned from
# querying the CMS status interface.
#
# Two counters are kept: 'count' is the number of eligible shows examined
# (this is what $LIMIT applies to) and 'updated' is the number whose update
# was actually issued, which is what is reported at the end.
#
count=0
updated=0
while read -r line; do
    #
    # Only lines with at least six comma-separated fields are considered;
    # field 2 is the show number and field 5 its state
    #
    if [[ $line =~ ^([^,]+),([^,]+),([^,]+),([^,]+),([^,]+),.*$ ]]; then
        state="${BASH_REMATCH[5]}"
        show="${BASH_REMATCH[2]}"

        #
        # Process shows in just one of the states
        #
        if [[ $state = 'MEDIA_TRANSCODED' ]]; then
            _DEBUG "show = $show, state = $state"

            success=0
            retry_count=$RETRIES
            while [[ $retry_count -gt 0 ]]; do
                #
                # Look for the show on the IA (skipped when forcing). If it
                # is not found we wait 30 seconds and look again, up to
                # $RETRIES attempts, then give up on this show and carry on
                # with any others.
                #
                if [[ $FORCE -eq 1 ]] || ia list "hpr$show" > /dev/null 2>&1; then
                    command="${QUERY1}?ep_num=${show}&status=UPLOADED_TO_IA"
                    command_bak="${QUERY1_BAK}?ep_num=${show}&status=UPLOADED_TO_IA"

                    #
                    # In dry-run mode just show the command. In live mode run
                    # it, falling back to the alternative URL on failure; if
                    # that fails as well the whole script stops.
                    #
                    if [[ $DRYRUN -eq 1 ]]; then
                        echo -e "Dry-run: would have run\n${yellow}$command${reset}"
                    else
                        coloured 'yellow' "$command"
                        # The command strings are deliberately left unquoted
                        # so that they split into curl and its arguments
                        # shellcheck disable=SC2086
                        $command || {
                            coloured 'red' "Problem querying $URL"
                            coloured 'yellow' "Falling back to $URL_BAK"
                            # shellcheck disable=SC2086
                            $command_bak || {
                                coloured 'red' "Failed with fallback URL - aborting"
                                exit 1
                            }
                        }
                    fi

                    #
                    # Success. Stop the retry loop
                    #
                    success=1
                    break
                fi

                #
                # Failed to find the show. Wait before the next attempt, but
                # do not waste time waiting once the attempts are used up.
                #
                coloured 'red' "Show $show is not yet uploaded"
                ((retry_count--))
                if [[ $retry_count -gt 0 ]]; then
                    sleep 30
                fi
            done

            if [[ $success -eq 1 ]]; then
                ((updated++))
            else
                coloured 'red' "Failed to update show $show; retry count reached"
                coloured 'yellow' "The command 'ia list hpr$show' repeatedly returned \"failure\""
                coloured 'yellow' "Database updates not done"
                coloured 'yellow' "Try again later with './update_state'"
            fi

            #
            # Stop the loop if we have reached the limiting number
            #
            ((count++))
            if [[ $count -eq $LIMIT ]]; then
                echo "Upload limit ($LIMIT) reached"
                break
            fi
        fi
    fi
done <<< "$reservations"

#
# Report only the shows that were really updated (nothing is in dry-run mode)
#
if [[ $DRYRUN -eq 0 ]]; then
    echo "Number of shows processed successfully: $updated"
fi

exit
# vim: syntax=sh:ts=8:sw=4:ai:et:tw=78:fo=tcrqn21

2803
InternetArchive/upload_manager Executable file

File diff suppressed because it is too large Load Diff

298
InternetArchive/weekly_upload Executable file
View File

@@ -0,0 +1,298 @@
#!/bin/bash -
#===============================================================================
#
# FILE: weekly_upload
#
# USAGE: ./weekly_upload [-h] [-r] [-v] [-d {0|1}] start [count]
#
# DESCRIPTION: Run the commands necessary to upload a batch of HPR shows to
# archive.org
#
# OPTIONS: ---
# REQUIREMENTS: ---
# BUGS: ---
# NOTES: ---
# AUTHOR: Dave Morriss (djm), Dave.Morriss@gmail.com
# VERSION: 0.0.9
# CREATED: 2018-10-12 21:54:35
# REVISION: 2020-10-13 13:54:32
#
#===============================================================================
set -o nounset                              # Treat unset variables as an error

#
# Script identity: the name is the invocation path with its directory removed
#
SCRIPT="${0##*/}"
#DIR=${0%/*}
VERSION='0.0.9'

#
# Destination used by _usage for its text. Despite the name this is file
# descriptor 2 (standard error).
#
STDOUT='/dev/fd/2'

#
# Choose the working directory and the directory holding the files to upload
# according to the host; any host not listed here is refused
#
case "$(hostname)" in
    hprvps | marvin | borg)
        BASEDIR="${HOME}/IA"
        UPLOAD='/var/IA/uploads'
        ;;
    i7-desktop)
        BASEDIR="${HOME}/HPR/InternetArchive"
        UPLOAD="${BASEDIR}/uploads"
        ;;
    *)
        echo "Wrong host!"
        exit 1
        ;;
esac

if ! cd "$BASEDIR"; then
    exit 1
fi
#
# Load library functions. A missing library is a failure, so exit with a
# non-zero status (a bare 'exit' here would pass on the success status of the
# 'echo' before it).
#
LIB="$HOME/bin/function_lib.sh"
[ -e "$LIB" ] || { echo "Unable to source functions"; exit 1; }
# shellcheck disable=SC1090
source "$LIB"
#=== FUNCTION ================================================================
# NAME: check_uploads
# DESCRIPTION: Tests whether the full set of audio files for one show is
#              present in the directory named by the global UPLOAD
# PARAMETERS: 1 - filename prefix e.g. 'hpr9999' (mandatory)
# RETURNS: True (0) if every file exists, false (1) at the first one missing
#===============================================================================
check_uploads () {
    local prefix=${1:?Usage: check_uploads prefix}
    local ext

    #
    # One file per audio format is expected, named like hpr1234.flac. The
    # hpr1234_source.flac file is not part of the check. Stop at the first
    # absent file.
    #
    for ext in flac mp3 ogg opus spx wav; do
        [[ -e $UPLOAD/$prefix.$ext ]] || return 1
    done

    return 0
}
#=== FUNCTION ================================================================
# NAME: _usage
# DESCRIPTION: Writes the usage message to the file named by $STDOUT; the
#              script is always terminated afterwards
# PARAMETERS: 1 - the integer to pass to the 'exit' command (default 0)
# RETURNS: Nothing
#===============================================================================
_usage () {
    local -i status="${1:-0}"

    # $SCRIPT and $VERSION are expanded as the text is written
    cat <<-endusage >"$STDOUT"
${SCRIPT} - version: ${VERSION}
Usage: ./${SCRIPT} [-h] [-r] [-v] [-d {0|1}] start [count]
Generates the necessary metadata and script and uses them to upload HPR audio
and other show-related files held on the VPS to the Internet Archive.
Options:
-h Print this help
-v Run in verbose mode where more information is reported
-d 0|1 Dry run: -d 1 (the default) runs the script in dry-run
mode where nothing is changed but the actions that
will be taken are reported; -d 0 turns off dry-run
mode and the actions will be carried out.
-r Run in 'remote' mode, using the live database over an
(already established) SSH tunnel. Default is to run
against the local database.
Arguments:
start the starting show number to be uploaded
count (optional, default 1) the number of shows to be
uploaded; cannot exceed 20
Notes:
1. When running on 'marvin' the method used is to run in faux 'local' mode.
This means we have an open tunnel to the HPR server (mostly left open) and
the default file .hpr_db.cfg points to the live database via this tunnel.
So we do not use the -r option here. This is a bit of a hack! Sorry!
2. There are potential problems when a show has no tags which haven't been
fully resolved. The make_metadata script fails in default mode when it
finds such a show, but this (weekly_upload) script can continue on and run
the generated script which uploads the source audio files. This can mean
the IA items end up as books! In this mode the description is not stored
and so there are no show notes.
endusage

    exit "$status"
}
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#
# Process options. Asking for help is not an error, so '-h' finishes with
# status 0; anything unrecognised shows the usage and fails.
#
while getopts :d:hrv opt
do
    case "${opt}" in
        d) DRYRUN=$OPTARG;;
        h) _usage 0;;
        r) REMOTE=1;;
        v) VERBOSE=1;;
        *) _usage 1;;
    esac
done
shift $((OPTIND - 1))

#
# Dry-run is the default. The value is matched as text so that nothing other
# than a literal 0 or 1 ever reaches a numeric test (where the shell would
# evaluate it as an arithmetic expression).
#
DRYRUN=${DRYRUN:-1}
if [[ ! $DRYRUN =~ ^[01]$ ]]; then
    echo "** Use '-d 0' or '-d 1'"
    _usage 1
fi

VERBOSE=${VERBOSE:-0}

#
# Choose the database configuration file to hand to make_metadata
#
REMOTE=${REMOTE:-0}
if [[ $REMOTE -eq 0 ]]; then
    dbconfig="$BASEDIR/.hpr_db.cfg"
    [[ $VERBOSE -eq 1 ]] && echo "Local database mode"
else
    dbconfig="$BASEDIR/.hpr_livedb.cfg"
    [[ $VERBOSE -eq 1 ]] && echo "Remote database mode"
fi

#
# Check argument count
#
if [[ ! ( $# -eq 1 || $# -eq 2 ) ]]; then
    echo "Wrong number of arguments"
    _usage 1
fi

#
# Validate arguments: each must be one to four decimal digits
#
for arg; do
    if [[ ! $arg =~ ^[0-9]{1,4}$ ]]; then
        echo "Invalid number: $arg"
        echo "Use a plain number"
        exit 1
    fi
done

#
# The start value is kept exactly as typed. The count is converted to a
# canonical base-10 number ('10#' stops a value such as '08' being taken as
# bad octal) and must lie between 1 and 20.
#
start=$1
count=$((10#${2:-1}))
if [[ $count -lt 1 ]]; then
    echo "Count must be at least 1"
    exit 1
fi
if [[ $count -gt 20 ]]; then
    echo "Can't process more than 20 shows at a time"
    exit 1
fi

[[ $VERBOSE -eq 1 && $DRYRUN -eq 1 ]] && echo "Dry run mode"
#
# Work out the range of shows as base-10 numbers. '10#' prevents a value
# typed with leading zeroes from being read as octal. 'end' is the last show
# of the range, inclusive.
#
first=$((10#$start))
nshows=$((10#$count))
((end = first + nshows - 1))

#
# Check existence of files for EVERY show in the range, including the last
# one. The prefix is zero-padded to four digits, following the 'hpr%04d' file
# template used in the make_metadata configuration; for show numbers of 1000
# and above this is the same as "hpr$i".
#
for (( i = first; i <= end; i++ )); do
    printf -v prefix 'hpr%04d' "$i"
    if ! check_uploads "$prefix"; then
        echo "Missing files for show $i. Aborted!"
        exit 1
    fi
done

#
# A single episode and a batch differ only in how the files are named, how
# the range is described and whether make_metadata is given a count
#
if [[ $nshows -eq 1 ]]; then
    span="$start"
    what="$start"
    range_opts=("-from=$start")
else
    span="${start}-${end}"
    what="$start to $end inclusive"
    range_opts=("-from=$start" "-count=$count")
fi

#
# Define files
#
metadata="metadata_${span}.csv"
script="script_${span}.sh"

#
# In dry-run mode just report what would be done
#
if [[ $DRYRUN -eq 1 ]]; then
    echo "Dry run: Would have uploaded $what"
    echo "Dry run: Would have created $metadata and $script"
    echo "Dry run: Would have uploaded $metadata and run $script"
    echo "Dry run: Would have used $dbconfig"
    exit 0
fi

#
# Live mode: ask for confirmation first
#
echo "Uploading $what"
if ! yes_no "OK to continue? %s " "N"; then
    echo "Not uploaded"
    exit 0
fi

#
# Make the metadata. Nothing is uploaded if this fails, and the failure is
# passed on in the exit status.
#
"$BASEDIR/make_metadata" "-dbconf=${dbconfig}" "${range_opts[@]}" -verb -out -script
RES=$?
if [[ $RES -ne 0 ]]; then
    echo "Upload aborted due to errors"
    exit 1
fi

#
# Upload using the metadata spreadsheet and, only if that works, run the
# generated script. The exit status is that of the last command run.
#
ia upload --retries=3 "--spreadsheet=${metadata}" \
    -H x-archive-keep-old-version:0 && "./${script}"

exit
# vim: syntax=sh:ts=8:sw=4:ai:et:tw=78:fo=tcrqn21