forked from HPR/hpr-tools
New 'reformat_html', plus some cleaning
InternetArchive/future_upload: now updates the state of shows. InternetArchive/reformat_html: new Perl script to reformat the HTML originally found in the 'notes' field of the HPR database into the format required in the 'description' field of an item on the IA. It reads from STDIN and writes to STDOUT.
This commit is contained in:
@@ -1,9 +1,10 @@
|
||||
#!/bin/bash -
|
||||
# shellcheck disable=SC2317
|
||||
#===============================================================================
|
||||
#
|
||||
# FILE: future_upload
|
||||
#
|
||||
# USAGE: ./future_upload
|
||||
# USAGE: ./future_upload [-h] [-v] [-D] [-d {0|1}] [-F] [-r] [-l cp]
|
||||
#
|
||||
# DESCRIPTION: Uploads future HPR shows based on what is in the upload area
|
||||
#
|
||||
@@ -13,9 +14,9 @@
|
||||
# NOTES: Contains methods from 'delete_uploaded' and 'weekly_upload' as
|
||||
# well as 'update_state'
|
||||
# AUTHOR: Dave Morriss (djm), Dave.Morriss@gmail.com
|
||||
# VERSION: 0.0.16
|
||||
# VERSION: 0.0.17
|
||||
# CREATED: 2021-01-07 12:11:02
|
||||
# REVISION: 2025-01-01 11:48:40
|
||||
# REVISION: 2025-01-06 17:51:57
|
||||
#
|
||||
#===============================================================================
|
||||
|
||||
@@ -26,7 +27,7 @@ SCRIPT=${0##*/}
|
||||
|
||||
STDOUT="/dev/fd/2"
|
||||
|
||||
VERSION="0.0.16"
|
||||
VERSION="0.0.17"
|
||||
|
||||
#
|
||||
# Load library functions
|
||||
@@ -36,7 +37,7 @@ LIB="$HOME/bin/function_lib.sh"
|
||||
# shellcheck disable=SC1090
|
||||
source "$LIB"
|
||||
|
||||
# {{{ -- Functions -- check_uploads, _log, _usage
|
||||
# {{{ -- Functions -- check_uploads, update_show_state, _log, _usage
|
||||
|
||||
#=== FUNCTION ================================================================
|
||||
# NAME: check_uploads
|
||||
@@ -72,6 +73,36 @@ check_uploads () {
|
||||
return 0
|
||||
}
|
||||
|
||||
#=== FUNCTION ================================================================
|
||||
# NAME: update_show_state
|
||||
# DESCRIPTION: Updates the status of a single show in the HPR database.
|
||||
# It is assumed the caller has found the show number in the
|
||||
# 'reservations' table with the required status of
|
||||
# 'MEDIA_TRANSCODED'. All this function does is to change this
|
||||
# to 'UPLOADED_TO_IA', returning true if successful, otherwise
|
||||
# false.
|
||||
# PARAMETERS: $show Show number to update
|
||||
# RETURNS: True if the update worked, otherwise false
|
||||
#===============================================================================
|
||||
update_show_state () {
    # The show number is mandatory; ':?' aborts with this usage message if
    # it is missing or empty.
    local show=${1:?Usage: update_show_state show}
    local URL RES
    local -a BASECOM

    # Hold the command in an array so every argument reaches curl intact.
    # Expanding a flat command string unquoted would subject the URL to word
    # splitting and pathname expansion, and the URL contains a '?', which is
    # a glob character (the URL vanishes under 'nullglob', for example).
    BASECOM=(curl -K ./.hpradmin_curlrc -s)
    URL="https://hub.hackerpublicradio.org/cms/status.php"

    # Ask the HPR hub to move the show to the 'UPLOADED_TO_IA' state. Any
    # output from curl goes to the caller's standard output.
    # NOTE(review): without '--fail' curl's exit status does not reflect HTTP
    # error responses, only transfer failures - confirm whether the config
    # file supplies it or whether it should be added here.
    "${BASECOM[@]}" "${URL}?ep_num=${show}&status=UPLOADED_TO_IA"
    RES=$?

    # Collapse any non-zero curl exit status to a plain 'false'
    if [[ $RES -ne 0 ]]; then
        return 1
    fi

    return 0
}
|
||||
|
||||
#=== FUNCTION ================================================================
|
||||
# NAME: _log
|
||||
# DESCRIPTION: Writes a log record to the predefined $LOGFILE in this script
|
||||
@@ -83,7 +114,7 @@ check_uploads () {
|
||||
# PARAMETERS: 1 - the message to write
|
||||
# RETURNS: Nothing
|
||||
#===============================================================================
|
||||
# shellcheck disable=SC2317 disable=SC2059
|
||||
# shellcheck disable=SC2059
|
||||
_log () {
|
||||
local msg="$1"
|
||||
|
||||
@@ -180,7 +211,7 @@ BASECOM='curl -K ./.hpradmin_curlrc -s'
|
||||
URL="https://hub.hackerpublicradio.org/cms/status.php"
|
||||
# QUERY1="${BASECOM} ${URL}"
|
||||
QUERY2="${BASECOM} -o - ${URL}"
|
||||
UPSTATE="$BASEDIR/update_state"
|
||||
# UPSTATE="$BASEDIR/update_state"
|
||||
|
||||
#
|
||||
# Fallback URL
|
||||
@@ -199,10 +230,10 @@ ia=$(command -v ia)
|
||||
echo "Needs the 'make_metadata' script"
|
||||
exit 1
|
||||
}
|
||||
[ -e "$UPSTATE" ] || {
|
||||
echo "Needs the 'update_state' script"
|
||||
exit 1
|
||||
}
|
||||
# [ -e "$UPSTATE" ] || {
|
||||
# echo "Needs the 'update_state' script"
|
||||
# exit 1
|
||||
# }
|
||||
|
||||
#
|
||||
# File of processed shows
|
||||
@@ -234,6 +265,9 @@ do
|
||||
done
|
||||
shift $((OPTIND - 1))
|
||||
|
||||
#
|
||||
# Check and set option variables
|
||||
#
|
||||
DRYRUN=${DRYRUN:-1}
|
||||
if [[ $DRYRUN -ne 0 && $DRYRUN -ne 1 ]]; then
|
||||
echo "** Use '-d 0' or '-d 1'"
|
||||
@@ -272,6 +306,7 @@ fi
|
||||
|
||||
#
|
||||
# Declarations
|
||||
# ------------
|
||||
#
|
||||
declare -A processed
|
||||
declare -A ready
|
||||
@@ -282,6 +317,7 @@ lastitem=
|
||||
|
||||
#
|
||||
# Load array of processed shows
|
||||
# ---- ----- -- --------- -----
|
||||
#
|
||||
while read -r item; do
|
||||
processed+=([$item]=1)
|
||||
@@ -289,46 +325,17 @@ done < "$PROCFILE"
|
||||
[ "$VERBOSE" -eq 1 ] && echo "Number of shows in cache: ${#processed[@]}"
|
||||
|
||||
#
|
||||
# TODO: Create the associative array 'ready' containing the numbers of shows
|
||||
# ready for upload. This is a way to ensure that we don't try and upload shows
|
||||
# in transit to the upload area.
|
||||
# Populate the associative array 'ready' with the numbers of shows ready for
|
||||
# upload. This is a way to ensure that we don't try and upload shows in
|
||||
# transit to the upload area. Only do this if force mode is off.
|
||||
#
|
||||
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
# Proposed code. Not sure what the actual URL will be nor what will be
|
||||
# returned if nothing is ready for upload yet
|
||||
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
#
|
||||
# json=$(curl http://hackerpublicradio.org/queue.php -s -o -)
|
||||
# while read -r showno; do
|
||||
# ready+=([$showno]=1)
|
||||
# done < <(echo "${json}" | jq '.READY_FOR_IA_UPLOAD[] | tonumber')
|
||||
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
# Change of plan. Now we have a list of CSV values, so we need to do something
|
||||
# like this:
|
||||
#
|
||||
# reservations=$($BASECOM -o - $URL)
|
||||
# while read -r line; do
|
||||
# if [[ $line =~ ^([^,]+),([^,]+),([^,]+),([^,]+),([^,]+),.*$ ]]; then
|
||||
# state="${BASH_REMATCH[5]}"
|
||||
# show="${BASH_REMATCH[2]}"
|
||||
# fi
|
||||
# if [[ $state = 'MEDIA_TRANSCODED' ]]; then
|
||||
# ready+=([$show]=1)
|
||||
# fi
|
||||
# done <<< $reservations
|
||||
#
|
||||
# At the end of this the associative array 'ready' will contain the keys of
|
||||
# shows that are ready for upload (presumably) so we can look in this array to
|
||||
# double check.
|
||||
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
if [[ $FORCE -eq 0 ]]; then
|
||||
#
|
||||
# Collect the current table of shows requiring work. We expect something like:
|
||||
# timestamp_epoc,ep_num,ep_date,key,status,email
|
||||
# 1651286617,3617,2022-06-14,fda088e0e3bd5d0353ea6b7569e93b87626ca25976a0a,UPLOADED_TO_IA,lurkingprion@gmail.com
|
||||
# 1651648589,3619,2022-06-16,e7d3810afa098863d81663418d8640276272284de68f1,UPLOADED_TO_IA,monochromec@gmail.com
|
||||
# TODO: Check for a failure in the query?A
|
||||
# TODO: Reinstate the check for a failure in the query? See update_state
|
||||
# NOTE: Problem encountered 2022-09-23 because the SSL certificate has expired
|
||||
#
|
||||
reservations=$($QUERY2) || {
|
||||
@@ -353,8 +360,8 @@ if [[ $FORCE -eq 0 ]]; then
|
||||
fi
|
||||
|
||||
#
|
||||
# The query returns the bare number, but we're using 'hprxxxx' as the key in
|
||||
# the 'ready' array.
|
||||
# The query returns the bare show number, but we're using 'hprxxxx' as the
|
||||
# key in the 'ready' array.
|
||||
#
|
||||
while read -r line; do
|
||||
if [[ $line =~ ^([^,]+),([^,]+),([^,]+),([^,]+),([^,]+),.*$ ]]; then
|
||||
@@ -374,7 +381,10 @@ fi
|
||||
|
||||
#
|
||||
# Process files. There will be several with the same prefix so look for
|
||||
# a change of prefix
|
||||
# a change of prefix.
|
||||
#
|
||||
# The loop is reading from the following pipeline:
|
||||
# find "$UPLOADS" -regextype posix-extended -regex '.*hpr[0-9]{4}.*' | sort
|
||||
#
|
||||
while read -r path; do
|
||||
#
|
||||
@@ -390,8 +400,8 @@ while read -r path; do
|
||||
_DEBUG "Item $item"
|
||||
|
||||
#
|
||||
# Detect that the item prefix has changed. If it has we're processing
|
||||
# a new IA identifier, so work on this one
|
||||
# Detect that the item prefix has changed. If it has we've found a new IA
|
||||
# identifier, so work on the previous one
|
||||
#
|
||||
if [[ $item != "$lastitem" ]]; then
|
||||
lastitem=$item
|
||||
@@ -425,7 +435,8 @@ while read -r path; do
|
||||
processed+=([$lastitem]=1)
|
||||
else
|
||||
#
|
||||
# Is the show ready for upload?
|
||||
# Is the show ready for upload? We don't check if force mode
|
||||
# is on. If not ready we skip this show.
|
||||
#
|
||||
if [[ $FORCE -eq 0 ]]; then
|
||||
if [[ ! -v "ready[$lastitem]" ]]; then
|
||||
@@ -472,10 +483,9 @@ while read -r path; do
|
||||
|
||||
done < <(find "$UPLOADS" -regextype posix-extended -regex '.*hpr[0-9]{4}.*' | sort)
|
||||
|
||||
#
|
||||
#-------------------------------------------------------------------------------
|
||||
# Write the processed array to the cache file unless in dry-run mode
|
||||
#
|
||||
# [ $DEBUG -eq 1 ] && { echo -n 'D> '; declare -p processed; }
|
||||
#-------------------------------------------------------------------------------
|
||||
_DEBUG "processed = ${!processed[*]}"
|
||||
[ "$VERBOSE" -eq 1 ] && echo "Number of shows in cache: ${#processed[@]}"
|
||||
if [[ $DRYRUN -ne 1 ]]; then
|
||||
@@ -484,24 +494,26 @@ if [[ $DRYRUN -ne 1 ]]; then
|
||||
done < <(printf '%s\n' "${!processed[@]}" | sort -u ) > "$PROCFILE"
|
||||
fi
|
||||
|
||||
#
|
||||
#-------------------------------------------------------------------------------
|
||||
# Generate the list of uploads for the 'make_metadata' option '-list=1,2,3'.
|
||||
# The show numbers are keys in the associative array 'uploads'. The
|
||||
# end-product is a comma-separated list of the keys in the variable '$list'.
|
||||
# Order is unimportant because make_metadata sorts internally.
|
||||
#
|
||||
#-------------------------------------------------------------------------------
|
||||
_DEBUG "uploads = ${!uploads[*]}"
|
||||
[ "$VERBOSE" -eq 1 ] && echo "Number of shows for upload: ${#uploads[@]}"
|
||||
printf -v list '%s,' "${!uploads[@]}"
|
||||
list="${list:0:-1}"
|
||||
|
||||
#
|
||||
#-------------------------------------------------------------------------------
|
||||
# If there are no uploads to do we can stop
|
||||
#
|
||||
#-------------------------------------------------------------------------------
|
||||
[[ ! -v uploads[@] ]] && { echo "Nothing to do!"; exit; }
|
||||
|
||||
#
|
||||
#-------------------------------------------------------------------------------
|
||||
# Check that the shows being uploaded have all their files and log what is
|
||||
# happening.
|
||||
#
|
||||
#-------------------------------------------------------------------------------
|
||||
while read -r show; do
|
||||
echo "$(date +%Y%m%d%H%M%S) preparing to upload hpr$show" >> "$LOGFILE"
|
||||
|
||||
@@ -512,10 +524,10 @@ while read -r show; do
|
||||
fi
|
||||
done < <(printf '%s\n' "${!uploads[@]}" | sort)
|
||||
|
||||
#
|
||||
#-------------------------------------------------------------------------------
|
||||
# Define output files. If the list contains one element then it's a different
|
||||
# name from the multi-element case (make_metadata does this too).
|
||||
#
|
||||
#-------------------------------------------------------------------------------
|
||||
if [[ ${#uploads[@]} -eq 1 ]]; then
|
||||
metadata="metadata_${minshow}.csv"
|
||||
script="script_${minshow}.sh"
|
||||
@@ -524,9 +536,9 @@ else
|
||||
script="script_${minshow}-${maxshow}.sh"
|
||||
fi
|
||||
|
||||
#
|
||||
#-------------------------------------------------------------------------------
|
||||
# Perform the uploads or report what would be done
|
||||
#
|
||||
#-------------------------------------------------------------------------------
|
||||
if [[ $DRYRUN -eq 1 ]]; then
|
||||
echo "Dry run: Would have uploaded list '$list'"
|
||||
echo "Dry run: Would have created $metadata and $script"
|
||||
@@ -573,17 +585,17 @@ else
|
||||
echo "$(date +%Y%m%d%H%M%S) ${#uploads[@]} uploads completed" >> "$LOGFILE"
|
||||
|
||||
#
|
||||
# Update the state in the HPR database, unless we're using
|
||||
# FORCE. Pass the limit used here to this script so it can
|
||||
# stop looking for work unnecessarily
|
||||
# Update the state of all the shows being processed in the
|
||||
# HPR database, unless we're using FORCE.
|
||||
#
|
||||
if [[ $FORCE -eq 0 ]]; then
|
||||
$UPSTATE -l$LIMIT
|
||||
RES=$?
|
||||
if [[ $RES -ne 0 ]]; then
|
||||
echo "Problem updating database state"
|
||||
exit 1
|
||||
fi
|
||||
while read -r show; do
|
||||
if update_show_state $show; then
|
||||
echo "Updated state for show $show"
|
||||
else
|
||||
echo "Failed to update state for show $show"
|
||||
fi
|
||||
done < <(printf '%s\n' "${!uploads[@]}" | sort)
|
||||
else
|
||||
echo "Not updating the database, FORCE mode is on"
|
||||
fi
|
||||
|
Reference in New Issue
Block a user