Compare commits

..

No commits in common. "ee4a17423357e00d988a6dc7d5ca2a1416789119" and "6621e677039bb7d160b4e083909c250507642b72" have entirely different histories.

4 changed files with 81 additions and 334 deletions

View File

@ -1,10 +1,9 @@
#!/bin/bash -
# shellcheck disable=SC2317
#===============================================================================
#
# FILE: future_upload
#
# USAGE: ./future_upload [-h] [-v] [-D] [-d {0|1}] [-F] [-r] [-l cp]
# USAGE: ./future_upload
#
# DESCRIPTION: Uploads future HPR shows based on what is in the upload area
#
@ -14,9 +13,9 @@
# NOTES: Contains methods from 'delete_uploaded' and 'weekly_upload' as
# well as 'update_state'
# AUTHOR: Dave Morriss (djm), Dave.Morriss@gmail.com
# VERSION: 0.0.17
# VERSION: 0.0.16
# CREATED: 2021-01-07 12:11:02
# REVISION: 2025-01-06 17:51:57
# REVISION: 2025-01-01 11:48:40
#
#===============================================================================
@ -27,7 +26,7 @@ SCRIPT=${0##*/}
STDOUT="/dev/fd/2"
VERSION="0.0.17"
VERSION="0.0.16"
#
# Load library functions
@ -37,7 +36,7 @@ LIB="$HOME/bin/function_lib.sh"
# shellcheck disable=SC1090
source "$LIB"
# {{{ -- Functions -- check_uploads, update_show_state, _log, _usage
# {{{ -- Functions -- check_uploads, _log, _usage
#=== FUNCTION ================================================================
# NAME: check_uploads
@ -73,36 +72,6 @@ check_uploads () {
return 0
}
#=== FUNCTION ================================================================
# NAME: update_show_state
# DESCRIPTION: Updates the status of a single show in the HPR database.
# It is assumed the caller has found the show number in the
# 'reservations' table with the required status of
# 'MEDIA_TRANSCODED'. All this function does is to change this
# to 'UPLOADED_TO_IA', returning true if successful, otherwise
# false.
# PARAMETERS: $show Show number to update
# RETURNS: True if the update worked, otherwise false
#===============================================================================
update_show_state () {
local show=${1:?Usage: update_state show}
local BASECOM URL QUERY COMMAND RES
BASECOM='curl -K ./.hpradmin_curlrc -s'
URL="https://hub.hackerpublicradio.org/cms/status.php"
QUERY="${BASECOM} ${URL}"
COMMAND="${QUERY}?ep_num=${show}&status=UPLOADED_TO_IA"
$COMMAND
RES=$?
if [[ $RES -ne 0 ]]; then
return 1
fi
return 0
}
#=== FUNCTION ================================================================
# NAME: _log
# DESCRIPTION: Writes a log record to the predefined $LOGFILE in this script
@ -114,7 +83,7 @@ update_show_state () {
# PARAMETERS: 1 - the message to write
# RETURNS: Nothing
#===============================================================================
# shellcheck disable=SC2059
# shellcheck disable=SC2317 disable=SC2059
_log () {
local msg="$1"
@ -211,7 +180,7 @@ BASECOM='curl -K ./.hpradmin_curlrc -s'
URL="https://hub.hackerpublicradio.org/cms/status.php"
# QUERY1="${BASECOM} ${URL}"
QUERY2="${BASECOM} -o - ${URL}"
# UPSTATE="$BASEDIR/update_state"
UPSTATE="$BASEDIR/update_state"
#
# Fallback URL
@ -230,10 +199,10 @@ ia=$(command -v ia)
echo "Needs the 'make_metadata' script"
exit 1
}
# [ -e "$UPSTATE" ] || {
# echo "Needs the 'update_state' script"
# exit 1
# }
[ -e "$UPSTATE" ] || {
echo "Needs the 'update_state' script"
exit 1
}
#
# File of processed shows
@ -265,9 +234,6 @@ do
done
shift $((OPTIND - 1))
#
# Check and set option variables
#
DRYRUN=${DRYRUN:-1}
if [[ $DRYRUN -ne 0 && $DRYRUN -ne 1 ]]; then
echo "** Use '-d 0' or '-d 1'"
@ -306,7 +272,6 @@ fi
#
# Declarations
# ------------
#
declare -A processed
declare -A ready
@ -317,7 +282,6 @@ lastitem=
#
# Load array of processed shows
# ---- ----- -- --------- -----
#
while read -r item; do
processed+=([$item]=1)
@ -325,17 +289,46 @@ done < "$PROCFILE"
[ "$VERBOSE" -eq 1 ] && echo "Number of shows in cache: ${#processed[@]}"
#
# Populate the associative array 'ready' with the numbers of shows ready for
# upload. This is a way to ensure that we don't try and upload shows in
# transit to the upload area. Only do this if force mode is off.
# TODO: Create the associative array 'ready' containing the numbers of shows
# ready for upload. This is a way to ensure that we don't try and upload shows
# in transit to the upload area.
#
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Proposed code. Not sure what the actual URL will be nor what will be
# returned if nothing is ready for upload yet
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#
# json=$(curl http://hackerpublicradio.org/queue.php -s -o -)
# while read -r showno; do
# ready+=([$showno]=1)
# done < <(echo "${json}" | jq '.READY_FOR_IA_UPLOAD[] | tonumber')
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Change of plan. Now we have a list of CSV values, so we need to do something
# like this:
#
# reservations=$($BASECOM -o - $URL)
# while read -r line; do
# if [[ $line =~ ^([^,]+),([^,]+),([^,]+),([^,]+),([^,]+),.*$ ]]; then
# state="${BASH_REMATCH[5]}"
# show="${BASH_REMATCH[2]}"
# fi
# if [[ $state = 'MEDIA_TRANSCODED' ]]; then
# ready+=([$show]=1)
# fi
# done <<< $reservations
#
# At the end of this the associative array 'ready' will contain the keys of
# shows that are ready for upload (presumably) so we can look in this array to
# double check.
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
if [[ $FORCE -eq 0 ]]; then
#
# Collect the current table of shows requiring work. We expect something like:
# timestamp_epoc,ep_num,ep_date,key,status,email
# 1651286617,3617,2022-06-14,fda088e0e3bd5d0353ea6b7569e93b87626ca25976a0a,UPLOADED_TO_IA,lurkingprion@gmail.com
# 1651648589,3619,2022-06-16,e7d3810afa098863d81663418d8640276272284de68f1,UPLOADED_TO_IA,monochromec@gmail.com
# TODO: Reinstate the check for a failure in the query? Se update_state
# TODO: Check for a failure in the query?A
# NOTE: Problem encountered 2022-09-23 because the SSL certificate has expired
#
reservations=$($QUERY2) || {
@ -360,8 +353,8 @@ if [[ $FORCE -eq 0 ]]; then
fi
#
# The query returns the bare show number, but we're using 'hprxxxx' as the
# key in the 'ready' array.
# The query returns the bare number, but we're using 'hprxxxx' as the key in
# the 'ready' array.
#
while read -r line; do
if [[ $line =~ ^([^,]+),([^,]+),([^,]+),([^,]+),([^,]+),.*$ ]]; then
@ -381,10 +374,7 @@ fi
#
# Process files. There will be several with the same prefix so look for
# a change of prefix.
#
# The loop is reading from the following pipeline:
# find "$UPLOADS" -regextype posix-extended -regex '.*hpr[0-9]{4}.*' | sort
# a change of prefix
#
while read -r path; do
#
@ -400,8 +390,8 @@ while read -r path; do
_DEBUG "Item $item"
#
# Detect that the item prefix has changed. If it has we've found a new IA
# identifier, so work on the previous one
# Detect that the item prefix has changed. If it has we're processing
# a new IA identifier, so work on this one
#
if [[ $item != "$lastitem" ]]; then
lastitem=$item
@ -435,8 +425,7 @@ while read -r path; do
processed+=([$lastitem]=1)
else
#
# Is the show ready for upload? We don't check if force mode
# is on. If not ready we skip this show.
# Is the show ready for upload?
#
if [[ $FORCE -eq 0 ]]; then
if [[ ! -v "ready[$lastitem]" ]]; then
@ -483,9 +472,10 @@ while read -r path; do
done < <(find "$UPLOADS" -regextype posix-extended -regex '.*hpr[0-9]{4}.*' | sort)
#-------------------------------------------------------------------------------
#
# Write the processed array to the cache file unless in dry-run mode
#-------------------------------------------------------------------------------
#
# [ $DEBUG -eq 1 ] && { echo -n 'D> '; declare -p processed; }
_DEBUG "processed = ${!processed[*]}"
[ "$VERBOSE" -eq 1 ] && echo "Number of shows in cache: ${#processed[@]}"
if [[ $DRYRUN -ne 1 ]]; then
@ -494,26 +484,24 @@ if [[ $DRYRUN -ne 1 ]]; then
done < <(printf '%s\n' "${!processed[@]}" | sort -u ) > "$PROCFILE"
fi
#-------------------------------------------------------------------------------
#
# Generate the list of uploads for the 'make_metadata' option '-list=1,2,3'.
# The show numbers are keys in the associative array 'uploads'. The
# end-product is a comma-separated list of the keys in the variable '$list'.
# Order is unimportant because make_metadata sorts internally.
#-------------------------------------------------------------------------------
#
_DEBUG "uploads = ${!uploads[*]}"
[ "$VERBOSE" -eq 1 ] && echo "Number of shows for upload: ${#uploads[@]}"
printf -v list '%s,' "${!uploads[@]}"
list="${list:0:-1}"
#-------------------------------------------------------------------------------
#
# If there are no uploads to do we can stop
#-------------------------------------------------------------------------------
#
[[ ! -v uploads[@] ]] && { echo "Nothing to do!"; exit; }
#-------------------------------------------------------------------------------
#
# Check that the shows being uploaded have all their files and log what is
# happening.
#-------------------------------------------------------------------------------
#
while read -r show; do
echo "$(date +%Y%m%d%H%M%S) preparing to upload hpr$show" >> "$LOGFILE"
@ -524,10 +512,10 @@ while read -r show; do
fi
done < <(printf '%s\n' "${!uploads[@]}" | sort)
#-------------------------------------------------------------------------------
#
# Define output files. If the list contains one element then it's a different
# name from the multi-element case (make_metadata does this too).
#-------------------------------------------------------------------------------
#
if [[ ${#uploads[@]} -eq 1 ]]; then
metadata="metadata_${minshow}.csv"
script="script_${minshow}.sh"
@ -536,9 +524,9 @@ else
script="script_${minshow}-${maxshow}.sh"
fi
#-------------------------------------------------------------------------------
#
# Perform the uploads or report what would be done
#-------------------------------------------------------------------------------
#
if [[ $DRYRUN -eq 1 ]]; then
echo "Dry run: Would have uploaded list '$list'"
echo "Dry run: Would have created $metadata and $script"
@ -585,17 +573,17 @@ else
echo "$(date +%Y%m%d%H%M%S) ${#uploads[@]} uploads completed" >> "$LOGFILE"
#
# Update the state of all the shows being processed in the
# HPR database, unless we're using FORCE.
# Update the state in the HPR database, unless we're using
# FORCE. Pass the limit used here to this script so it can
# stop looking for work unnecessarily
#
if [[ $FORCE -eq 0 ]]; then
while read -r show; do
if update_show_state $show; then
echo "Updated state for show $show"
else
echo "Failed to update state for show $show"
fi
done < <(printf '%s\n' "${!uploads[@]}" | sort)
$UPSTATE -l$LIMIT
RES=$?
if [[ $RES -ne 0 ]]; then
echo "Problem updating database state"
exit 1
fi
else
echo "Not updating the database, FORCE mode is on"
fi

View File

@ -1,245 +0,0 @@
#!/usr/bin/env perl
#===============================================================================
#
# FILE: reformat_html
#
# USAGE: ./reformat_html < input.html > output.html
#
# DESCRIPTION: Reformats the HTML found in the HPR database in the 'notes'
# field to the format required in the 'description' field of an
# item on the IA. It reads from STDIN and writes to STDOUT.
#
# OPTIONS: ---
# REQUIREMENTS: ---
# BUGS: ---
# NOTES: ---
# AUTHOR: Dave Morriss (djm), Dave.Morriss@gmail.com
# VERSION: 0.0.1
# CREATED: 2025-02-09 22:56:30
# REVISION: 2025-02-13 11:13:37
#
#===============================================================================
use v5.36;
use strict;
use warnings;
use feature qw{ say try };
no warnings qw{ experimental::try };
use open ':std', ':encoding(UTF-8)'; # Make all IO UTF-8
use HTML::TreeBuilder 5 -weak;
use HTML::Entities;
#
# Version number (Incremented by Vim)
#
our $VERSION = '0.0.1';
#
# Declarations
#
my ($verbose, @notes, $notes, $tree);
#
# Read the input data into an array
#
try {
@notes = <STDIN>;
}
catch ($e) {
warn "Problem reading input HTML; $e";
exit 1;
}
die "No input HTML detected\n" unless @notes;
#
# Turn the array into a scalar
#
$notes = join( '', @notes );
#
# Get ready to parse the array
#
$tree = HTML::TreeBuilder->new;
$tree->ignore_unknown(0);
$tree->no_expand_entities(1);
$tree->p_strict(1);
$tree->store_comments(1); # Necessary?
$tree->warn(1);
#
# Parse HTML to the tree structure
#
$tree->parse_content($notes)
or die "HTML::TreeBuilder failed to parse input HTML: $!\n";
#
# Flatten all <pre> tags and add <br/> tags
#
$notes = flatten_pre($tree);
#
# Deal with non-ASCII
#
$notes = encode_entities( $notes, '^\n&\x20-\x25\x27-\x7e' );
#
# Remove all newlines
#
$notes =~ s/\n//g;
#
# Write the end result to the STDOUT
#
say $notes;
exit;
#=== FUNCTION ================================================================
# NAME: flatten_pre
# PURPOSE: Process notes "flattening" <pre> contents
# PARAMETERS: $tree HTML::TreeBuilder object containing parsed and
# partially processed notes
# RETURNS: Processed notes
# DESCRIPTION: The HTML "<pre>" tag encloses preformatted text. It can also
# contain some formatting tags like <em> and <code>, but spaces
# and newlines are significant. The Internet Archive upload API
# uses HTTP headers which are text strings without newlines, so
# when these tags are uploaded through this route some
# formatting is lost. What this routine does is parse the
# contents of all <pre> sections in $notes, adding <br/> tags
# to replace newlines. It has to perform a full parse
# since the contents may include HTML tags and these need to be
# passed through intact. It calls the subroutine 'flatten_item' to
# deal with the recursive nature of HTML tags.
# THROWS: No exceptions
# COMMENTS: None
# SEE ALSO: N/A
#===============================================================================
sub flatten_pre {
my ($tree) = @_;
#
# Find all the <pre> tags
#
my @pre_tags = $tree->look_down( _tag => 'pre', );
#
# Walk the various <pre> elements in the document
#
foreach my $tag (@pre_tags) {
#
# Save the tag and empty the original
#
my $saved = $tag->clone();
$tag->delete_content();
#
# Walk the saved content and rebuild the tag into $atag using the
# nested arrayref structure permitted by HTML::Element for
# convenience (the alternative is a little nasty). See the
# documentation for 'new_from_lol' in HTML::Element.
#
my $atag;
foreach my $item ( @{ $saved->content_array_ref } ) {
push( @$atag, flatten_item($item) );
}
#
# Rebuild the tag from the arrayref we built. We treat the arrayref
# structure we just built as an array because otherwise the top level
# is interpreted as a spurious <null> tag.
#
$tag->push_content(@$atag);
}
#
# Trim out the original notes from the enclosing tags we added earlier
#
my $body = $tree->look_down( _tag => 'body' );
( my $result = $body->as_HTML( undef, ' ', {} ) )
=~ s{(^<body[^>]*>|</body>$)}{}gi;
return $result;
}
#=== FUNCTION ================================================================
# NAME: flatten_item
# PURPOSE: Recursively "flatten" items within the enclosing <pre>
# PARAMETERS: $item an HTML::Element item parsed from the original
# <pre> section
# RETURNS: An arrayref if the last seen item was a tag, otherwise a list
# DESCRIPTION: Since <pre> sections can contain inline elements which change
# the rendering of the text we need to parse these as we add
# <br/> tags. This routine does this by recursively descending
# through the contents. A common tag sequence is <pre><code> for
# scripts and the like. This routine deals with such sequences.
# It expects to receive the contents in sequence and builds the
# result as a nested arrayref structure.
# THROWS: No exceptions
# COMMENTS: None
# SEE ALSO: N/A
#===============================================================================
sub flatten_item {
my ($item) = @_;
return unless defined($item);
my ( @result, %attr );
#
# Is it a sub-tag or non-tag content?
#
if ( ref($item) ) {
#
# It's a tag. Save the tag name and any attributes and recurse into
# it. Return an arrayref
#
push( @result, $item->tag() );
%attr = $item->all_external_attr();
push( @result, \%attr ) if %attr;
for my $child ( $item->content_list() ) {
push( @result, flatten_item($child) );
}
return \@result;
}
else {
#
# It's non-tag content. Join the lines with <br/> tags. Return an
# array (since this is a simple list).
#
# Note that we split with a LIMIT of -1 which causes any trailing list
# items to be returned; default behaviour is to drop them.
#
$item =~ s/\r//g;
my @content = split( /\n/, $item, -1 );
if (@content) {
#
# Remove a leading blank line - usually the result of
# a "<pre>'NL'text" sequence
#
shift(@content) if ( $content[0] =~ /^\s*$/ );
#
# Join back the lines with <br/> tags between them.
#
foreach my $txt (@content) {
push( @result, $txt, ['br'] );
}
#
# Remove the <br/> at the end, it's spurious
#
pop(@result);
}
return (@result);
}
}
# vim: syntax=perl:ts=8:sw=4:et:ai:tw=78:fo=tcrqn21:fdm=marker

View File

@ -6,7 +6,7 @@
# USAGE: ./tidy_uploaded [-h] [-v] [-d {0|1}] [-c COUNT]
#
# DESCRIPTION: Relocates HPR audio and other show-related files on 'borg'
# after their shows have been uploaded to the Internet Archive.
# after their shows have been uploaded to the Internet Archive
#
# OPTIONS: ---
# REQUIREMENTS: ---
@ -43,7 +43,7 @@ TMP1=$(mktemp) || { echo "$SCRIPT: creation of temporary file failed!"; exit 1;
trap 'cleanup_temp $TMP1' SIGHUP SIGINT SIGPIPE SIGTERM EXIT
#
# Configure depending whether local or on 'borg'
# Configure depending whether local or on the VPS
#
case $HOSTNAME in
borg) BASEDIR="$HOME/InternetArchive"
@ -95,7 +95,7 @@ queued_tasks () {
# NAME: movefile
# DESCRIPTION: Moves a file to a new place, catering for any directories in
# the path
# PARAMETERS: $1 directory to move from
# PARAMETERS: $1 directory to move form
# $2 directory to move to
# $3 file (or sub-path to move)
# RETURNS: True if a move was done, otherwise False
@ -356,7 +356,7 @@ while read -r path; do
#
tasks=$(queued_tasks "$item")
if [[ $tasks -gt 0 ]]; then
echo "** Item $item still has $tasks unfinished" \
echo "** Item $item still has $tasks unfinished " \
"$(ngettext task tasks "$tasks")"
echo "** Skipping to the next item"
continue
@ -434,6 +434,9 @@ while read -r path; do
done < <(find "$UPLOADS" -regextype posix-extended -regex '.*hpr[0-9]{4}.*' -printf "%CY%Cm%Cd%CH%CM%CS %p\n" | sort | cut -f2 -d' ')
# Old 'find' used:
# done < <(find "$UPLOADS" -regextype posix-extended -regex '.*hpr[0-9]{4}.*' | sort)
#
# No shows processed? There was nothing to do
#

View File

@ -3,7 +3,7 @@
#
# FILE: update_state
#
# USAGE: ./update_state [-h] [-D] [-d] [-F] [-l N] [-m]
# USAGE: ./update_state
#
# DESCRIPTION: A script to update the state of shows which have been sent to
# the IA. It looks at the current state of the 'reservations'
@ -136,6 +136,7 @@ esac
cd "$BASEDIR" || { echo "Can't cd to $BASEDIR"; exit 1; }
#
# Tools
#