forked from HPR/hpr-tools
		
	Updates since 2024-06-15
Database/query2tt2: comment and documentation updates; use of Perl's
    try/catch.
InternetArchive/.make_metadata.cfg: added comments for readability
InternetArchive/make_metadata: bug fix needed now that all shows on the HPR server have
    a directory with assets under it.
InternetArchive/repair_assets: new Bash script in development. Collects
    assets from the IA and uploads them to a new directory on the HPR
    server. Will run 'fix_asset_links' (to repair asset links for their
    new directories) once it is ready.
InternetArchive/repair_item: Bash script which was originally written to
    run on 'borg' and upload files to a new IA item when the uploads
    timed out. Now enhanced to upload missing files recovered from the
    HPR backup disk, such as transcripts.
			
			
This commit is contained in:
		| @@ -1,16 +1,62 @@ | ||||
| # Version for i7-desktop | ||||
| # .make_metadata.cfg 2023-07-06 11:54:49 | ||||
| # .make_metadata.cfg 2024-07-08 13:55:23 | ||||
| # | ||||
|  | ||||
| # | ||||
| # A sanity check value in case an episode number given is too big | ||||
| # | ||||
| max_epno = 9000 | ||||
|  | ||||
| # | ||||
| # This is where the script will look for the audio files for upload (if there | ||||
| # are other "assets" it finds them itself) | ||||
| # | ||||
| #uploads = "/var/IA/uploads" # on the VPS and marvin | ||||
| uploads = "/home/cendjm/HPR/IA/uploads" | ||||
|  | ||||
| # | ||||
| # How a "standard" audio file name is made up | ||||
| # | ||||
| filetemplate = "hpr%04d.%s" | ||||
|  | ||||
| # | ||||
| # How to fill in the "missing bit" in relative URLs | ||||
| # | ||||
| baseURL = "https://hackerpublicradio.org/" | ||||
|  | ||||
| # | ||||
| # *** OBSOLETE *** | ||||
| # If we need to fetch the MP3 version of the audio, which we do for older | ||||
| # shows, these are under the 'local' directory. The 'eps' files are actually | ||||
| # redirections to the IA. This is not normally used for the weekly uploads. | ||||
| # | ||||
| #URLtemplate = "http://hackerpublicradio.org/eps/%s" | ||||
| #URLtemplate = "https://hackerpublicradio.org/local/%s" | ||||
|  | ||||
| # | ||||
| # Printf/sprintf template for building an URL which points back to the current | ||||
| # show on the HPR site. | ||||
| # | ||||
| sourceURLtemplate = "https://hackerpublicradio.org/eps/%s/index.html" | ||||
|  | ||||
| # | ||||
| # If we are having to collect assets from the HPR server and upload them to | ||||
| # the IA server we want the final product to be addressable according to the | ||||
| # following URL template. | ||||
| # | ||||
| IAURLtemplate = "https://archive.org/download/%s/%s" | ||||
|  | ||||
| # | ||||
| # We build a Bash script to perform the upload of files which aren't in the | ||||
| # CSV generated by make_metadata. We used to use the plain 'ia upload' command | ||||
| # but now we call a Bash function declared in the script which is slightly | ||||
| # cleverer. We need to do this to get round the IA code's tendency to "derive" | ||||
| # all audio, and in doing so strip any audio tags. We perform our own | ||||
| # equivalent of "derive" *with* the tags and upload them telling the IA *not* | ||||
| # to re-derive. Mostly it listens. There's also a whole thing about IA keeping | ||||
| # history of deletions which we want to turn off otherwise our items become | ||||
| # stuffed with unwanted garbage. | ||||
| # | ||||
| #iauploadtemplate = "ia upload %s %s --remote-name=%s" | ||||
| iauploadtemplate = "Upload %s %s '%s' '%s'" | ||||
| iauploadoptions = "--retries=5 --no-derive -H x-archive-keep-old-version:0" | ||||
|  | ||||
|   | ||||
										
											Binary file not shown.
										
									
								
							| @@ -19,21 +19,24 @@ | ||||
| #               and this version (0.4.12) made into the main line version | ||||
| #               because 4.14 was developing in a direction that doesn't fit | ||||
| #               with the changes made to the HPR system in June/July 2023. | ||||
| #               Will now move forward with version numbers. | ||||
| #               Will now move forward with version numbers (and will get | ||||
| #               a duplicate). | ||||
| #               2024-01-23: Added the 'open' pragma for UTF-8 | ||||
| #               2024-07-08: Fixed a bug where the top-level directory was | ||||
| #               being added to assets paths. See the definition of $linkre for | ||||
| #               more details. | ||||
| #               ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | ||||
| #       AUTHOR: Dave Morriss (djm), Dave.Morriss@gmail.com | ||||
| #      VERSION: 0.4.14 | ||||
| #      CREATED: 2014-06-13 12:51:04 | ||||
| #     REVISION: 2024-01-23 16:28:59 | ||||
| #     REVISION: 2024-07-08 15:21:02 | ||||
| # | ||||
| #=============================================================================== | ||||
|  | ||||
| use 5.010; | ||||
| use strict; | ||||
| use warnings; | ||||
| use open ':encoding(UTF-8)'; | ||||
| #use utf8; | ||||
| use open ':std', ':encoding(UTF-8)'; | ||||
|  | ||||
| use Carp; | ||||
| use Getopt::Long; | ||||
| @@ -1527,8 +1530,21 @@ sub find_links_in_notes { | ||||
|     #   http://www.hackerpublicradio.org/eps/hpr1303/Music_Notes.html | ||||
|     # Also things like this (**Why Ken?**) | ||||
|     #   ../eps/hpr2945/IMG_20191018_122746Z.jpg | ||||
|     # Don't match things like when *not* processing 1986: | ||||
|     # Don't match things like this when *not* processing 1986: | ||||
|     #   http://hackerpublicradio.org/eps/hpr1986/full_shownotes.html#example-2 | ||||
|     # ---------------------------------------------------------------------- | ||||
|     # NOTE: 2024-07-08 | ||||
|     # | ||||
|     # It used to be that we added a top-level hprXXXX directory to URLs | ||||
|     # because there wasn't one on the HPR server. This was because the | ||||
|     # majority of shows without assets had no files; the notes were taken from | ||||
|     # the database and displayed dynamically. | ||||
|     # | ||||
|     # Now all HPR shows have a top-level directory for holding the index.html | ||||
|     # with the pre-created notes page. So we DO NOT want to create that | ||||
|     # top-level part. The RE below matches but doesn't store it or we'd get | ||||
|     # one too many directory levels. | ||||
|     # ---------------------------------------------------------------------- | ||||
|     # | ||||
|     $epstr = sprintf( "hpr%04d", $episode ); | ||||
| #   my $re | ||||
| @@ -1537,6 +1553,7 @@ sub find_links_in_notes { | ||||
|         ^https?:// | ||||
|         (?:www.)? | ||||
|         (?:hacker|hobby)publicradio.org/eps/ | ||||
|         $epstr/ | ||||
|         (.+)$ | ||||
|     }x; | ||||
|  | ||||
| @@ -1558,7 +1575,7 @@ sub find_links_in_notes { | ||||
|         _debug( $DEBUG >= 3, "\$uri = $uri\n" ); | ||||
|         _debug( $DEBUG >= 3, "\$uri->fragment = " . $uri->fragment ) | ||||
|             if $uri->fragment; | ||||
|         _debug( $DEBUG >= 3, "\$slink = $slink, \n" ); | ||||
|         _debug( $DEBUG >= 3, "\$slink = $slink\n" ); | ||||
|  | ||||
|         # | ||||
|         # Is it an HPR link? | ||||
| @@ -1760,7 +1777,7 @@ sub find_links_in_file { | ||||
|     #   http://www.hackerpublicradio.org/eps/hpr1303/Music_Notes.html | ||||
|     # Also things like this (**Why Ken?**) | ||||
|     #   ../eps/hpr2945/IMG_20191018_122746Z.jpg | ||||
|     # Don't match things like when *not* processing 1986: | ||||
|     # Don't match things like this when *not* processing 1986: | ||||
|     #   http://hackerpublicradio.org/eps/hpr1986/full_shownotes.html#example-2 | ||||
|     # | ||||
|     $epstr = sprintf( "hpr%04d", $episode ); | ||||
|   | ||||
							
								
								
									
										627
									
								
								InternetArchive/repair_assets
									
									
									
									
									
										Executable file
									
								
							
							
						
						
									
										627
									
								
								InternetArchive/repair_assets
									
									
									
									
									
										Executable file
									
								
							| @@ -0,0 +1,627 @@ | ||||
| #!/bin/bash - | ||||
| #=============================================================================== | ||||
| # | ||||
| #         FILE: repair_assets | ||||
| # | ||||
| #        USAGE: ./repair_assets showid | ||||
| # | ||||
| #  DESCRIPTION: Given a show where there was a directory of asset files on the | ||||
| #               old HPR server which got lost in the migration, rebuild it | ||||
| #               and fill it with assets from the IA. Modify the show notes to | ||||
| #               point to these recovered assets. | ||||
| # | ||||
| #      OPTIONS: --- | ||||
| # REQUIREMENTS: --- | ||||
| #         BUGS: --- | ||||
| #        NOTES: --- | ||||
| #       AUTHOR: Dave Morriss (djm), Dave.Morriss@gmail.com | ||||
| #      VERSION: 0.0.6 | ||||
| #      CREATED: 2024-05-10 21:26:31 | ||||
| #     REVISION: 2024-07-10 15:12:54 | ||||
| # | ||||
| #=============================================================================== | ||||
|  | ||||
| # set -o nounset                              # Treat unset variables as an error | ||||
|  | ||||
| VERSION="0.0.6" | ||||
|  | ||||
| SCRIPT=${0##*/} | ||||
| # DIR=${0%/*} | ||||
|  | ||||
| STDOUT="/dev/fd/2" | ||||
|  | ||||
| # | ||||
| # Select the appropriate working directory for the host | ||||
| # | ||||
| case $(hostname) in | ||||
|     i7-desktop) | ||||
|         BASEDIR="$HOME/HPR/InternetArchive" | ||||
|         ;; | ||||
|     borg) | ||||
|         BASEDIR="$HOME/IA" | ||||
|         ;; | ||||
|     *) | ||||
|         echo "Wrong host!" | ||||
|         exit 1 | ||||
|         ;; | ||||
| esac | ||||
|  | ||||
| cd "$BASEDIR" || { echo "Failed to cd to $BASEDIR"; exit 1; } | ||||
|  | ||||
| # | ||||
| # Load library functions. Exit with a non-zero status if the library is | ||||
| # missing (a bare 'exit' would return the status of the preceding 'echo', | ||||
| # i.e. success). | ||||
| # | ||||
| LIB="$HOME/HPR/function_lib.sh" | ||||
| [ -e "$LIB" ] || { echo "Unable to source functions"; exit 1; } | ||||
| # shellcheck disable=SC1090 | ||||
| source "$LIB" | ||||
|  | ||||
| # | ||||
| # Enable coloured messages | ||||
| # | ||||
| define_colours | ||||
|  | ||||
| # | ||||
| # Sanity checks | ||||
| # | ||||
| IA=$(command -v ia) | ||||
| [ -n "$IA" ] || { echo "Program 'ia' was not found"; exit 1; } | ||||
| Q2T=$(command -v query2tt2) | ||||
| [ -n "$Q2T" ] || { echo "Program 'query2tt2' was not found"; exit 1; } | ||||
| FIXAL="$BASEDIR/fix_asset_links" | ||||
| [ -e "$FIXAL" ] || { echo "Program '$FIXAL' was not found"; exit 1; } | ||||
|  | ||||
| # | ||||
| # Make temporary files and set traps to delete them | ||||
| # | ||||
| TMP1=$(mktemp) || { echo "$SCRIPT: creation of temporary file failed!"; exit 1; } | ||||
| TMP2=$(mktemp) || { echo "$SCRIPT: creation of temporary file failed!"; exit 1; } | ||||
| trap 'cleanup_temp $TMP1 $TMP2' SIGHUP SIGINT SIGPIPE SIGTERM EXIT | ||||
|  | ||||
| # {{{ -- Functions -- _verbose, _usage, _log, find_missing, make_dir | ||||
|  | ||||
| #===  FUNCTION  ================================================================ | ||||
| #         NAME: find_missing | ||||
| #  DESCRIPTION: Given two arrays containing IA assets and HPR assets, | ||||
| #               determine which IA assets are missing from the HPR list. | ||||
| #               Files are compared by base name; the full IA path of each | ||||
| #               missing file is appended to the output array. | ||||
| #   PARAMETERS: $1      (nameref) IA list | ||||
| #               $2      (nameref) HPR list | ||||
| #               $3      (nameref) Name of array to receive list of missing | ||||
| #                       assets | ||||
| #      RETURNS: Nothing | ||||
| #        NOTES: The caller's arrays must not be called 'IA', 'HPR' or | ||||
| #               '_fm_missing' since these names are used for the local | ||||
| #               namerefs. | ||||
| #=============================================================================== | ||||
| find_missing () { | ||||
|     local -n IA="${1}" | ||||
|     local -n HPR="${2}" | ||||
|     # A nameref is used instead of 'eval' so that file names containing | ||||
|     # quotes or other shell metacharacters are stored verbatim | ||||
|     local -n _fm_missing="${3}" | ||||
|  | ||||
|     local -A hIA hHPR | ||||
|     local path key | ||||
|  | ||||
|     # | ||||
|     # Make a hash keyed by the IA file base names from an indexed array | ||||
|     # | ||||
|     for path in "${IA[@]}"; do | ||||
|         hIA["${path##*/}"]="$path" | ||||
|     done | ||||
|  | ||||
|     # | ||||
|     # Make a hash keyed by the HPR file base names from an indexed array | ||||
|     # | ||||
|     for path in "${HPR[@]}"; do | ||||
|         hHPR["${path##*/}"]="$path" | ||||
|     done | ||||
|  | ||||
|     # | ||||
|     # Use the basename keys to check what's missing, but return the full path | ||||
|     # names. | ||||
|     # | ||||
|     for key in "${!hIA[@]}"; do | ||||
|         if ! exists_in hHPR "$key"; then | ||||
|             _fm_missing+=("${hIA[$key]}") | ||||
|         fi | ||||
|     done | ||||
| } | ||||
|  | ||||
| #===  FUNCTION  ================================================================ | ||||
| #         NAME: make_dir | ||||
| #  DESCRIPTION: Ensures a directory exists, creating it (and any missing | ||||
| #               parents) when it is absent. Reports the failure in red and | ||||
| #               terminates the script if creation fails. | ||||
| #   PARAMETERS: $1      directory path | ||||
| #      RETURNS: True if success, otherwise exits the caller script | ||||
| #=============================================================================== | ||||
| make_dir () { | ||||
|     local target="${1}" | ||||
|  | ||||
|     # Nothing to do when the directory is already there | ||||
|     [[ -d $target ]] && return | ||||
|  | ||||
|     if ! mkdir -p "$target"; then | ||||
|         coloured 'red' "Failed to create $target" | ||||
|         exit 1 | ||||
|     fi | ||||
| } | ||||
|  | ||||
| #===  FUNCTION  ================================================================ | ||||
| #         NAME: _verbose | ||||
| #  DESCRIPTION: Writes a message in verbose mode. Each argument is written | ||||
| #               on its own line. Nothing is written when VERBOSE is zero or | ||||
| #               unset. | ||||
| #   PARAMETERS: *       message strings to write | ||||
| #      RETURNS: Nothing | ||||
| #=============================================================================== | ||||
| _verbose () { | ||||
|     # Keep the loop variable out of the global namespace | ||||
|     local msg | ||||
|  | ||||
|     [ "${VERBOSE:-0}" -eq 0 ] && return | ||||
|     for msg; do | ||||
|         printf '%s\n' "$msg" | ||||
|     done | ||||
| } | ||||
|  | ||||
| #===  FUNCTION  ================================================================ | ||||
| #         NAME: _log | ||||
| #  DESCRIPTION: Appends a timestamped record to the file "$LOGFILE". The | ||||
| #               timestamp has the form 'YYYY-MM-DD HH:MM:SS'. | ||||
| #   PARAMETERS: $1      Message to write | ||||
| #      RETURNS: Nothing | ||||
| #=============================================================================== | ||||
| _log () { | ||||
|     local message="${1}" | ||||
|     local stamp | ||||
|  | ||||
|     stamp=$(date '+%F %T') | ||||
|     printf '%s %s\n' "$stamp" "$message" >> "$LOGFILE" | ||||
| } | ||||
|  | ||||
| #===  FUNCTION  ================================================================ | ||||
| #         NAME: _usage | ||||
| #  DESCRIPTION: Reports usage; always exits the script after doing so. The | ||||
| #               text is written to the file named by "$STDOUT". | ||||
| #   PARAMETERS: 1 - the integer to pass to the 'exit' command (default 0) | ||||
| #      RETURNS: Nothing | ||||
| #=============================================================================== | ||||
| _usage () { | ||||
|     local -i result=${1:-0} | ||||
|  | ||||
|     cat >"$STDOUT" <<-endusage | ||||
| ${SCRIPT} - version: ${VERSION} | ||||
|  | ||||
| Usage: ./${SCRIPT} [-h] [-v] [-d {0|1}] [-D] showid | ||||
|  | ||||
| Attempts to repair a show where the directory of assets was not transferred | ||||
| from the old HPR server. | ||||
|  | ||||
| Options: | ||||
|   -h                    Print this help | ||||
|   -v                    Run in verbose mode where more information is | ||||
|                         reported. Default is off. If -v is repeated it | ||||
|                         increases the verbosity level (levels 1 and 2 only). | ||||
|   -d 0|1                Dry run: -d 1 (the default) runs the script in dry-run | ||||
|                         mode where nothing is changed but the actions that | ||||
|                         will be taken are reported; -d 0 turns off dry-run | ||||
|                         mode and the actions will be carried out. | ||||
|   -D                    Run in debug mode where a lot more information is | ||||
|                         reported | ||||
|  | ||||
| Arguments: | ||||
|     showid              The show id in the form 'hpr1234' | ||||
|  | ||||
| endusage | ||||
|     exit "$result" | ||||
| } | ||||
|  | ||||
| # }}} | ||||
|  | ||||
| #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | ||||
|  | ||||
| #------------------------------------------------------------------------------- | ||||
| # Directories and files | ||||
| #------------------------------------------------------------------------------- | ||||
| LOGS="$BASEDIR/logs" | ||||
| make_dir "${LOGS}" | ||||
| LOGFILE="$LOGS/$SCRIPT.log" | ||||
|  | ||||
| #------------------------------------------------------------------------------- | ||||
| # Options | ||||
| #------------------------------------------------------------------------------- | ||||
| # Default settings | ||||
| # | ||||
| VERBOSE=0 | ||||
|  | ||||
| # | ||||
| # Process options | ||||
| # | ||||
| while getopts :d:Dhv opt | ||||
| do | ||||
|     case "${opt}" in | ||||
|         D) DEBUG=1;; | ||||
|         d) DRYRUN=$OPTARG;; | ||||
|         h) _usage 0;; | ||||
|         v) ((VERBOSE++));; | ||||
|         *) echo "** Unknown option" | ||||
|            _usage 1;; | ||||
|     esac | ||||
| done | ||||
| shift $((OPTIND - 1)) | ||||
|  | ||||
| # | ||||
| # Set option defaults and check their values | ||||
| # | ||||
| DRYRUN=${DRYRUN:-1} | ||||
| if [[ $DRYRUN -ne 0 && $DRYRUN -ne 1 ]]; then | ||||
|     coloured 'red' "** Use '-d 0' or '-d 1'" | ||||
|     _usage 1 | ||||
| fi | ||||
| [[ $VERBOSE -gt 0 && $DRYRUN -eq 1 ]] && echo "Dry run mode" | ||||
|  | ||||
| DEBUG=${DEBUG:-0} | ||||
| [[ $DEBUG -eq 1 ]] && coloured 'yellow' "Debug mode" | ||||
|  | ||||
| #------------------------------------------------------------------------------- | ||||
| # Argument check | ||||
| #------------------------------------------------------------------------------- | ||||
| # Should have one argument | ||||
| # | ||||
| if [[ $# != 1 ]]; then | ||||
|     coloured 'red' "Missing argument" | ||||
|     _usage 1 | ||||
| fi | ||||
| show="${1,,}" | ||||
|  | ||||
| # | ||||
| # Ensure show id is correctly formatted. We want it to be 'hpr1234'. The | ||||
| # pattern is anchored so that stray text around the number is rejected, and | ||||
| # the digits are forced to base 10 so that leading zeroes (e.g. 'hpr0099') | ||||
| # are not interpreted as octal by 'printf'. | ||||
| # | ||||
| if [[ $show =~ ^(hpr)?([0-9]+)$ ]]; then | ||||
|     printf -v show 'hpr%04d' "$((10#${BASH_REMATCH[2]}))" | ||||
| else | ||||
|     coloured 'red' "Incorrect show specification: $show" | ||||
|     coloured 'yellow' "Use 'hpr9999' or '9999' format" | ||||
|     exit 1 | ||||
| fi | ||||
| _DEBUG "Parsed item: $show" | ||||
| echo "Processing show $show" | ||||
| _log "Processing show $show; dry-run: $([ "$DRYRUN" -eq 1 ] && echo "on" || echo "off")" | ||||
|  | ||||
| #------------------------------------------------------------------------------- | ||||
| # Declarations and constants | ||||
| #------------------------------------------------------------------------------- | ||||
| declare -a iacache | ||||
|  | ||||
| # | ||||
| # SHOWURL is where the show will be on the webserver | ||||
| # | ||||
| printf -v SHOWURL 'https://hackerpublicradio.org/eps/%s/index.html' "$show" | ||||
|  | ||||
| # | ||||
| # CACHEDIR is where we store asset details and files | ||||
| # | ||||
| CACHEDIR="$BASEDIR/assets" | ||||
| [ ! -d "$CACHEDIR" ] && { | ||||
|     coloured 'red' "Creating cache directory" | ||||
|     make_dir "$CACHEDIR" | ||||
| } | ||||
|  | ||||
| # | ||||
| # Pointers into the cache: | ||||
| # LOCAL_ASSETDIR  - where the cache for this show lives | ||||
| # LOCAL_FILEDIR   - where the IA files have been placed | ||||
| # LOCAL_PARENTDIR - the equivalent directory to the top show dir | ||||
| # | ||||
| LOCAL_ASSETDIR="$CACHEDIR/${show}" | ||||
| LOCAL_FILEDIR="$LOCAL_ASSETDIR/files" | ||||
| LOCAL_PARENTDIR="$LOCAL_FILEDIR/${show}" | ||||
|  | ||||
| # | ||||
| # Pointers to the HPR server directories: | ||||
| # REMOTE_ASSETDIR  - where the assets are to go | ||||
| # REMOTE_PARENTDIR - the remote parent directory | ||||
| # | ||||
| REMOTE_ASSETDIR="public_html/eps/${show}/${show}" | ||||
| REMOTE_PARENTDIR="public_html/eps/${show}" | ||||
|  | ||||
| CMDTPL='ssh hpr@hackerpublicradio.org %s' | ||||
|  | ||||
| MANIFEST="$CACHEDIR/$show/manifest" | ||||
| DBNOTES="$CACHEDIR/$show/notes.html" | ||||
|  | ||||
| #------------------------------------------------------------------------------- | ||||
| # Check the show exists in the database (or is visible on the website). | ||||
| #------------------------------------------------------------------------------- | ||||
| _verbose "Checking the show exists on the HPR server" | ||||
| result=$(curl --head --silent --write-out "%{http_code}" --output /dev/null "$SHOWURL") | ||||
| if [[ $result -eq 404 ]]; then | ||||
|     coloured 'red' "Could not detect show '$show' on the HPR server" | ||||
|     _log "Show '$show' not on the HPR server" | ||||
|     exit 1 | ||||
| fi | ||||
|  | ||||
| #------------------------------------------------------------------------------- | ||||
| # Check the show exists on the IA | ||||
| #------------------------------------------------------------------------------- | ||||
| _verbose "Checking the show exists on the IA server" | ||||
| if ! ia metadata "$show" --exists > /dev/null 2>&1; then | ||||
|     coloured 'red' "Could not detect show '$show' on the IA server" | ||||
|     coloured 'yellow' "Check that archive.org is available" | ||||
|     coloured 'yellow' "Try https://downfor.io/internet-archive" | ||||
|     _log "Show '$show' not on the IA server" | ||||
|     exit 1 | ||||
| fi | ||||
|  | ||||
| #------------------------------------------------------------------------------- | ||||
| # Check IA, collect contents, classify them | ||||
| #------------------------------------------------------------------------------- | ||||
| # Interrogate the IA for the required item contents. If it returns True we can | ||||
| # collect its contents, otherwise we can't proceed. The file 'TMP1' contains | ||||
| # just a simple list of the files on the IA relating to this item. | ||||
| # | ||||
| _verbose "Collecting filenames from the IA server" | ||||
| if ia list "$show" > "$TMP1"; then | ||||
|     while read -r iafile; do | ||||
|         iacache+=("$iafile") | ||||
|     done < "$TMP1" | ||||
| else | ||||
|     coloured 'red' "Item $show can't be found on the IA" | ||||
|     coloured 'red' "Can't continue" | ||||
|     _log "Files for show '$show' not on the IA server" | ||||
|     exit 1 | ||||
| fi | ||||
|  | ||||
| _DEBUG "$(printf '%s\n' "${iacache[@]}")" | ||||
|  | ||||
| # | ||||
| # Determine which files are assets | ||||
| # | ||||
| _verbose "Categorising files held on the IA" | ||||
|  | ||||
| declare -a audio ia_transcript ia_asset | ||||
|  | ||||
| audio_re="^${show}\.(flac|mp3|ogg|opus|spx|wav)\$" | ||||
| # transcript_re="^${show}/${show}/${show}\.(json|srt|tsv|txt|vtt)\$" | ||||
| transcript_re="^${show}/${show}\.(json|srt|tsv|txt|vtt)\$" | ||||
| asset_re="^${show}/(${show}/)?.*\$" | ||||
| metadata_re="^(__ia_thumb.jpg|${show}[^/]+\.(afpk|torrent|gz|xml|sqlite|png))\$" | ||||
|  | ||||
| for file in "${iacache[@]}"; do | ||||
|     if [[ $file =~ $audio_re ]]; then | ||||
|         audio+=("$file") | ||||
|     elif [[ $file =~ $metadata_re ]]; then | ||||
|         _verbose "Skipping $file" | ||||
|         continue | ||||
|     elif [[ $file =~ $transcript_re ]]; then | ||||
|         ia_transcript+=("$file") | ||||
|     elif [[ $file =~ $asset_re ]]; then | ||||
|         ia_asset+=("$file") | ||||
|     fi | ||||
| done | ||||
|  | ||||
| # | ||||
| # Report what was collected at verbosity level 2 | ||||
| # | ||||
| if [[ $VERBOSE -gt 1 ]]; then | ||||
|     coloured 'cyan' "** audio (${#audio[@]}):" | ||||
|     printf '%s\n' "${audio[@]}" | ||||
|  | ||||
|     coloured 'cyan' "** transcript (${#ia_transcript[@]}):" | ||||
|     printf '%s\n' "${ia_transcript[@]}" | ||||
|  | ||||
|     coloured 'cyan' "** asset (${#ia_asset[@]}):" | ||||
|     printf '%s\n' "${ia_asset[@]}" | ||||
|  | ||||
|     _log "IA asset count for show '$show' = ${#ia_asset[@]}" | ||||
| fi | ||||
|  | ||||
| # | ||||
| # No assets, no need to proceed! | ||||
| # | ||||
| if [[ ${#ia_asset[@]} -eq 0 ]]; then | ||||
|     coloured 'green' "No IA assets found for show $show; nothing to do" | ||||
|     _log "Nothing to do for show $show" | ||||
|     exit | ||||
| fi | ||||
|  | ||||
| #------------------------------------------------------------------------------- | ||||
| # Check what's on the HPR server | ||||
| #------------------------------------------------------------------------------- | ||||
| # | ||||
| # 'rc' is the remote command template | ||||
| # | ||||
| printf -v rc 'find public_html/eps/%s -type f -printf "%s/%%P\\n"' "$show" "$show" | ||||
|  | ||||
| # | ||||
| # 'command' is the local command we'll run to run a remote command on the HPR | ||||
| # server | ||||
| # | ||||
| # shellcheck disable=SC2059 disable=SC2089 | ||||
| printf -v command "$CMDTPL" "'$rc'" | ||||
|  | ||||
| if [[ $VERBOSE -gt 1 ]]; then | ||||
|     echo "Command: $command" | ||||
| fi | ||||
|  | ||||
| declare -a hpr_asset | ||||
| ignore_re="index.html$" | ||||
|  | ||||
| # | ||||
| # Run the command and save the output. Save the asset names returned in an | ||||
| # array. TODO: Handle errors from the command | ||||
| # | ||||
| if [[ $DRYRUN -eq 0 ]]; then | ||||
|     eval "$command" > "$TMP2" | ||||
|     RES=$? | ||||
|     if [[ $RES -eq 0 ]]; then | ||||
|         _verbose "$(coloured 'green' "Remote command successful")" | ||||
|         while read -r hprfile; do | ||||
|             if [[ ! $hprfile =~ $ignore_re ]]; then | ||||
|                 hpr_asset+=("${hprfile}") | ||||
|             fi | ||||
|         done < "$TMP2" | ||||
|         _verbose "$(coloured 'green' "Assets found on HPR server = ${#hpr_asset[@]}")" | ||||
|         _verbose "$(printf '%s\n' "${hpr_asset[@]}")" | ||||
|         _log "Assets found on HPR server = ${#hpr_asset[@]}" | ||||
|     else | ||||
|         coloured 'red' "Remote command failed" | ||||
|         _log "Failed while searching for HPR assets" | ||||
|         exit 1 | ||||
|     fi | ||||
| else | ||||
|     coloured 'yellow' "Would have searched for assets on the HPR server" | ||||
| fi | ||||
|  | ||||
| #------------------------------------------------------------------------------- | ||||
| # Compare the two asset lists and return what's missing on the HPR server | ||||
| #------------------------------------------------------------------------------- | ||||
| declare -a missing | ||||
| find_missing ia_asset hpr_asset missing | ||||
| _verbose "$(coloured 'cyan' "** missing (${#missing[@]}):")" | ||||
| _verbose "$(printf '%s\n' "${missing[@]}")" | ||||
|  | ||||
| if [[ ${#missing[@]} -eq 0 ]]; then | ||||
|     coloured 'green' "No missing assets detected; nothing to do" | ||||
|     _log "No missing assets detected; nothing to do" | ||||
|     exit | ||||
| else | ||||
|     coloured 'yellow' "Found ${#missing[@]} files missing on the HPR server" | ||||
| fi | ||||
|  | ||||
| #------------------------------------------------------------------------------- | ||||
| # Prepare to copy the missing files | ||||
| #------------------------------------------------------------------------------- | ||||
| make_dir "$LOCAL_FILEDIR" | ||||
|  | ||||
| declare -a downloads | ||||
|  | ||||
| # | ||||
| # Check whether files are already downloaded | ||||
| # | ||||
| for file in "${missing[@]}"; do | ||||
|     if [[ ! -e "$LOCAL_FILEDIR/$show/$file" ]]; then | ||||
|         downloads+=("$file") | ||||
|     fi | ||||
| done | ||||
|  | ||||
| _verbose "$(coloured 'cyan' "** downloads (${#downloads[@]}):")" | ||||
| _verbose "$(printf '%s\n' "${downloads[@]}")" | ||||
|  | ||||
| # | ||||
| # If we have files to download get them now. A failed download is fatal | ||||
| # because the later steps would otherwise upload an incomplete set of assets. | ||||
| # | ||||
| if [[ ${#downloads[@]} -gt 0 ]]; then | ||||
|     if [[ $DRYRUN -eq 1 ]]; then | ||||
|         coloured 'yellow' "Would have downloaded missing files from the IA" | ||||
|     else | ||||
|         if ia download "$show" --destdir="$LOCAL_FILEDIR" "${downloads[@]}"; then | ||||
|             coloured 'green' "Downloads complete" | ||||
|             _log "Downloaded IA assets for show $show" | ||||
|         else | ||||
|             coloured 'red' "Downloads from the IA failed" | ||||
|             _log "Failed to download IA assets for show $show" | ||||
|             exit 1 | ||||
|         fi | ||||
|     fi | ||||
| else | ||||
|     coloured 'yellow' "IA files are already downloaded" | ||||
| fi | ||||
|  | ||||
| # shellcheck disable=SC2089 | ||||
| RSYNCTPL="rsync -a -e 'ssh' %s hpr@hpr:%s" | ||||
|  | ||||
| #------------------------------------------------------------------------------- | ||||
| # Build the 'ssh' command to make a directory | ||||
| #------------------------------------------------------------------------------- | ||||
| # | ||||
| # Prepare to make the remote directory if necessary. | ||||
| # | ||||
| # - $rc is the remote command we'll run on the server | ||||
| # - $command is the full 'ssh' command including $rc | ||||
| # | ||||
| printf -v rc 'if [ ! -e "%s" ]; then mkdir -p "%s"; fi' \ | ||||
|     "$REMOTE_ASSETDIR" "$REMOTE_ASSETDIR" | ||||
|  | ||||
| # shellcheck disable=SC2059 disable=SC2089 | ||||
| printf -v command "$CMDTPL" "'$rc'" | ||||
|  | ||||
| #------------------------------------------------------------------------------- | ||||
| # Run or report the command that would be run | ||||
| #------------------------------------------------------------------------------- | ||||
| if [[ $DRYRUN -eq 0 ]]; then | ||||
|     eval "$command" | ||||
|     RES=$? | ||||
|     if [[ $RES -eq 0 ]]; then | ||||
|         coloured 'green' "Remote directory creation successful" | ||||
|     else | ||||
|         coloured 'red' "Remote directory creation failed" | ||||
|     fi | ||||
| else | ||||
|     coloured 'yellow' "Would have created the remote directory" | ||||
|     echo "$command" | ||||
| fi | ||||
|  | ||||
| #------------------------------------------------------------------------------- | ||||
| # Synchronise assets to the directory | ||||
| #------------------------------------------------------------------------------- | ||||
| # shellcheck disable=SC2059 disable=SC2089 | ||||
| printf -v command "$RSYNCTPL" "$LOCAL_PARENTDIR/" "$REMOTE_PARENTDIR/" | ||||
|  | ||||
| if [[ $DRYRUN -eq 0 ]]; then | ||||
|     eval "$command" | ||||
|     RES=$? | ||||
|     if [[ $RES -eq 0 ]]; then | ||||
|         coloured 'green' "Remote upload successful" | ||||
|         _log "Uploaded assets for show $show" | ||||
|     else | ||||
|         coloured 'red' "Remote upload failed" | ||||
|         exit 1 | ||||
|     fi | ||||
| else | ||||
|     coloured 'yellow' "Would have synchronised local assets with the remote directory" | ||||
|     echo "$command" | ||||
| fi | ||||
|  | ||||
| #------------------------------------------------------------------------------- | ||||
| # Make a 'manifest' file if necessary | ||||
| #------------------------------------------------------------------------------- | ||||
| if [[ $DRYRUN -eq 0 ]]; then | ||||
|     if [[ ! -e $MANIFEST ]]; then | ||||
|         find "$LOCAL_PARENTDIR" -type f -printf '%P\n' > "$MANIFEST" | ||||
|         _verbose "$(coloured 'green' "Created manifest file")" | ||||
|         _log "Created manifest file $MANIFEST" | ||||
|     fi | ||||
| fi | ||||
|  | ||||
| #------------------------------------------------------------------------------- | ||||
| # Save the notes from the database if necessary | ||||
| #------------------------------------------------------------------------------- | ||||
| if [[ $DRYRUN -eq 0 ]]; then | ||||
|     if [[ ! -e $DBNOTES ]]; then | ||||
|         if ! tunnel_is_open; then | ||||
|             open_tunnel | ||||
|         fi | ||||
|         if query2tt2 -config="$BASEDIR/.hpr_livedb.cfg" \ | ||||
|                 -temp="$BASEDIR/query2tt2_nokey.tpl" \ | ||||
|                 -out="$DBNOTES" \ | ||||
|                 -dbarg="${show:3}" \ | ||||
|                 'select notes from eps where id = ?' | ||||
|         then | ||||
|             _verbose "$(coloured 'green' "Created notes file")" | ||||
|             _log "Created notes file $DBNOTES" | ||||
|         else | ||||
|             _verbose "$(coloured 'red' "Creation of notes file failed")" | ||||
|             _log "Creation of notes file $DBNOTES failed" | ||||
|         fi | ||||
|     fi | ||||
| fi | ||||
|  | ||||
| #------------------------------------------------------------------------------- | ||||
| # Adjust the notes with 'fix_asset_links' | ||||
| #------------------------------------------------------------------------------- | ||||
| if [[ $DRYRUN -eq 0 ]]; then | ||||
|     echo "$FIXAL" | ||||
|     # $FIXAL | ||||
| fi | ||||
|  | ||||
|  | ||||
| # | ||||
| # All done | ||||
| # | ||||
| if [[ $DRYRUN -eq 0 ]]; then | ||||
|     _log "Repaired show $show" | ||||
| fi | ||||
|  | ||||
| #------------------------------------------------------------------------------- | ||||
| # √ Make a place to hold the files on this machine | ||||
| # √ Download them from the IA | ||||
| # √ Make a directory on the HPR server | ||||
| # √ Copy the assets to the HPR server | ||||
| #   Modify the notes to point to the assets on the server | ||||
| #------------------------------------------------------------------------------- | ||||
|  | ||||
| # vim: syntax=sh:ts=8:sw=4:ai:et:tw=78:fo=tcrqn21:fdm=marker | ||||
| @@ -3,7 +3,7 @@ | ||||
| # | ||||
| #         FILE: repair_item | ||||
| # | ||||
| #        USAGE: ./repair_item [-h] [-v] [-d {0|1}] [-D] [-l N] itemname | ||||
| #        USAGE: ./repair_item [-h] [-v] [-d {0|1}] [-D] [-l N] [-X] itemname | ||||
| # | ||||
| #  DESCRIPTION: Repairs an IA "item" (HPR show) if something has failed during | ||||
| #               the upload. | ||||
| @@ -18,20 +18,32 @@ | ||||
| #               temporarily on 'borg') and determines which have not been | ||||
| #               uploaded, then takes steps to perform the uploads. | ||||
| # | ||||
| #               Version 0.0.10 onwards has the capability to repair an IA item | ||||
| #               from the HPR backup disk. This seems to be necessary because | ||||
| #               the transcripts were not carried over (although we are | ||||
| #               adding them to the IA for new shows now, older ones were never | ||||
| #               copied), and there has been a case where none of the assets | ||||
| #               were on the IA. The method used is to place the backup files | ||||
| #               in the directory 'repairs' under the local IA or | ||||
| #               InternetArchive directory. The files are held in the hierarchy | ||||
| #               '$item/$item/'. The assets are in the lower directory and the | ||||
| #               source file is in the upper one. This emulates the placement | ||||
| #               on the IA itself. | ||||
| # | ||||
| #      OPTIONS: --- | ||||
| # REQUIREMENTS: --- | ||||
| #         BUGS: --- | ||||
| #        NOTES: --- | ||||
| #       AUTHOR: Dave Morriss (djm), Dave.Morriss@gmail.com | ||||
| #      VERSION: 0.0.9 | ||||
| #      VERSION: 0.0.10 | ||||
| #      CREATED: 2020-01-05 22:42:46 | ||||
| #     REVISION: 2024-06-14 18:03:58 | ||||
| #     REVISION: 2024-07-12 14:39:38 | ||||
| # | ||||
| #=============================================================================== | ||||
|  | ||||
| #set -o nounset                              # Treat unset variables as an error | ||||
|  | ||||
| VERSION="0.0.9" | ||||
| VERSION="0.0.10" | ||||
|  | ||||
| SCRIPT=${0##*/} | ||||
| # DIR=${0%/*} | ||||
| @@ -45,10 +57,12 @@ case $(hostname) in | ||||
|     i7-desktop) | ||||
|         BASEDIR="$HOME/HPR/InternetArchive" | ||||
|         UPLOADS="$HOME/HPR/IA/uploads" | ||||
|         REPAIRS="$BASEDIR/repairs" | ||||
|         ;; | ||||
|     borg) | ||||
|         BASEDIR="$HOME/IA" | ||||
|         UPLOADS="/data/IA/uploads" | ||||
|         REPAIRS="$BASEDIR/repairs" | ||||
|         ;; | ||||
|     *) | ||||
|         echo "Wrong host!" | ||||
| @@ -185,7 +199,7 @@ _usage () { | ||||
|     cat >$STDOUT <<-endusage | ||||
| ${SCRIPT} - version: ${VERSION} | ||||
|  | ||||
| Usage: ./${SCRIPT} [-h] [-v] [-d {0|1}] [-D] [-l N] item | ||||
| Usage: ./${SCRIPT} [-h] [-v] [-d {0|1}] [-D] [-l N] [-X] item | ||||
|  | ||||
| Attempts to repair an IA item where the upload has failed for some reason. | ||||
|  | ||||
| @@ -203,6 +217,12 @@ Options: | ||||
|                         during one run of the script. The range is 1 to | ||||
|                         $DEFLIMIT. This can be helpful when there are upload | ||||
|                         problems. | ||||
|   -X                    Run in "extended" mode. In this mode the directory | ||||
|                         holding files to be added to the IA is '~/IA/repairs' | ||||
|                         and the files have most likely come from the HPR | ||||
|                         backup disk and aren't on the IA due some error. We | ||||
|                         want to use the capabilities of ${SCRIPT} to repair | ||||
|                         things and deal with the IA upload problems. | ||||
|  | ||||
| Arguments: | ||||
|     item                The item in the form 'hpr1234' | ||||
| @@ -229,7 +249,7 @@ DEFLIMIT=20 | ||||
| # | ||||
| # Process options | ||||
| # | ||||
| while getopts :d:Dhl:v opt | ||||
| while getopts :d:Dhl:vX opt | ||||
| do | ||||
|     case "${opt}" in | ||||
|         D) DEBUG=1;; | ||||
| @@ -237,6 +257,7 @@ do | ||||
|         h) _usage 0;; | ||||
|         l) LIMIT=$OPTARG;; | ||||
|         v) VERBOSE=1;; | ||||
|         X) EXTENDED=1;; | ||||
|         *) echo "** Unknown option" | ||||
|            _usage 1;; | ||||
|     esac | ||||
| @@ -264,6 +285,8 @@ if [[ $LIMIT -lt 1 || $LIMIT -gt $DEFLIMIT ]]; then | ||||
|     _usage 1 | ||||
| fi | ||||
|  | ||||
| EXTENDED=${EXTENDED:-0} | ||||
|  | ||||
| # | ||||
| # Should have one argument | ||||
| # | ||||
| @@ -295,6 +318,22 @@ if ! ia metadata "$item" --exists > /dev/null 2>&1; then | ||||
|     exit 1 | ||||
| fi | ||||
|  | ||||
| # | ||||
| # The -X (EXTENDED) mode is for when we have to upload files that have | ||||
| # mysteriously vanished from the IA. The directories here are equivalent to | ||||
| # those used by 'repair_assets'. There is a top-level directory that represents | ||||
| # the IA item, and below that a hierarchy defining placement under the item. | ||||
| # There is a 'repairs' directory per host in case we need to repair IA stuff | ||||
| # from elsewhere. | ||||
| # | ||||
| if [[ $EXTENDED -eq 1 ]]; then | ||||
|     coloured 'cyan' "Using 'Extended' mode" | ||||
|     if [[ ! -e $REPAIRS ]]; then | ||||
|         mkdir -p "$REPAIRS" | ||||
|     fi | ||||
|     UPLOADS="$REPAIRS/$item" | ||||
| fi | ||||
|  | ||||
| # | ||||
| # Declarations | ||||
| # | ||||
|   | ||||
		Reference in New Issue
	
	Block a user