From 37699fae8fb2c70fc230c4649312241d0793abdb Mon Sep 17 00:00:00 2001 From: Ken Fallon Date: Fri, 9 Jan 2026 17:29:41 +0100 Subject: [PATCH] Removed borg, workaround for span --- workflow/hpr-check-feeds.bash | 3 + workflow/process_episode.bash | 119 +++++----------------------------- 2 files changed, 21 insertions(+), 101 deletions(-) diff --git a/workflow/hpr-check-feeds.bash b/workflow/hpr-check-feeds.bash index bb9d2c0..1d04d4a 100755 --- a/workflow/hpr-check-feeds.bash +++ b/workflow/hpr-check-feeds.bash @@ -4,6 +4,7 @@ tmpdir=$(mktemp --directory /tmp/"$( basename ${0} ).XXXXXX" ) base="https://hackerpublicradio.org" +reportfile="${tmpdir}/report.txt" for feed in comments.rss comments_rss.php hpr_mp3.rss hpr_mp3_rss.php hpr_mp3_rss.php?series=47 hpr_ogg.rss hpr_ogg_rss.php hpr_ogg_rss.php?series=47 hpr_opus_rss.php hpr_opus_rss.php?series=47 hpr_rss_mp3.php hpr_rss.php hpr_spx.rss hpr.sql.rss hpr_total_mp3.rss hpr_total_ogg.rss hpr_total_ogg_rss.php hpr_total_opus_rss.php hpr_total_rss.php hpr_total_spx.rss rss-ccdnv1.php rss-future.php rss.php do tmpfile="${tmpdir}/${feed}" @@ -25,6 +26,8 @@ do fi done +# check_urls_in_file.bash + read -p "Remove the feeds ? (Y|n) ? " -n 1 -r echo # (optional) move to a new line if [[ ! 
$REPLY =~ ^[Yy]$ ]] diff --git a/workflow/process_episode.bash b/workflow/process_episode.bash index 49e7a87..13159b4 100755 --- a/workflow/process_episode.bash +++ b/workflow/process_episode.bash @@ -92,7 +92,7 @@ function program_checks() { done } - is_installed audio2image.bash awk base64 cat csvtojson curl date eval extract_images ffprobe ffmpeg file find grep head jq kate magick mediainfo mv realpath remove-image.pl rsync seamonkey sed sed sort sponge ssh touch touch wget hpr-check-ccdn-links + is_installed audio2image.bash awk base64 cat csvtojson curl date eval extract_images ffprobe ffmpeg file find fix_tags grep head jq kate magick mediainfo mv realpath remove-image.pl rsync seamonkey sed sed sort sponge ssh touch touch wget hpr-check-ccdn-links for arg in $* do @@ -623,7 +623,7 @@ function get_next_show_from_hpr_hub() { check_variable_is_correct processing_dir HOME - if [ "$( curl --silent --netrc-file ${HOME}/.netrc --write-out '%{http_code}' https://hub.hackerpublicradio.org/cms/status.php --output "${processing_dir}/status.csv" )" != 200 ] + if [ "$( curl --location --silent --netrc-file ${HOME}/.netrc --write-out '%{http_code}' https://hub.hackerpublicradio.org/cms/status.php --output "${processing_dir}/status.csv" )" != 200 ] then echo_error "Could not get a list of the queue status from \"https://hub.hackerpublicradio.org/cms/status.php\"" fi @@ -637,7 +637,7 @@ function get_next_show_from_hpr_hub() { if [ -z "${response}" ] then echo_debug "Getting a list of all the reservations." - curl --silent --netrc-file ${HOME}/.netrc "https://hub.hackerpublicradio.org/cms/status.php" | sort -n + curl --location --silent --netrc-file ${HOME}/.netrc "https://hub.hackerpublicradio.org/cms/status.php" | sort -n echo_error "There appear to be no more shows with the status \"SHOW_SUBMITTED\"." 
fi @@ -689,7 +689,7 @@ function get_ep_num_from_local_dir() { echo_debug "Attempting to download information for episode \"${ep_num}\"" - if [ "$( curl --silent --netrc-file ${HOME}/.netrc --write-out '%{http_code}' https://hub.hackerpublicradio.org/cms/shownotes.php?id=${ep_num} --output "${shownotes_json}" )" != 200 ] + if [ "$( curl --location --silent --netrc-file ${HOME}/.netrc --write-out '%{http_code}' https://hub.hackerpublicradio.org/cms/shownotes.php?id=${ep_num} --output "${shownotes_json}" )" != 200 ] then echo_error "The Episode hpr${ep_num} has not been posted." fi @@ -857,11 +857,12 @@ function extract_images_brute_force() { echo_error "The shownotes_html file \"${shownotes_html}\" could not be found." fi + ## TODO fix for https://repo.anhonesthost.net/HPR/hpr_hub/issues/102 and https://github.com/slab/quill/discussions/4766 + + sed -e 's/<span[^>]*>//g; s/<\/span>//g' -e 's/<div[^>]*>//g; s/<\/div>//g' -e 's/<code[^>]*>/<code>/g; s/<\/code>/<\/code>/g' -e "s#>#>\n#g" "${shownotes_html}" | sponge "${shownotes_html}" + ## TODO Temp fix until https://repo.anhonesthost.net/HPR/hpr-tools/issues/3 is available - sed "s#>#>\n#g" "${shownotes_html}" | sponge "${shownotes_html}" - - # Extract embedded images image_count_embedded="1" @@ -1359,7 +1360,7 @@ function post_show_to_hpr_db() { curl --netrc --show-headers --request POST "https://hub.hackerpublicradio.org/cms/add_show_json.php" --header "Content-Type: application/json" --data-binary "@${post_show_json}" - if [ "$( curl --silent --netrc --write-out '%{http_code}' https://hub.hackerpublicradio.org/cms/say.php?id=${ep_num} --output /dev/null )" != 200 ] + if [ "$( curl --location --silent --netrc --write-out '%{http_code}' https://hub.hackerpublicradio.org/cms/say.php?id=${ep_num} --output /dev/null )" != 200 ] then echo_error "The Episode hpr${ep_num} has not been posted" fi @@ -1385,7 +1386,7 @@ function get_variables_from_episode_summary_json() { if [ ! 
-s "${episode_summary_json}" ] then echo_debug "The \"episode_summary_json\" variable/file is missing, getting a new version." - if [ "$( curl --silent --netrc --write-out '%{http_code}' https://hub.hackerpublicradio.org/cms/say.php?id=${ep_num} --output "${episode_summary_json}" )" != 200 ] + if [ "$( curl --location --silent --netrc --write-out '%{http_code}' https://hub.hackerpublicradio.org/cms/say.php?id=${ep_num} --output "${episode_summary_json}" )" != 200 ] then echo_error "The Episode hpr${ep_num} has not been posted" fi @@ -1976,7 +1977,7 @@ function register_assets() { echo_error "The asset json file \"${assets_json}\" is missing."; fi - response="$( curl --silent --netrc-file $HOME/.netrc --write-out '%{http_code}' --output /dev/null --request POST https://hub.hackerpublicradio.org/cms/assets.php --data-ascii @"${assets_json}" --header "Content-Type: application/json" )" + response="$( curl --location --silent --netrc-file $HOME/.netrc --write-out '%{http_code}' --output /dev/null --request POST https://hub.hackerpublicradio.org/cms/assets.php --data-ascii @"${assets_json}" --header "Content-Type: application/json" )" if [[ -z "${response}" || "${response}" != "200" ]] then echo_error "The assets for episode hpr${ep_num} has not been registered. The response was \"${response}\"" @@ -2055,7 +2056,7 @@ function copy_files_to_hpr_server() { fi echo_debug "Getting the \"files_json\" file from the HPR site." 
- if [ "$( curl --silent --netrc --write-out '%{http_code}' https://hub.hackerpublicradio.org/cms/assets.php?id=${ep_num} --output "${files_json}" )" != 200 ] + if [ "$( curl --location --silent --netrc --write-out '%{http_code}' https://hub.hackerpublicradio.org/cms/assets.php?id=${ep_num} --output "${files_json}" )" != 200 ] then echo_error "The Episode hpr${ep_num} has not been posted" fi @@ -2101,88 +2102,6 @@ function copy_files_to_git() { } -################################################# -# Send the derived files to the server borg to be sent to borg - -function copy_derived_files_to_borg() { - - echo_debug "Sending the derived files to the admin server borg. copy_derived_files_to_borg()" - - check_variable_is_correct working_dir ep_num shownotes_html - - for extension in flac mp3 ogg opus wav - do - if [[ ! -s "${working_dir}/hpr${ep_num}.${extension}" ]] - then - echo_error "The derived files to the admin server borg is missing \"hpr${ep_num}.${extension}\"." - ls -al "${working_dir}/hpr${ep_num}.${extension}" - fi - done - - echo_debug "rsync -ave ssh --partial --progress \"${working_dir}/hpr${ep_num}.{flac,mp3,ogg,opus,wav}\" hpradmin@borg:/data/IA/uploads/" - rsync -ave ssh --partial --progress "${working_dir}/hpr${ep_num}".{flac,mp3,ogg,opus,wav} hpradmin@borg:/data/IA/uploads/ - rsync_error="${?}" - if [ "${rsync_error}" -ne "0" ] - then - echo_error "rsync to \"hpradmin@borg:/data/IA/uploads/\" failed with error ${rsync_error}" - fi - - rsync -ave ssh --partial --progress "${working_dir}/hpr${ep_num}".{txt,srt} hpradmin@borg:/data/IA/uploads/hpr${ep_num}/ - rsync_error="${?}" - if [ "${rsync_error}" -ne "0" ] - then - echo_error "rsync to \"hpradmin@borg:/data/IA/uploads/hpr${ep_num}/\" failed with error ${rsync_error}" - fi - - rsync -ave ssh --partial --progress "${shownotes_edited}" hpradmin@borg:/data/IA/uploads/hpr${ep_num}/shownotes.html - rsync_error="${?}" - if [ "${rsync_error}" -ne "0" ] - then - echo_error "rsync to 
\"hpradmin@borg:/data/IA/uploads/hpr${ep_num}/shownotes.html\" failed with error ${rsync_error}" - fi - - # Get the current status - if [ "$( curl --silent --netrc-file ${HOME}/.netrc --write-out '%{http_code}' https://hub.hackerpublicradio.org/cms/status.php --output "${processing_dir}/status.csv" )" != 200 ] - then - echo_error "Could not get a list of the queue status from \"https://hub.hackerpublicradio.org/cms/status.php\"" - fi - - # Check the current status is correct SHOW_POSTED - response=$( grep --perl-regexp ",${ep_num},.*SHOW_POSTED," "${processing_dir}/status.csv" | head -1 | sed 's/,/ /g' ) - if [ -z "${response}" ] - then - grep --perl-regexp ",${ep_num},.*SHOW_POSTED," "${processing_dir}/status.csv" - echo_debug "The show \"${ep_num}\" hasn't the correct status of \"SHOW_POSTED\" in the database." - fi - - # Try and change the online db status to MEDIA_TRANSCODED - if [ "$( curl --silent --netrc-file ${HOME}/.netrc --write-out '%{http_code}' "https://hub.hackerpublicradio.org/cms/status.php?ep_num=${ep_num}&status=MEDIA_TRANSCODED" )" != 200 ] - then - echo_debug "Could not change the status of \"${ep_num}\" to \"MEDIA_TRANSCODED\"" - fi - - # Get the current status - if [ "$( curl --silent --netrc-file ${HOME}/.netrc --write-out '%{http_code}' https://hub.hackerpublicradio.org/cms/status.php --output "${processing_dir}/status.csv" )" != 200 ] - then - echo_error "Could not get a list of the queue status from \"https://hub.hackerpublicradio.org/cms/status.php\"" - fi - - # Check the current status is correct MEDIA_TRANSCODED - response=$( grep --perl-regexp ",${ep_num},.*MEDIA_TRANSCODED," "${processing_dir}/status.csv" | head -1 | sed 's/,/ /g' ) - if [ -z "${response}" ] - then - grep --perl-regexp ",${ep_num},.*MEDIA_TRANSCODED," "${processing_dir}/status.csv" - echo_debug "The show \"${ep_num}\" hasn't the correct status of \"MEDIA_TRANSCODED\" in the database." 
- fi - - echo_debug "The show \"${ep_num}\" has the correct status of \"MEDIA_TRANSCODED\" in the database." - - #TODO images - - # Picture 1 shows the broken dog    walking accessory. - -} - ################################################# # Wait for the Internet Archive to finish processing @@ -2202,7 +2121,7 @@ function wait_for_the_internet_archive_to_process() { } ################################################# -# Send the derived files to the server borg to be sent to the Internet Archive +# Create entry on the Internet Archive function create_item_on_the_internet_archive() { @@ -2232,7 +2151,7 @@ function create_item_on_the_internet_archive() { } ################################################# -# Send the derived files to the server borg to be sent to the Internet Archive +# Send the derived files to the Internet Archive function upload_files_to_the_internet_archive() { @@ -2252,7 +2171,7 @@ function upload_files_to_the_internet_archive() { fi echo_debug "Getting the \"files_xml\" file from the IA." - if [ "$( curl --silent --netrc --location --write-out '%{http_code}' https://archive.org/download/hpr${ep_num}/hpr${ep_num}_files.xml --output "${files_xml}" )" != 200 ] + if [ "$( curl --location --silent --netrc --write-out '%{http_code}' https://archive.org/download/hpr${ep_num}/hpr${ep_num}_files.xml --output "${files_xml}" )" != 200 ] then echo_error "Could not find file list \"https://archive.org/download/hpr${ep_num}/hpr${ep_num}_files.xml\"" fi @@ -2271,7 +2190,7 @@ function upload_files_to_the_internet_archive() { fi echo_debug "Getting the \"files_json\" file from the HPR site." 
- if [ "$( curl --silent --netrc --write-out '%{http_code}' https://hub.hackerpublicradio.org/cms/assets.php?id=${ep_num} --output "${files_json}" )" != 200 ] + if [ "$( curl --location --silent --netrc --write-out '%{http_code}' https://hub.hackerpublicradio.org/cms/assets.php?id=${ep_num} --output "${files_json}" )" != 200 ] then echo_error "The Episode hpr${ep_num} has not been posted" fi @@ -2382,7 +2301,7 @@ else echo_debug "Skipping get_working_dir()" fi -if [ "$( curl --silent --netrc --write-out '%{http_code}' https://hub.hackerpublicradio.org/cms/say.php?id=${ep_num} --output /dev/null )" == 200 ] +if [ "$( curl --location --silent --netrc --write-out '%{http_code}' https://hub.hackerpublicradio.org/cms/say.php?id=${ep_num} --output /dev/null )" == 200 ] then skip_post_show="true" echo_debug "The Episode hpr${ep_num} has already been posted." @@ -2429,8 +2348,6 @@ copy_files_to_hpr_server copy_files_to_git -copy_derived_files_to_borg - create_item_on_the_internet_archive upload_files_to_the_internet_archive @@ -2452,5 +2369,5 @@ exit 0 # echo_debug "The setting \"force_overwrite\" is set to false, so when files exist the program will skip files if they exist." # fi -# curl --silent --netrc --write-out '%{http_code}' https://hub.hackerpublicradio.org/cms/say.php?id=${ep_num} --output "${episode_summary_json}" +# curl --location --silent --netrc --write-out '%{http_code}' https://hub.hackerpublicradio.org/cms/say.php?id=${ep_num} --output "${episode_summary_json}" # curl --netrc-file ${HOME}/.netrc --write-out '%{http_code}' https://hub.hackerpublicradio.org/cms/shownotes.php?id=${ep_num} --output "${working_dir}/shownotes.json"