diff --git a/Miscellaneous/fix_tags.bin b/Miscellaneous/fix_tags.bin index 2c53504..274a0c0 100755 Binary files a/Miscellaneous/fix_tags.bin and b/Miscellaneous/fix_tags.bin differ diff --git a/hpr_tags/fix_tags.bin b/hpr_tags/fix_tags.bin index 9068844..274a0c0 100755 Binary files a/hpr_tags/fix_tags.bin and b/hpr_tags/fix_tags.bin differ diff --git a/workflow/fix_tags_20250201170019.bin b/workflow/fix_tags_20250201170019.bin new file mode 100644 index 0000000..93352da Binary files /dev/null and b/workflow/fix_tags_20250201170019.bin differ diff --git a/workflow/process_episode.bash b/workflow/process_episode.bash index 69c1d7c..dbfee95 100755 --- a/workflow/process_episode.bash +++ b/workflow/process_episode.bash @@ -291,6 +291,12 @@ function get_episode_metadata() { echo_error "Failed to extract the shownote html file \"${shownotes_html}\"" fi + ( echo '
' + cat "${shownotes_html}" + echo ' + ' + ) | sponge "${shownotes_html}" + variables=( shownotes_json shownotes_html hostid host_name email title summary series_id series_name explicit episode_license ep_date ep_num tags host_license host_profile remote_media shownotes_json_sanatised ) for variable in "${variables[@]}" @@ -376,15 +382,20 @@ function extract_images_brute_force() { do this_image="${working_dir}/hpr${ep_num}_image_ext_${image_count_external}" wget "${image}" --output-document=${this_image} - this_ext="$( file --mime-type ${this_image} | awk -F '/' '{print $NF}' )" - mv -v "${this_image%.*}" "${this_image}.${this_ext}" - this_width="$( mediainfo "${this_image}.${this_ext}" | grep Width | awk -F ': | pixels' '{print $2}' | sed 's/ //g' )" - if [ "${this_width}" -gt "400" ] + if [ -s "${this_image}" ] then - echo_debug "Generating thumbnail for external image \"${this_image}.${this_ext}\"." - magick "${this_image}.${this_ext}" -resize 400x "${this_image}_tn.${this_ext}" + this_ext="$( file --mime-type ${this_image} | awk -F '/' '{print $NF}' )" + mv -v "${this_image%.*}" "${this_image}.${this_ext}" + this_width="$( mediainfo "${this_image}.${this_ext}" | grep Width | awk -F ': | pixels' '{print $2}' | sed 's/ //g' )" + if [ "${this_width}" -gt "400" ] + then + echo_debug "Generating thumbnail for external image \"${this_image}.${this_ext}\"." + magick "${this_image}.${this_ext}" -resize 400x "${this_image}_tn.${this_ext}" + fi + ((image_count_external=image_count_external+1)) + else + echo_debug "Could not download external image \"${image}\"." fi - ((image_count_external=image_count_external+1)) done cat "${shownotes_html}" | remove-image.pl | sponge "${shownotes_html}" @@ -474,7 +485,7 @@ function extract_images_brute_force() { function media_checks() { - if [ -n "${remote_media}" ] + if [[ -n "${remote_media}" && "${remote_media}" != "null" ]] then echo_debug "Fetching remote media from \"${remote_media}\"" wget --timestamping --directory-prefix="${working_dir}/" "${remote_media}" @@ -517,7 +528,7 @@ function media_checks() { shownotes_srt="${media%.*}.srt" if [[ -z "${shownotes_srt}" || ! -s "${shownotes_srt}" ]] then - echo_error "Could not find the subtitles for media \"${media}/\"" + echo_error "Could not find the subtitles for media \"${media}\"" fi # Find duration @@ -720,7 +731,7 @@ function manual_shownotes_review() { fi # remove extra wrappers that seamonkey adds - grep --invert-match --perl-regexp '|head>|' "${shownotes_html%.*}_edited.html" | sponge "${shownotes_html%.*}_edited.html" + grep --invert-match --perl-regexp '|head>|' "${shownotes_html%.*}_edited.html" | sponge "${shownotes_html%.*}_edited.html" # Check to see if images have been linked TODO make a loop for found images if [ "$( find "${working_dir}" -type f -iname "*_image_*" | wc -l )" -ne "0" ] @@ -818,6 +829,17 @@ function create_tts_summary { echo_error "The Episode hpr${ep_num} has not been posted" fi + if [ ! -s "${working_dir}/episode_summary.json" ] + then + echo_error "Failed to find the extracted shownote html file \"episode_summary.json\"" + fi + + duration="$( jq --raw-output '.duration' "${working_dir}/episode_summary.json" | sed -e 's/ \././g' -e 's/\.\./\./g' -e 's/ / /g' )" + if [[ -z "${duration}" || "${duration}" -lt "30" || "${duration}" -gt "30000" ]] + then + echo_error "Invalid duration found in \"episode_summary.json\"" >&2 + fi + synopsis="$( jq --raw-output '.synopsis' "${working_dir}/episode_summary.json" | sed -e 's/ \././g' -e 's/\.\./\./g' -e 's/ / /g' )" if [[ -z "${synopsis}" || "${synopsis}" == "null" ]] @@ -964,13 +986,13 @@ function generate_show_transcript() { date="$( jq --raw-output '.date' "${working_dir}/episode_summary.json" | sed -e 's/ \././g' -e 's/\.\./\./g' -e 's/ / /g' )" title="$( jq --raw-output '.title' "${working_dir}/episode_summary.json" | sed -e 's/ \././g' -e 's/\.\./\./g' -e 's/ / /g' )" duration="$( jq --raw-output '.duration' "${working_dir}/episode_summary.json" | sed -e 's/ \././g' -e 's/\.\./\./g' -e 's/ / /g' )" - duration="$( \date -d@${duration} -u +%H:%M:%S )" + duration_iso8601="$( \date -d@${duration} -u +%H:%M:%S )" artist="$( jq --raw-output '.artist' "${working_dir}/episode_summary.json" | sed -e 's/ \././g' -e 's/\.\./\./g' -e 's/ / /g' )" explicit="$( jq --raw-output '.explicit' "${working_dir}/episode_summary.json" | sed -e 's/ \././g' -e 's/\.\./\./g' -e 's/ / /g' )" license="$( jq --raw-output '.license' "${working_dir}/episode_summary.json" | sed -e 's/ \././g' -e 's/\.\./\./g' -e 's/ / /g' )" summary="$( jq --raw-output '.summary' "${working_dir}/episode_summary.json" | sed -e 's/ \././g' -e 's/\.\./\./g' -e 's/ / /g' )" - if [[ -z "${date}" || "${date}" == "null" || -z "${title}" || "${title}" == "null" || -z "${duration}" || "${duration}" == "null" || -z "${duration}" || "${duration}" == "null" || -z "${artist}" || "${artist}" == "null" || -z "${explicit}" || "${explicit}" == "null" || -z "${license}" || "${license}" == "null" || -z "${summary}" || "${summary}" == "null" ]] + if [[ -z "${date}" || "${date}" == "null" || -z "${title}" || "${title}" == "null" || -z "${duration_iso8601}" || "${duration_iso8601}" == "null" || -z "${artist}" || "${artist}" == "null" || -z "${explicit}" || "${explicit}" == "null" || -z "${license}" || "${license}" == "null" || -z "${summary}" || "${summary}" == "null" ]] then echo_error "Could not retrieve the synopsis for the text to speech." ls -al "${working_dir}/episode_summary.json" @@ -978,7 +1000,7 @@ function generate_show_transcript() { REPLACE_LINE_1="This is Hacker Public Radio Episode ${ep_num}, for ${date}" REPLACE_LINE_2="Today's show is entitled, \"${title}\"" - REPLACE_LINE_3="The host is ${artist} and the duration is ${duration}" + REPLACE_LINE_3="The host is ${artist} and the duration is ${duration_iso8601}" REPLACE_LINE_4="The flag is ${explicit}, and the license is ${license}" REPLACE_LINE_5="The summary is \"${summary}\"" @@ -1281,6 +1303,8 @@ function register_assets() { if [[ -z "${response}" || "${response}" != "200" ]] then echo_error "The assets for episode hpr${ep_num} has not been registered. The response was \"${response}\"" + else + echo_debug "The assets for episode hpr${ep_num} have been registered. The response was \"${response}\"" fi } @@ -1325,15 +1349,15 @@ function copy_files_to_origin_server() { ################################################# # Send the derived files to the server borg to be sent to the Internet Archive -function copy_derived_files_to_borg_for_the_internet_archive() { +function copy_derived_files_to_borg() { - echo_debug "Sending the derived files to the server borg to be sent to the Internet Archive" + echo_debug "Sending the derived files to the admin server borg" for ext in flac mp3 ogg opus wav do if [[ ! -s "${working_dir}/hpr${ep_num}.${ext}" ]] then - echo_error "The inital report information is missing \"${ext}\"." + echo_error "The derived files to the admin server borg is missing \"hpr${ep_num}.${ext}\"." ls -al "${working_dir}/hpr${ep_num}.${ext}" fi done @@ -1402,6 +1426,81 @@ function copy_derived_files_to_borg_for_the_internet_archive() { } +################################################# +# Send the derived files to the server borg to be sent to the Internet Archive + +function upload_to_the_internet_archive() { + echo_debug "Sending the derived files to Internet Archive" + +# # # # for ext in flac mp3 ogg opus wav +# # # # do +# # # # if [[ ! -s "${working_dir}/hpr${ep_num}.${ext}" ]] +# # # # then +# # # # echo_error "The derived files to the IA are missing \"hpr${ep_num}.${ext}\"." +# # # # ls -al "${working_dir}/hpr${ep_num}.${ext}" +# # # # fi +# # # # done +# # # # +# # # # I don't know if you noticed in the documentation, but there are some key things you need to bear in mind when creating a new IA item: +# # # # +# # # # You MUST upload the metadata first, and the way things seem to be organised, your item MUST have a file with it. That's why my test uploaded that 1 second WAV file. This is a constraint in the ia tool and was in the days of using the S3-like interface, so I imagine it's built-in to the IA software. +# # # # When creating the metadata you MUST define the mediatype, and since this controls some of what you can upload, using audio seems to be vital for HPR stuff. +# # # # I discovered (by accident) that if the metadata phase fails and the next phase (uploading files) continues to run, an item will be created but it will be a default thing which YOU CAN'T CHANGE! You have to plead with the people at info@archive.org to reset this item (they will never delete) because you can't change the defaults to the right values. If this happens and is not fixed, then HPR has effectively lost a slot (at least I tell myself this so that I don't inadvertently screw up an upload). +# # # +# # # ia upload hpr_761c0f557b87090db3f8d4d9bce7fc70 \ +# # # test.wav \ +# # # --metadata="mediatype:audio" \ +# # # --metadata="collection:test_collection" \ +# # # --metadata="collection:podcasts" \ +# # # --metadata="contributor:HPR_test" \ +# # # --metadata="creator:HPR_test" \ +# # # --metadata="date:$(date +%F)" \ +# # # --metadata="description:Summary: Uploading a test item from the command lineThis is some test HTML in a test item
" \ +# # # --metadata="language:eng" \ +# # # --metadata="licenseurl:http://creativecommons.org/licenses/by-sa/4.0" \ +# # # --metadata="subject:Test" \ +# # # --metadata="title:hpr_761c0f557b87090db3f8d4d9bce7fc70" \ +# # # --header x-archive-keep-old-version:0 \ +# # # --retries=5 \ +# # # --no-derive \ +# # # --no-backup +# # # # echo ia upload hpr4295 /data/IA/uploads/hpr4295.wav \ +# # # # --metadata="mediatype:audio" \ +# # # # --metadata="collection:hackerpublicradio" \ +# # # # --metadata="collection:podcasts" \ +# # # # --metadata="contributor:HackerPublicRadio" \ +# # # # --metadata="creator:Ken Fallon" \ +# # # # --metadata="date:2025-01-17" \ +# # # # --metadata="description:Summary: Replacing the battery, swapping a fan, and getting a new desktopIn this reserve show, Ken
In this reserve show, Ken