WIP Changes

This commit is contained in:
Ken Fallon 2025-02-04 07:20:06 +01:00
parent 7fe9f60205
commit 6621e67703
4 changed files with 120 additions and 17 deletions

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -291,6 +291,12 @@ function get_episode_metadata() {
echo_error "Failed to extract the shownote html file \"${shownotes_html}\"" echo_error "Failed to extract the shownote html file \"${shownotes_html}\""
fi fi
( echo '<!DOCTYPE HTML><html lang="en"><head><meta charset="utf-8" /></head><body>'
cat "${shownotes_html}"
echo '</body>
</html>'
) | sponge "${shownotes_html}"
variables=( shownotes_json shownotes_html hostid host_name email title summary series_id series_name explicit episode_license ep_date ep_num tags host_license host_profile remote_media shownotes_json_sanatised ) variables=( shownotes_json shownotes_html hostid host_name email title summary series_id series_name explicit episode_license ep_date ep_num tags host_license host_profile remote_media shownotes_json_sanatised )
for variable in "${variables[@]}" for variable in "${variables[@]}"
@ -376,15 +382,20 @@ function extract_images_brute_force() {
do do
this_image="${working_dir}/hpr${ep_num}_image_ext_${image_count_external}" this_image="${working_dir}/hpr${ep_num}_image_ext_${image_count_external}"
wget "${image}" --output-document=${this_image} wget "${image}" --output-document=${this_image}
this_ext="$( file --mime-type ${this_image} | awk -F '/' '{print $NF}' )" if [ -s "${this_image}" ]
mv -v "${this_image%.*}" "${this_image}.${this_ext}"
this_width="$( mediainfo "${this_image}.${this_ext}" | grep Width | awk -F ': | pixels' '{print $2}' | sed 's/ //g' )"
if [ "${this_width}" -gt "400" ]
then then
echo_debug "Generating thumbnail for external image \"${this_image}.${this_ext}\"." this_ext="$( file --mime-type ${this_image} | awk -F '/' '{print $NF}' )"
magick "${this_image}.${this_ext}" -resize 400x "${this_image}_tn.${this_ext}" mv -v "${this_image%.*}" "${this_image}.${this_ext}"
this_width="$( mediainfo "${this_image}.${this_ext}" | grep Width | awk -F ': | pixels' '{print $2}' | sed 's/ //g' )"
if [ "${this_width}" -gt "400" ]
then
echo_debug "Generating thumbnail for external image \"${this_image}.${this_ext}\"."
magick "${this_image}.${this_ext}" -resize 400x "${this_image}_tn.${this_ext}"
fi
((image_count_external=image_count_external+1))
else
echo_debug "Could not download external image \"${image}\"."
fi fi
((image_count_external=image_count_external+1))
done done
cat "${shownotes_html}" | remove-image.pl | sponge "${shownotes_html}" cat "${shownotes_html}" | remove-image.pl | sponge "${shownotes_html}"
@ -474,7 +485,7 @@ function extract_images_brute_force() {
function media_checks() { function media_checks() {
if [ -n "${remote_media}" ] if [[ -n "${remote_media}" && "${remote_media}" != "null" ]]
then then
echo_debug "Fetching remote media from \"${remote_media}\"" echo_debug "Fetching remote media from \"${remote_media}\""
wget --timestamping --directory-prefix="${working_dir}/" "${remote_media}" wget --timestamping --directory-prefix="${working_dir}/" "${remote_media}"
@ -517,7 +528,7 @@ function media_checks() {
shownotes_srt="${media%.*}.srt" shownotes_srt="${media%.*}.srt"
if [[ -z "${shownotes_srt}" || ! -s "${shownotes_srt}" ]] if [[ -z "${shownotes_srt}" || ! -s "${shownotes_srt}" ]]
then then
echo_error "Could not find the subtitles for media \"${media}/\"" echo_error "Could not find the subtitles for media \"${media}\""
fi fi
# Find duration # Find duration
@ -720,7 +731,7 @@ function manual_shownotes_review() {
fi fi
# remove extra wrappers that seamonkey adds # remove extra wrappers that seamonkey adds
grep --invert-match --perl-regexp '<!DOCTYPE|html>|head>|<meta|body>' "${shownotes_html%.*}_edited.html" | sponge "${shownotes_html%.*}_edited.html" grep --invert-match --perl-regexp '<!DOCTYPE|<html|html>|head>|<meta|body>' "${shownotes_html%.*}_edited.html" | sponge "${shownotes_html%.*}_edited.html"
# Check to see if images have been linked TODO make a loop for found images # Check to see if images have been linked TODO make a loop for found images
if [ "$( find "${working_dir}" -type f -iname "*_image_*" | wc -l )" -ne "0" ] if [ "$( find "${working_dir}" -type f -iname "*_image_*" | wc -l )" -ne "0" ]
@ -818,6 +829,17 @@ function create_tts_summary {
echo_error "The Episode hpr${ep_num} has not been posted" echo_error "The Episode hpr${ep_num} has not been posted"
fi fi
if [ ! -s "${working_dir}/episode_summary.json" ]
then
echo_error "Failed to find the extracted shownote html file \"episode_summary.json\""
fi
duration="$( jq --raw-output '.duration' "${working_dir}/episode_summary.json" | sed -e 's/ \././g' -e 's/\.\./\./g' -e 's/ / /g' )"
if [[ -z "${duration}" || "${duration}" -lt "30" || "${duration}" -gt "30000" ]]
then
echo_error "Invalid duration found in \"episode_summary.json\"" >&2
fi
synopsis="$( jq --raw-output '.synopsis' "${working_dir}/episode_summary.json" | sed -e 's/ \././g' -e 's/\.\./\./g' -e 's/ / /g' )" synopsis="$( jq --raw-output '.synopsis' "${working_dir}/episode_summary.json" | sed -e 's/ \././g' -e 's/\.\./\./g' -e 's/ / /g' )"
if [[ -z "${synopsis}" || "${synopsis}" == "null" ]] if [[ -z "${synopsis}" || "${synopsis}" == "null" ]]
@ -964,13 +986,13 @@ function generate_show_transcript() {
date="$( jq --raw-output '.date' "${working_dir}/episode_summary.json" | sed -e 's/ \././g' -e 's/\.\./\./g' -e 's/ / /g' )" date="$( jq --raw-output '.date' "${working_dir}/episode_summary.json" | sed -e 's/ \././g' -e 's/\.\./\./g' -e 's/ / /g' )"
title="$( jq --raw-output '.title' "${working_dir}/episode_summary.json" | sed -e 's/ \././g' -e 's/\.\./\./g' -e 's/ / /g' )" title="$( jq --raw-output '.title' "${working_dir}/episode_summary.json" | sed -e 's/ \././g' -e 's/\.\./\./g' -e 's/ / /g' )"
duration="$( jq --raw-output '.duration' "${working_dir}/episode_summary.json" | sed -e 's/ \././g' -e 's/\.\./\./g' -e 's/ / /g' )" duration="$( jq --raw-output '.duration' "${working_dir}/episode_summary.json" | sed -e 's/ \././g' -e 's/\.\./\./g' -e 's/ / /g' )"
duration="$( \date -d@${duration} -u +%H:%M:%S )" duration_iso8601="$( \date -d@${duration} -u +%H:%M:%S )"
artist="$( jq --raw-output '.artist' "${working_dir}/episode_summary.json" | sed -e 's/ \././g' -e 's/\.\./\./g' -e 's/ / /g' )" artist="$( jq --raw-output '.artist' "${working_dir}/episode_summary.json" | sed -e 's/ \././g' -e 's/\.\./\./g' -e 's/ / /g' )"
explicit="$( jq --raw-output '.explicit' "${working_dir}/episode_summary.json" | sed -e 's/ \././g' -e 's/\.\./\./g' -e 's/ / /g' )" explicit="$( jq --raw-output '.explicit' "${working_dir}/episode_summary.json" | sed -e 's/ \././g' -e 's/\.\./\./g' -e 's/ / /g' )"
license="$( jq --raw-output '.license' "${working_dir}/episode_summary.json" | sed -e 's/ \././g' -e 's/\.\./\./g' -e 's/ / /g' )" license="$( jq --raw-output '.license' "${working_dir}/episode_summary.json" | sed -e 's/ \././g' -e 's/\.\./\./g' -e 's/ / /g' )"
summary="$( jq --raw-output '.summary' "${working_dir}/episode_summary.json" | sed -e 's/ \././g' -e 's/\.\./\./g' -e 's/ / /g' )" summary="$( jq --raw-output '.summary' "${working_dir}/episode_summary.json" | sed -e 's/ \././g' -e 's/\.\./\./g' -e 's/ / /g' )"
if [[ -z "${date}" || "${date}" == "null" || -z "${title}" || "${title}" == "null" || -z "${duration}" || "${duration}" == "null" || -z "${duration}" || "${duration}" == "null" || -z "${artist}" || "${artist}" == "null" || -z "${explicit}" || "${explicit}" == "null" || -z "${license}" || "${license}" == "null" || -z "${summary}" || "${summary}" == "null" ]] if [[ -z "${date}" || "${date}" == "null" || -z "${title}" || "${title}" == "null" || -z "${duration_iso8601}" || "${duration_iso8601}" == "null" || -z "${artist}" || "${artist}" == "null" || -z "${explicit}" || "${explicit}" == "null" || -z "${license}" || "${license}" == "null" || -z "${summary}" || "${summary}" == "null" ]]
then then
echo_error "Could not retrieve the synopsis for the text to speech." echo_error "Could not retrieve the synopsis for the text to speech."
ls -al "${working_dir}/episode_summary.json" ls -al "${working_dir}/episode_summary.json"
@ -978,7 +1000,7 @@ function generate_show_transcript() {
REPLACE_LINE_1="This is Hacker Public Radio Episode ${ep_num}, for ${date}" REPLACE_LINE_1="This is Hacker Public Radio Episode ${ep_num}, for ${date}"
REPLACE_LINE_2="Today's show is entitled, \"${title}\"" REPLACE_LINE_2="Today's show is entitled, \"${title}\""
REPLACE_LINE_3="The host is ${artist} and the duration is ${duration}" REPLACE_LINE_3="The host is ${artist} and the duration is ${duration_iso8601}"
REPLACE_LINE_4="The flag is ${explicit}, and the license is ${license}" REPLACE_LINE_4="The flag is ${explicit}, and the license is ${license}"
REPLACE_LINE_5="The summary is \"${summary}\"" REPLACE_LINE_5="The summary is \"${summary}\""
@ -1281,6 +1303,8 @@ function register_assets() {
if [[ -z "${response}" || "${response}" != "200" ]] if [[ -z "${response}" || "${response}" != "200" ]]
then then
echo_error "The assets for episode hpr${ep_num} has not been registered. The response was \"${response}\"" echo_error "The assets for episode hpr${ep_num} has not been registered. The response was \"${response}\""
else
echo_debug "The assets for episode hpr${ep_num} have been registered. The response was \"${response}\""
fi fi
} }
@ -1325,15 +1349,15 @@ function copy_files_to_origin_server() {
################################################# #################################################
# Send the derived files to the server borg to be sent to the Internet Archive # Send the derived files to the server borg to be sent to the Internet Archive
function copy_derived_files_to_borg_for_the_internet_archive() { function copy_derived_files_to_borg() {
echo_debug "Sending the derived files to the server borg to be sent to the Internet Archive" echo_debug "Sending the derived files to the admin server borg"
for ext in flac mp3 ogg opus wav for ext in flac mp3 ogg opus wav
do do
if [[ ! -s "${working_dir}/hpr${ep_num}.${ext}" ]] if [[ ! -s "${working_dir}/hpr${ep_num}.${ext}" ]]
then then
echo_error "The inital report information is missing \"${ext}\"." echo_error "The derived files to the admin server borg is missing \"hpr${ep_num}.${ext}\"."
ls -al "${working_dir}/hpr${ep_num}.${ext}" ls -al "${working_dir}/hpr${ep_num}.${ext}"
fi fi
done done
@ -1402,6 +1426,81 @@ function copy_derived_files_to_borg_for_the_internet_archive() {
} }
#################################################
# Upload the derived files to the Internet Archive (currently a stub: only a debug message is emitted)
function upload_to_the_internet_archive() {
# Placeholder for uploading the derived files to the Internet Archive (IA).
# The only statement that currently runs is the debug message below; every
# other line in this function is commented out and kept as notes and example
# "ia upload" invocations.
# NOTE(review): presumably these notes are the basis for the eventual
# implementation - confirm before enabling any of them.
echo_debug "Sending the derived files to Internet Archive"
# Disabled check that each derived audio file exists and is non-empty:
# # # # for ext in flac mp3 ogg opus wav
# # # # do
# # # # if [[ ! -s "${working_dir}/hpr${ep_num}.${ext}" ]]
# # # # then
# # # # echo_error "The derived files to the IA are missing \"hpr${ep_num}.${ext}\"."
# # # # ls -al "${working_dir}/hpr${ep_num}.${ext}"
# # # # fi
# # # # done
# # # #
# Quoted advice on creating a new IA item:
# # # # I don't know if you noticed in the documentation, but there are some key things you need to bear in mind when creating a new IA item:
# # # #
# # # # You MUST upload the metadata first, and the way things seem to be organised, your item MUST have a file with it. That's why my test uploaded that 1 second WAV file. This is a constraint in the ia tool and was in the days of using the S3-like interface, so I imagine it's built-in to the IA software.
# # # # When creating the metadata you MUST define the mediatype, and since this controls some of what you can upload, using audio seems to be vital for HPR stuff.
# # # # I discovered (by accident) that if the metadata phase fails and the next phase (uploading files) continues to run, an item will be created but it will be a default thing which YOU CAN'T CHANGE! You have to plead with the people at info@archive.org to reset this item (they will never delete) because you can't change the defaults to the right values. If this happens and is not fixed, then HPR has effectively lost a slot (at least I tell myself this so that I don't inadvertently screw up an upload).
# # #
# Example test upload (test.wav into test_collection), disabled:
# # # ia upload hpr_761c0f557b87090db3f8d4d9bce7fc70 \
# # # test.wav \
# # # --metadata="mediatype:audio" \
# # # --metadata="collection:test_collection" \
# # # --metadata="collection:podcasts" \
# # # --metadata="contributor:HPR_test" \
# # # --metadata="creator:HPR_test" \
# # # --metadata="date:$(date +%F)" \
# # # --metadata="description:Summary: Uploading a test item from the command line<br />Source: We would point to the original show here<br /><br /><br /><br /> <p>This is some test HTML in a test item</p>" \
# # # --metadata="language:eng" \
# # # --metadata="licenseurl:http://creativecommons.org/licenses/by-sa/4.0" \
# # # --metadata="subject:Test" \
# # # --metadata="title:hpr_761c0f557b87090db3f8d4d9bce7fc70" \
# # # --header x-archive-keep-old-version:0 \
# # # --retries=5 \
# # # --no-derive \
# # # --no-backup
# Example upload command for hpr4295 (written as an echo, so it would only print the command), disabled:
# # # # echo ia upload hpr4295 /data/IA/uploads/hpr4295.wav \
# # # # --metadata="mediatype:audio" \
# # # # --metadata="collection:hackerpublicradio" \
# # # # --metadata="collection:podcasts" \
# # # # --metadata="contributor:HackerPublicRadio" \
# # # # --metadata="creator:Ken Fallon" \
# # # # --metadata="date:2025-01-17" \
# # # # --metadata="description:Summary: Replacing the battery, swapping a fan, and getting a new desktop<br />Source: <a href="https://hackerpublicradio.org/eps/hpr4295/index.html">https://hackerpublicradio.org/eps/hpr4295/index.html</a><br /><br /><br /><br /><br /> <p> In this reserve show, Ken </p> <ol> <li> replaces the battery in a <a href="https://www.gsmarena.com/sony_xperia_10-9353.php" rel="noopener noreferrer" target="_blank"> SONY A10 telephone</a> . </li> <li> Swaps out a loud fan for a quiet one in a <a href="https://rigol.com.ua/en/products/digital-oscilloscope-rigol-ds1054z/" rel="noopener noreferrer" target="_blank"> RIGOL DS1054</a> . </li> <li> Then replaces the desktop of an <a href="https://www.ikea.com/us/en/p/bekant-desk-white-s19006323/" rel="noopener noreferrer" target="_blank"> Ikea BEKANT</a> standing desk with a narrower <a href="https://www.ikea.com/us/en/p/lagkapten-tabletop-white-40460815/" rel="noopener noreferrer" target="_blank"> LAGKAPTEN</a> table top not meant for it.</li> </ol>" \
# # # # --metadata="language:eng" \
# # # # --metadata="licenseurl:http://creativecommons.org/licenses/by-sa/4.0" \
# # # # --metadata="subject:IkeaHacks" \
# # # # --metadata="subject:diy" \
# # # # --metadata="subject:hardware" \
# # # # --metadata="title:hpr4295 :: Three Holiday Hacks from 2023" \
# # # # --header x-archive-keep-old-version:0 \
# # # # --retries=5 --no-derive --no-backup
# # # #
}
# ia upload hpr4295 /data/IA/uploads/hpr4295.wav \
# --metadata="mediatype:audio" \
# --metadata="collection:hackerpublicradio" \
# --metadata="collection:podcasts" \
# --metadata="contributor:HackerPublicRadio" \
# --metadata="creator:Ken Fallon" \
# --metadata="date:2025-01-17" \
# --metadata="description:Summary: Replacing the battery, swapping a fan, and getting a new desktop<br />Source: <a href="https://hackerpublicradio.org/eps/hpr4295/index.html">https://hackerpublicradio.org/eps/hpr4295/index.html</a><br /><br /><br /><br /><br /> <p> In this reserve show, Ken </p> <ol> <li> replaces the battery in a <a href="https://www.gsmarena.com/sony_xperia_10-9353.php" rel="noopener noreferrer" target="_blank"> SONY A10 telephone</a> . </li> <li> Swaps out a loud fan for a quiet one in a <a href="https://rigol.com.ua/en/products/digital-oscilloscope-rigol-ds1054z/" rel="noopener noreferrer" target="_blank"> RIGOL DS1054</a> . </li> <li> Then replaces the desktop of an <a href="https://www.ikea.com/us/en/p/bekant-desk-white-s19006323/" rel="noopener noreferrer" target="_blank"> Ikea BEKANT</a> standing desk with a narrower <a href="https://www.ikea.com/us/en/p/lagkapten-tabletop-white-40460815/" rel="noopener noreferrer" target="_blank"> LAGKAPTEN</a> table top not meant for it.</li> </ol>" \
# --metadata="language:eng" \
# --metadata="licenseurl:http://creativecommons.org/licenses/by-sa/4.0" \
# --metadata="subject:IkeaHacks" \
# --metadata="subject:diy" \
# --metadata="subject:hardware" \
# --metadata="title:hpr4295 :: Three Holiday Hacks from 2023" \
# --header x-archive-keep-old-version:0 \
# --retries=5 --no-derive --no-backup
################################################# #################################################
# Main execution starts here # Main execution starts here
# #
@ -1452,7 +1551,11 @@ register_assets
copy_files_to_origin_server copy_files_to_origin_server
copy_derived_files_to_borg_for_the_internet_archive copy_derived_files_to_borg
# upload_to_the_internet_archive
#for i in {4301..4305};do echo ${i};/home/ken/sourcecode/personal/bin/hpr-check-ccdn-links.bash ${i};done
echo_debug "The End" echo_debug "The End"
exit 0 exit 0