forked from HPR/hpr-tools
		
	WIP Changes
This commit is contained in:
		
										
											Binary file not shown.
										
									
								
							
										
											Binary file not shown.
										
									
								
							
							
								
								
									
										
											BIN
										
									
								
								workflow/fix_tags_20250201170019.bin
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								workflow/fix_tags_20250201170019.bin
									
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							| @@ -291,6 +291,12 @@ function get_episode_metadata() { | |||||||
|     echo_error "Failed to extract the shownote html file \"${shownotes_html}\"" |     echo_error "Failed to extract the shownote html file \"${shownotes_html}\"" | ||||||
|   fi |   fi | ||||||
|    |    | ||||||
|  |   ( echo '<!DOCTYPE HTML><html lang="en"><head><meta charset="utf-8" /></head><body>'  | ||||||
|  |   cat "${shownotes_html}"  | ||||||
|  |   echo '</body> | ||||||
|  |   </html>' | ||||||
|  |   ) | sponge "${shownotes_html}" | ||||||
|  |    | ||||||
|   variables=( shownotes_json shownotes_html hostid host_name email title summary series_id series_name explicit episode_license ep_date ep_num tags host_license host_profile remote_media shownotes_json_sanatised ) |   variables=( shownotes_json shownotes_html hostid host_name email title summary series_id series_name explicit episode_license ep_date ep_num tags host_license host_profile remote_media shownotes_json_sanatised ) | ||||||
|  |  | ||||||
|   for variable in "${variables[@]}" |   for variable in "${variables[@]}" | ||||||
| @@ -376,15 +382,20 @@ function extract_images_brute_force() { | |||||||
|   do |   do | ||||||
|     this_image="${working_dir}/hpr${ep_num}_image_ext_${image_count_external}" |     this_image="${working_dir}/hpr${ep_num}_image_ext_${image_count_external}" | ||||||
|     wget "${image}" --output-document=${this_image} |     wget "${image}" --output-document=${this_image} | ||||||
|     this_ext="$( file --mime-type ${this_image} | awk -F '/' '{print $NF}' )" |     if [ -s "${this_image}" ] | ||||||
|     mv -v "${this_image%.*}" "${this_image}.${this_ext}" |  | ||||||
|     this_width="$( mediainfo "${this_image}.${this_ext}" | grep Width | awk -F ': | pixels' '{print $2}' | sed 's/ //g' )" |  | ||||||
|     if [ "${this_width}" -gt "400" ] |  | ||||||
|     then |     then | ||||||
|       echo_debug "Generating thumbnail for external image \"${this_image}.${this_ext}\"." |       this_ext="$( file --mime-type ${this_image} | awk -F '/' '{print $NF}' )" | ||||||
|       magick "${this_image}.${this_ext}" -resize 400x "${this_image}_tn.${this_ext}" |       mv -v "${this_image%.*}" "${this_image}.${this_ext}" | ||||||
|  |       this_width="$( mediainfo "${this_image}.${this_ext}" | grep Width | awk -F ': | pixels' '{print $2}' | sed 's/ //g' )" | ||||||
|  |       if [ "${this_width}" -gt "400" ] | ||||||
|  |       then | ||||||
|  |         echo_debug "Generating thumbnail for external image \"${this_image}.${this_ext}\"." | ||||||
|  |         magick "${this_image}.${this_ext}" -resize 400x "${this_image}_tn.${this_ext}" | ||||||
|  |       fi | ||||||
|  |       ((image_count_external=image_count_external+1)) | ||||||
|  |     else | ||||||
|  |       echo_debug "Could not download external image \"${image}\"." | ||||||
|     fi |     fi | ||||||
|     ((image_count_external=image_count_external+1)) |  | ||||||
|   done |   done | ||||||
|    |    | ||||||
|   cat "${shownotes_html}" | remove-image.pl | sponge "${shownotes_html}" |   cat "${shownotes_html}" | remove-image.pl | sponge "${shownotes_html}" | ||||||
| @@ -474,7 +485,7 @@ function extract_images_brute_force() { | |||||||
|  |  | ||||||
| function media_checks() { | function media_checks() { | ||||||
|  |  | ||||||
|   if [ -n "${remote_media}" ] |   if [[ -n "${remote_media}" && "${remote_media}" != "null" ]] | ||||||
|   then |   then | ||||||
|     echo_debug "Fetching remote media from \"${remote_media}\"" |     echo_debug "Fetching remote media from \"${remote_media}\"" | ||||||
|     wget --timestamping  --directory-prefix="${working_dir}/" "${remote_media}" |     wget --timestamping  --directory-prefix="${working_dir}/" "${remote_media}" | ||||||
| @@ -517,7 +528,7 @@ function media_checks() { | |||||||
|   shownotes_srt="${media%.*}.srt" |   shownotes_srt="${media%.*}.srt" | ||||||
|   if [[ -z "${shownotes_srt}" || ! -s "${shownotes_srt}" ]] |   if [[ -z "${shownotes_srt}" || ! -s "${shownotes_srt}" ]] | ||||||
|   then |   then | ||||||
|     echo_error "Could not find the subtitles for media \"${media}/\"" |     echo_error "Could not find the subtitles for media \"${media}\"" | ||||||
|   fi |   fi | ||||||
|    |    | ||||||
|   # Find duration |   # Find duration | ||||||
| @@ -720,7 +731,7 @@ function manual_shownotes_review() { | |||||||
|   fi |   fi | ||||||
|    |    | ||||||
|   # remove extra wrappers that seamonkey adds |   # remove extra wrappers that seamonkey adds | ||||||
|   grep --invert-match --perl-regexp  '<!DOCTYPE|html>|head>|<meta|body>' "${shownotes_html%.*}_edited.html" | sponge "${shownotes_html%.*}_edited.html" |   grep --invert-match --perl-regexp  '<!DOCTYPE|<html|html>|head>|<meta|body>' "${shownotes_html%.*}_edited.html" | sponge "${shownotes_html%.*}_edited.html" | ||||||
|  |  | ||||||
|   # Check to see if images have been linked TODO make a loop for found images |   # Check to see if images have been linked TODO make a loop for found images | ||||||
|   if [ "$( find "${working_dir}" -type f -iname "*_image_*" | wc -l )" -ne "0" ] |   if [ "$( find "${working_dir}" -type f -iname "*_image_*" | wc -l )" -ne "0" ] | ||||||
| @@ -818,6 +829,17 @@ function create_tts_summary { | |||||||
|     echo_error "The Episode hpr${ep_num} has not been posted" |     echo_error "The Episode hpr${ep_num} has not been posted" | ||||||
|   fi |   fi | ||||||
|    |    | ||||||
|  |   if [ ! -s "${working_dir}/episode_summary.json" ] | ||||||
|  |   then | ||||||
|  |     echo_error "Failed to find the extracted shownote html file \"episode_summary.json\"" | ||||||
|  |   fi | ||||||
|  |    | ||||||
|  |   duration="$( jq --raw-output '.duration' "${working_dir}/episode_summary.json" | sed -e 's/ \././g' -e 's/\.\./\./g' -e 's/  / /g' )" | ||||||
|  |   if [[ -z "${duration}" || "${duration}" -lt "30" || "${duration}" -gt "30000" ]] | ||||||
|  |   then | ||||||
|  |     echo_error "Invalid duration found in \"episode_summary.json\"" >&2 | ||||||
|  |   fi | ||||||
|  |    | ||||||
|   synopsis="$( jq --raw-output '.synopsis' "${working_dir}/episode_summary.json" | sed -e 's/ \././g' -e 's/\.\./\./g' -e 's/  / /g' )" |   synopsis="$( jq --raw-output '.synopsis' "${working_dir}/episode_summary.json" | sed -e 's/ \././g' -e 's/\.\./\./g' -e 's/  / /g' )" | ||||||
|    |    | ||||||
|   if [[ -z "${synopsis}" || "${synopsis}" == "null" ]] |   if [[ -z "${synopsis}" || "${synopsis}" == "null" ]] | ||||||
| @@ -964,13 +986,13 @@ function generate_show_transcript() { | |||||||
|   date="$( jq --raw-output '.date' "${working_dir}/episode_summary.json" | sed -e 's/ \././g' -e 's/\.\./\./g' -e 's/  / /g' )" |   date="$( jq --raw-output '.date' "${working_dir}/episode_summary.json" | sed -e 's/ \././g' -e 's/\.\./\./g' -e 's/  / /g' )" | ||||||
|   title="$( jq --raw-output '.title' "${working_dir}/episode_summary.json" | sed -e 's/ \././g' -e 's/\.\./\./g' -e 's/  / /g' )" |   title="$( jq --raw-output '.title' "${working_dir}/episode_summary.json" | sed -e 's/ \././g' -e 's/\.\./\./g' -e 's/  / /g' )" | ||||||
|   duration="$( jq --raw-output '.duration' "${working_dir}/episode_summary.json" | sed -e 's/ \././g' -e 's/\.\./\./g' -e 's/  / /g' )" |   duration="$( jq --raw-output '.duration' "${working_dir}/episode_summary.json" | sed -e 's/ \././g' -e 's/\.\./\./g' -e 's/  / /g' )" | ||||||
|   duration="$( \date -d@${duration} -u +%H:%M:%S )" |   duration_iso8601="$( \date -d@${duration} -u +%H:%M:%S )" | ||||||
|   artist="$( jq --raw-output '.artist' "${working_dir}/episode_summary.json" | sed -e 's/ \././g' -e 's/\.\./\./g' -e 's/  / /g' )" |   artist="$( jq --raw-output '.artist' "${working_dir}/episode_summary.json" | sed -e 's/ \././g' -e 's/\.\./\./g' -e 's/  / /g' )" | ||||||
|   explicit="$( jq --raw-output '.explicit' "${working_dir}/episode_summary.json" | sed -e 's/ \././g' -e 's/\.\./\./g' -e 's/  / /g' )" |   explicit="$( jq --raw-output '.explicit' "${working_dir}/episode_summary.json" | sed -e 's/ \././g' -e 's/\.\./\./g' -e 's/  / /g' )" | ||||||
|   license="$( jq --raw-output '.license' "${working_dir}/episode_summary.json" | sed -e 's/ \././g' -e 's/\.\./\./g' -e 's/  / /g' )" |   license="$( jq --raw-output '.license' "${working_dir}/episode_summary.json" | sed -e 's/ \././g' -e 's/\.\./\./g' -e 's/  / /g' )" | ||||||
|   summary="$( jq --raw-output '.summary' "${working_dir}/episode_summary.json" | sed -e 's/ \././g' -e 's/\.\./\./g' -e 's/  / /g' )" |   summary="$( jq --raw-output '.summary' "${working_dir}/episode_summary.json" | sed -e 's/ \././g' -e 's/\.\./\./g' -e 's/  / /g' )" | ||||||
|    |    | ||||||
|   if [[ -z "${date}" || "${date}" == "null" || -z "${title}" || "${title}" == "null" || -z "${duration}" || "${duration}" == "null" || -z "${duration}" || "${duration}" == "null" || -z "${artist}" || "${artist}" == "null" || -z "${explicit}" || "${explicit}" == "null" || -z "${license}" || "${license}" == "null" || -z "${summary}" || "${summary}" == "null"  ]] |   if [[ -z "${date}" || "${date}" == "null" || -z "${title}" || "${title}" == "null" || -z "${duration_iso8601}" || "${duration_iso8601}" == "null" || -z "${artist}" || "${artist}" == "null" || -z "${explicit}" || "${explicit}" == "null" || -z "${license}" || "${license}" == "null" || -z "${summary}" || "${summary}" == "null"  ]] | ||||||
|   then |   then | ||||||
|     echo_error "Could not retrieve the synopsis for the text to speech." |     echo_error "Could not retrieve the synopsis for the text to speech." | ||||||
|     ls -al "${working_dir}/episode_summary.json" |     ls -al "${working_dir}/episode_summary.json" | ||||||
| @@ -978,7 +1000,7 @@ function generate_show_transcript() { | |||||||
|  |  | ||||||
|   REPLACE_LINE_1="This is Hacker Public Radio Episode ${ep_num}, for ${date}" |   REPLACE_LINE_1="This is Hacker Public Radio Episode ${ep_num}, for ${date}" | ||||||
|   REPLACE_LINE_2="Today's show is entitled, \"${title}\"" |   REPLACE_LINE_2="Today's show is entitled, \"${title}\"" | ||||||
|   REPLACE_LINE_3="The host is ${artist} and the duration is ${duration}" |   REPLACE_LINE_3="The host is ${artist} and the duration is ${duration_iso8601}" | ||||||
|   REPLACE_LINE_4="The flag is ${explicit}, and the license is ${license}" |   REPLACE_LINE_4="The flag is ${explicit}, and the license is ${license}" | ||||||
|   REPLACE_LINE_5="The summary is \"${summary}\"" |   REPLACE_LINE_5="The summary is \"${summary}\"" | ||||||
|    |    | ||||||
| @@ -1281,6 +1303,8 @@ function register_assets() { | |||||||
|   if [[ -z "${response}" || "${response}" != "200" ]] |   if [[ -z "${response}" || "${response}" != "200" ]] | ||||||
|   then |   then | ||||||
|     echo_error "The assets for episode hpr${ep_num} has not been registered. The response was \"${response}\"" |     echo_error "The assets for episode hpr${ep_num} has not been registered. The response was \"${response}\"" | ||||||
|  |   else | ||||||
|  |     echo_debug "The assets for episode hpr${ep_num} have been registered. The response was \"${response}\"" | ||||||
|   fi |   fi | ||||||
| } | } | ||||||
|  |  | ||||||
| @@ -1325,15 +1349,15 @@ function copy_files_to_origin_server() { | |||||||
| ################################################# | ################################################# | ||||||
| # Send the derived files to the server borg to be sent to the Internet Archive | # Send the derived files to the server borg to be sent to the Internet Archive | ||||||
|  |  | ||||||
| function copy_derived_files_to_borg_for_the_internet_archive() { | function copy_derived_files_to_borg() { | ||||||
|  |  | ||||||
|   echo_debug "Sending the derived files to the server borg to be sent to the Internet Archive" |   echo_debug "Sending the derived files to the admin server borg" | ||||||
|    |    | ||||||
|   for ext in flac mp3 ogg opus wav |   for ext in flac mp3 ogg opus wav | ||||||
|   do |   do | ||||||
|     if [[ ! -s "${working_dir}/hpr${ep_num}.${ext}" ]] |     if [[ ! -s "${working_dir}/hpr${ep_num}.${ext}" ]] | ||||||
|     then |     then | ||||||
|       echo_error "The inital report information is missing \"${ext}\"." |       echo_error "The derived files to the admin server borg is missing \"hpr${ep_num}.${ext}\"." | ||||||
|       ls -al "${working_dir}/hpr${ep_num}.${ext}" |       ls -al "${working_dir}/hpr${ep_num}.${ext}" | ||||||
|     fi |     fi | ||||||
|   done |   done | ||||||
| @@ -1402,6 +1426,81 @@ function copy_derived_files_to_borg_for_the_internet_archive() { | |||||||
|  |  | ||||||
| } | } | ||||||
|  |  | ||||||
|  | ################################################# | ||||||
|  | # Upload the derived files to the Internet Archive | ||||||
|  |  | ||||||
|  | function upload_to_the_internet_archive() { | ||||||
|  |   echo_debug "Sending the derived files to Internet Archive" | ||||||
|  |    | ||||||
|  | # # # #   for ext in flac mp3 ogg opus wav | ||||||
|  | # # # #   do | ||||||
|  | # # # #     if [[ ! -s "${working_dir}/hpr${ep_num}.${ext}" ]] | ||||||
|  | # # # #     then | ||||||
|  | # # # #       echo_error "The derived files to the IA are missing \"hpr${ep_num}.${ext}\"." | ||||||
|  | # # # #       ls -al "${working_dir}/hpr${ep_num}.${ext}" | ||||||
|  | # # # #     fi | ||||||
|  | # # # #   done | ||||||
|  | # # # #  | ||||||
|  | # # # # I don't know if you noticed in the documentation, but there are some key things you need to bear in mind when creating a new IA item: | ||||||
|  | # # # #  | ||||||
|  | # # # # You MUST upload the metadata first, and the way things seem to be organised, your item MUST have a file with it. That's why my test uploaded that 1 second WAV file. This is a constraint in the ia tool and was in the days of using the S3-like interface, so I imagine it's built-in to the IA software. | ||||||
|  | # # # # When creating the metadata you MUST define the mediatype, and since this controls some of what you can upload, using audio seems to be vital for HPR stuff. | ||||||
|  | # # # # I discovered (by accident) that if the metadata phase fails and the next phase (uploading files) continues to run, an item will be created but it will be a default thing which YOU CAN'T CHANGE! You have to plead with the people at info@archive.org to reset this item (they will never delete) because you can't change the defaults to the right values. If this happens and is not fixed, then HPR has effectively lost a slot (at least I tell myself this so that I don't inadvertently screw up an upload). | ||||||
|  | # # #    | ||||||
|  | # # #   ia upload hpr_761c0f557b87090db3f8d4d9bce7fc70 \ | ||||||
|  | # # #       test.wav \ | ||||||
|  | # # #       --metadata="mediatype:audio" \ | ||||||
|  | # # #       --metadata="collection:test_collection" \ | ||||||
|  | # # #       --metadata="collection:podcasts" \ | ||||||
|  | # # #       --metadata="contributor:HPR_test" \ | ||||||
|  | # # #       --metadata="creator:HPR_test" \ | ||||||
|  | # # #       --metadata="date:$(date +%F)" \ | ||||||
|  | # # #       --metadata="description:Summary: Uploading a test item from the command line<br />Source: We would point to the original show here<br /><br /><br /><br /> <p>This is some test HTML in a test item</p>" \ | ||||||
|  | # # #       --metadata="language:eng" \ | ||||||
|  | # # #       --metadata="licenseurl:http://creativecommons.org/licenses/by-sa/4.0" \ | ||||||
|  | # # #       --metadata="subject:Test" \ | ||||||
|  | # # #       --metadata="title:hpr_761c0f557b87090db3f8d4d9bce7fc70" \ | ||||||
|  | # # #       --header x-archive-keep-old-version:0 \ | ||||||
|  | # # #       --retries=5 \ | ||||||
|  | # # #       --no-derive \ | ||||||
|  | # # #       --no-backup | ||||||
|  | # # # #   echo ia upload hpr4295 /data/IA/uploads/hpr4295.wav \ | ||||||
|  | # # # #       --metadata="mediatype:audio" \ | ||||||
|  | # # # #       --metadata="collection:hackerpublicradio" \ | ||||||
|  | # # # #       --metadata="collection:podcasts" \ | ||||||
|  | # # # #       --metadata="contributor:HackerPublicRadio" \ | ||||||
|  | # # # #       --metadata="creator:Ken Fallon" \ | ||||||
|  | # # # #       --metadata="date:2025-01-17" \ | ||||||
|  | # # # #       --metadata="description:Summary: Replacing the battery, swapping a fan, and getting a new desktop<br />Source: <a href="https://hackerpublicradio.org/eps/hpr4295/index.html">https://hackerpublicradio.org/eps/hpr4295/index.html</a><br /><br /><br /><br /><br /> <p> In this reserve show, Ken </p> <ol>  <li> replaces the battery in a <a href="https://www.gsmarena.com/sony_xperia_10-9353.php" rel="noopener noreferrer" target="_blank"> SONY A10 telephone</a> . </li>  <li> Swaps out a loud fan for a quiet one in a <a href="https://rigol.com.ua/en/products/digital-oscilloscope-rigol-ds1054z/" rel="noopener noreferrer" target="_blank"> RIGOL DS1054</a> . </li>  <li> Then replaces the desktop of an <a href="https://www.ikea.com/us/en/p/bekant-desk-white-s19006323/" rel="noopener noreferrer" target="_blank"> Ikea BEKANT</a> standing desk with a narrower <a href="https://www.ikea.com/us/en/p/lagkapten-tabletop-white-40460815/" rel="noopener noreferrer" target="_blank"> LAGKAPTEN</a> table top not meant for it.</li> </ol>" \ | ||||||
|  | # # # #       --metadata="language:eng" \ | ||||||
|  | # # # #       --metadata="licenseurl:http://creativecommons.org/licenses/by-sa/4.0" \ | ||||||
|  | # # # #       --metadata="subject:IkeaHacks" \ | ||||||
|  | # # # #       --metadata="subject:diy" \ | ||||||
|  | # # # #       --metadata="subject:hardware" \ | ||||||
|  | # # # #       --metadata="title:hpr4295 :: Three Holiday Hacks from 2023" \ | ||||||
|  | # # # #       --header x-archive-keep-old-version:0 \ | ||||||
|  | # # # #       --retries=5 --no-derive --no-backup | ||||||
|  | # # # #    | ||||||
|  | } | ||||||
|  |  | ||||||
|  |  | ||||||
|  | # ia upload hpr4295 /data/IA/uploads/hpr4295.wav \ | ||||||
|  | #     --metadata="mediatype:audio" \ | ||||||
|  | #     --metadata="collection:hackerpublicradio" \ | ||||||
|  | #     --metadata="collection:podcasts" \ | ||||||
|  | #     --metadata="contributor:HackerPublicRadio" \ | ||||||
|  | #     --metadata="creator:Ken Fallon" \ | ||||||
|  | #     --metadata="date:2025-01-17" \ | ||||||
|  | #     --metadata="description:Summary: Replacing the battery, swapping a fan, and getting a new desktop<br />Source: <a href="https://hackerpublicradio.org/eps/hpr4295/index.html">https://hackerpublicradio.org/eps/hpr4295/index.html</a><br /><br /><br /><br /><br /> <p> In this reserve show, Ken </p> <ol>  <li> replaces the battery in a <a href="https://www.gsmarena.com/sony_xperia_10-9353.php" rel="noopener noreferrer" target="_blank"> SONY A10 telephone</a> . </li>  <li> Swaps out a loud fan for a quiet one in a <a href="https://rigol.com.ua/en/products/digital-oscilloscope-rigol-ds1054z/" rel="noopener noreferrer" target="_blank"> RIGOL DS1054</a> . </li>  <li> Then replaces the desktop of an <a href="https://www.ikea.com/us/en/p/bekant-desk-white-s19006323/" rel="noopener noreferrer" target="_blank"> Ikea BEKANT</a> standing desk with a narrower <a href="https://www.ikea.com/us/en/p/lagkapten-tabletop-white-40460815/" rel="noopener noreferrer" target="_blank"> LAGKAPTEN</a> table top not meant for it.</li> </ol>" \ | ||||||
|  | #     --metadata="language:eng" \ | ||||||
|  | #     --metadata="licenseurl:http://creativecommons.org/licenses/by-sa/4.0" \ | ||||||
|  | #     --metadata="subject:IkeaHacks" \ | ||||||
|  | #     --metadata="subject:diy" \ | ||||||
|  | #     --metadata="subject:hardware" \ | ||||||
|  | #     --metadata="title:hpr4295 :: Three Holiday Hacks from 2023" \ | ||||||
|  | #     --header x-archive-keep-old-version:0 \ | ||||||
|  | #     --retries=5 --no-derive --no-backup | ||||||
|  |  | ||||||
| ################################################# | ################################################# | ||||||
| # Main execution starts here | # Main execution starts here | ||||||
| #  | #  | ||||||
| @@ -1452,7 +1551,11 @@ register_assets | |||||||
|  |  | ||||||
| copy_files_to_origin_server | copy_files_to_origin_server | ||||||
|  |  | ||||||
| copy_derived_files_to_borg_for_the_internet_archive | copy_derived_files_to_borg | ||||||
|  |  | ||||||
|  | # upload_to_the_internet_archive | ||||||
|  |  | ||||||
|  | #for i in {4301..4305};do echo ${i};/home/ken/sourcecode/personal/bin/hpr-check-ccdn-links.bash ${i};done | ||||||
|  |  | ||||||
| echo_debug "The End"  | echo_debug "The End"  | ||||||
| exit 0 | exit 0 | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user