diff --git a/workflow/process_episode.bash b/workflow/process_episode.bash index a774c46..4324d55 100755 --- a/workflow/process_episode.bash +++ b/workflow/process_episode.bash @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Copyright Ken Fallon - Released into the public domain. http://creativecommons.org/publicdomain/ +# Copyright Ken Fallon - Released into the public domain. http://creativecommons.org/publicdomain/ #============================================================ # git clone https://github.com/davmo/fix_tags.git @@ -58,12 +58,12 @@ function program_checks() { then echo_error "The \"${processing_dir}\" is required but is not defined." fi - + if [[ ! -s "${theme}" || ! -s "${silence}" || ! -s "${outro_flac}" || ! -s "${outro_srt}" || ! -s "${intro_srt}" ]] then echo_error "The files for the theme are not available." ls -al "${theme}" "${silence}" "${outro_flac}" "${outro_srt}" "${intro_srt}" - fi + fi function is_installed() { for this_program in "$@" @@ -85,7 +85,7 @@ function program_checks() { echo_debug "If a directory is provided then the shownotes.json will be used." fi done - + } ################################################# @@ -93,18 +93,18 @@ function program_checks() { function check_variable_is_correct() { - echo_debug "Checking variables ${*}. check_variable_is_correct()" - + #echo_debug "Checking variables ${*}. check_variable_is_correct()" + for argument in "$@" do case "${argument}" in - working_dir) + working_dir) if [[ ! -d "${working_dir}" || -z "${working_dir}" ]] then echo_error "The \"working dir\" variable is missing." fi ;; - ep_num) + ep_num) if [ -z "${ep_num}" ] then echo_error "The \"ep_num\" variable is missing." @@ -114,39 +114,161 @@ function check_variable_is_correct() { echo_error "The \"ep_num\" variable is not a valid number between 1 and 9999." fi ;; - shownotes_edited) - if [ ! -s "${shownotes_edited}" ] + shownotes_edited) + if [[ ! 
-s "${shownotes_edited}" || -z "${shownotes_edited}" ]] then - echo_debug "The \"shownotes_edited\" variable is missing." - return + echo_error "The \"shownotes_edited\" variable/file is missing." fi if [ "$( file --brief --mime-type "${shownotes_edited}" | grep --count 'text/html' )" -ne "1" ] then echo_error "The \"shownotes_edited\" variable has not a valid \"text/html\" mime type." fi ;; + episode_summary_json) + if [[ ! -s "${episode_summary_json}" || -z "${episode_summary_json}" ]] + then + echo_error "The \"episode_summary_json\" variable/file is missing." + fi + if [ "$( file --brief --mime-type "${episode_summary_json}" | grep --count 'application/json' )" -ne "1" ] + then + echo_error "The \"episode_summary_json\" variable has not a valid \"application/json\" mime type." + fi + ;; + album) + if [[ -z "${album}" || "${album}" == "null" ]] + then + echo_error "The \"album\" variable is missing." + fi + if [ "$( echo "${album}" | grep --perl-regexp '^Hacker Public Radio$' | wc --lines )" -eq "0" ] + then + echo_error "The \"album\" variable is not \"Hacker Public Radio\"." + fi + ;; + artist) + if [[ -z "${artist}" || "${artist}" == "null" ]] + then + echo_error "The \"artist\" variable is missing." + fi + ;; + comment) + if [[ -z "${comment}" || "${comment}" == "null" ]] + then + echo_error "The \"comment\" variable is missing." + fi + ;; + date) + if [[ -z "${date}" || "${date}" == "null" ]] + then + echo_error "The \"date\" variable is missing." + fi + ;; + duration) + if [[ -z "${duration}" || "${duration}" == "null" ]] + then + echo_error "The \"duration\" variable is missing." + fi + if [[ -z "${duration}" || "${duration}" -lt "30" || "${duration}" -gt "30000" ]] + then + echo_error "Invalid duration missing or outside range 30 to 30000." >&2 + fi + ;; + duration_iso8601) + if [[ -z "${duration_iso8601}" || "${duration_iso8601}" == "null" ]] + then + echo_error "The \"duration_iso8601\" variable is missing." 
+ fi + ;; + explicit) + if [[ -z "${explicit}" || "${explicit}" == "null" ]] + then + echo_error "The \"explicit\" variable is missing." + fi + ;; + genre) + if [[ -z "${genre}" || "${genre}" == "null" ]] + then + echo_error "The \"genre\" variable is missing." + fi + ;; + hostid) + if [[ -z "${hostid}" || "${hostid}" == "null" ]] + then + echo_error "The \"hostid\" variable is missing." + fi + ;; + license) + if [[ -z "${license}" || "${license}" == "null" ]] + then + echo_error "The \"license\" variable is missing." + fi + ;; + license_url) + if [[ -z "${license_url}" || "${license_url}" == "null" ]] + then + echo_error "The \"license_url\" variable is missing." + fi + ;; + summary) + if [[ -z "${summary}" || "${summary}" == "null" ]] + then + echo_error "The \"summary\" variable is missing." + fi + ;; + synopsis) + if [[ -z "${synopsis}" || "${synopsis}" == "null" ]] + then + echo_error "The \"synopsis\" variable is missing." + fi + ;; + tags) + if [[ -z "${tags}" || "${tags}" == "null" ]] + then + echo_error "The \"tags\" variable is missing." + fi + ;; + title) + if [[ -z "${title}" || "${title}" == "null" ]] + then + echo_error "The \"title\" variable is missing." + fi + ;; + track) + if [[ -z "${track}" || "${track}" == "null" ]] + then + echo_error "The \"track\" variable is missing." + fi + ;; + year) + if [[ -z "${year}" || "${year}" == "null" ]] + then + echo_error "The \"year\" variable is missing." + fi + ;; + *) + echo_error "An unknown variable \"${argument}\" was provided." + ;; esac done - + } ################################################# # Get the next show in the queue -function get_working_dir_from_hpr_hub() { +function get_ep_num_from_hpr_hub() { + + echo_debug "Processing the next HPR Show in the queue. get_ep_num_from_hpr_hub()" - echo_debug "Processing the next HPR Show in the queue. 
get_working_dir_from_hpr_hub()" - if [ "$( curl --silent --netrc-file ${HOME}/.netrc --write-out '%{http_code}' https://hub.hackerpublicradio.org/cms/status.php --output "${processing_dir}/status.csv" )" != 200 ] then echo_error "Could not get a list of the queue status from \"https://hub.hackerpublicradio.org/cms/status.php\"" fi - + if [ ! -s "${processing_dir}/status.csv" ] then echo_error "Failed to retrieve \"${processing_dir}/status.csv\" from server." fi - + response=$( cat "${processing_dir}/status.csv" | grep ',SHOW_SUBMITTED,' | head -1 | sed 's/,/ /g' ) if [ -z "${response}" ] then @@ -154,7 +276,7 @@ function get_working_dir_from_hpr_hub() { curl --silent --netrc-file ${HOME}/.netrc "https://hub.hackerpublicradio.org/cms/status.php" | sort -n echo_error "There appear to be no more shows with the status \"SHOW_SUBMITTED\"." fi - + timestamp_epoc="$( echo ${response} | awk '{print $1}' )" ep_num="$( echo ${response} | awk '{print $2}' )" ep_date="$( echo ${response} | awk '{print $3}' )" @@ -166,73 +288,70 @@ function get_working_dir_from_hpr_hub() { source_dir="hpr:${hpr_upload_dir}" dest_dir="${timestamp_epoc}_${ep_num}_${ep_date}_${key}" working_dir="${processing_dir}/${dest_dir}" - + echo_debug "Downloading hpr${ep_num} from ${email_unpadded}" echo_debug "" echo_debug "rsync -ave ssh --partial --progress ${source_dir}/ ${working_dir}/" ssh hpr -t "detox -v ${hpr_upload_dir}/" rsync -ave ssh --partial --progress ${source_dir}/ ${working_dir}/ - + if [ ! 
-s "${working_dir}/shownotes.json" ] then echo_error "The working dir is missing the shownotes file \"${working_dir}/shownotes.json\"" fi - + if [ "$( file --brief --mime-type "${working_dir}/shownotes.json" | grep --count "application/json" )" -eq 0 ] then echo_error "The \"${working_dir}/shownotes.json\" is not a \"application/json\" file" fi - + } ################################################# # Get the show information from a local directory -function get_working_dir_from_local_dir() { +function get_ep_num_from_local_dir() { - echo_debug "Processing a local directory. get_working_dir_from_local_dir()" - - if [[ ! -d "${working_dir}" || -z "${working_dir}" ]] - then - echo_error "The working dir is missing. Please supply a working directory." - fi + echo_debug "Processing a local directory. get_ep_num_from_local_dir()" + + check_variable_is_correct working_dir if [ ! -s "${working_dir}/shownotes.json" ] then - + echo_debug "Could not find a \"shownotes.json\" in the working directory \"${working_dir}/\"" - + if [ -z "${ep_num}" ] then echo_debug "Attempting to get episode number from the \"${working_dir}\"" ep_num="$( echo "${working_dir}" | grep --color=never --perl-regexp --only-matching '_[0-9]{4}_' | sed 's/_//g' )" fi - + if [ -z "${ep_num}" ] then echo_error "Could not find the episode number - please rerun with \"$0 ep_num=9876\"" - fi - + fi + echo_debug "Attempting to download information for episode \"${ep_num}\"" - + if [ "$( curl --silent --netrc-file ${HOME}/.netrc --write-out '%{http_code}' https://hub.hackerpublicradio.org/cms/shownotes.php?id=${ep_num} --output "${working_dir}/shownotes.json" )" != 200 ] then echo_error "The Episode hpr${ep_num} has not been posted." fi - + if [ ! -s "${working_dir}/shownotes.json" ] then echo_error "The Episode information for hpr${ep_num} failed to download." 
fi - + fi - + if [[ -s "${working_dir}/shownotes.json" && "$( file --brief --mime-type "${working_dir}/shownotes.json" | grep --count "application/json" )" -eq 0 ]] then echo_error "\"${working_dir}/shownotes.json\" is not a \"application/json\" file" fi - + } ################################################# @@ -245,18 +364,24 @@ function get_working_dir() { if [ $# -eq 0 ] then - get_working_dir_from_hpr_hub - else - get_working_dir_from_local_dir $@ + get_ep_num_from_hpr_hub fi - + + check_variable_is_correct working_dir + + if [ -z "${ep_num}" ] + then + get_ep_num_from_local_dir $@ + check_variable_is_correct ep_num + fi + if [ ! -s "${working_dir}/shownotes.json" ] then echo_error "The working dir \"${working_dir}\" could not be found." fi - + echo_debug "Found working directory as \"${working_dir}\"" - + if [ ! -d "${working_dir}/processing/" ] then mkdir -v "${working_dir}/processing/" @@ -265,7 +390,7 @@ function get_working_dir() { echo_error "Could not create the processing directory \"${working_dir}/processing/\"." fi fi - + } ################################################# @@ -274,12 +399,12 @@ function get_working_dir() { function get_episode_metadata() { echo_debug "Extracting the episode metadata. get_episode_metadata()" - + if [[ -s "${working_dir}/shownotes.json" && "$( file --brief --mime-type "${working_dir}/shownotes.json" | grep --count "application/json" )" -eq 0 ]] then echo_error "\"${working_dir}/shownotes.json\" is not a \"application/json\" file" fi - + shownotes_json="${working_dir}/shownotes.json" shownotes_html="${working_dir}/shownotes.html" shownotes_edited="${working_dir}/shownotes_edited.html" @@ -311,13 +436,13 @@ function get_episode_metadata() { then echo_error "Failed to extract the shownote html file \"${shownotes_html}\"" fi - - ( echo '
' - cat "${shownotes_html}" + + ( echo '' + cat "${shownotes_html}" echo ' ' ) | sponge "${shownotes_html}" - + variables=( shownotes_json shownotes_html hostid host_name email title summary series_id series_name explicit episode_license ep_date ep_num tags host_license host_profile remote_media shownotes_json_sanatised ) for variable in "${variables[@]}" @@ -329,7 +454,7 @@ function get_episode_metadata() { echo_debug "The variable \"${variable}\" is set to \"${!variable}\""; fi done - + # Argument Override if [ $# -gt 0 ] then @@ -346,28 +471,28 @@ function get_episode_metadata() { fi done fi - + # Hosts need to exist in the database if [ "${hostid}" == '0' ] then echo_error "The hostid is 0. Create the host and use \"hostid=???\" to override" fi - + } ################################################# # Extract_images by brute force function extract_images_brute_force() { - + echo_debug "Extracting images with grep. extract_images_brute_force()" - + if [ -s "${shownotes_edited}" ] then echo_debug "There is already an edited version of the shownotes at \"${shownotes_edited}\", slipping image extraction." return fi - + if [[ -z "${shownotes_html}" || ! -s "${shownotes_html}" ]] then echo_error "The shownotes_html file \"${shownotes_html}\" could not be found." @@ -377,7 +502,7 @@ function extract_images_brute_force() { sed "s#>#>\n#g" "${shownotes_html}" | sponge "${shownotes_html}" - + # Extract embedded images image_count_embedded="1" @@ -395,7 +520,7 @@ function extract_images_brute_force() { fi ((image_count_embedded=image_count_embedded+1)) done - + # Download referenced images image_count_external="1" @@ -418,15 +543,15 @@ function extract_images_brute_force() { echo_debug "Could not download external image \"${image}\"." 
fi done - + cat "${shownotes_html}" | remove-image.pl | sponge "${shownotes_html}" - + if [ "${image_count_embedded}" -gt "1" ] then image_count_embedded="1" - + touch "${shownotes_html}.embedded_images" - + cat "${shownotes_html}" | while read this_line do if [ "$( echo "${this_line}" | grep --count "LOCAL_IMAGE_REMOVED" )" -eq "0" ] @@ -452,19 +577,19 @@ function extract_images_brute_force() { ((image_count_embedded=image_count_embedded+1)) fi done - + mv -v "${shownotes_html}.embedded_images" "${shownotes_html}" - + else echo_debug "No embedded images found. ${image_count_embedded}" fi - + if [ "${image_count_external}" -gt "1" ] then image_count_external="1" - + touch "${shownotes_html}.external_images" - + cat "${shownotes_html}" | remove-image.pl | while read this_line do if [ "$( echo "${this_line}" | grep --count "REMOTE_IMAGE_REMOVED" )" -eq "0" ] @@ -490,19 +615,19 @@ function extract_images_brute_force() { ((image_count_external=image_count_external+1)) fi done - - mv -v "${shownotes_html}.external_images" "${shownotes_html}" - + + mv -v "${shownotes_html}.external_images" "${shownotes_html}" + else echo_debug "No external images found." fi -## TODO End Temp fix +## TODO End Temp fix } ################################################# -## Media Checks +## Media Checks function media_checks() { @@ -524,7 +649,7 @@ function media_checks() { find "${working_dir}/" -type f echo_error "Can't find any media in \"${working_dir}/\"" fi - + media_basename="$( basename "${media}" )" if [ -z "${media_basename}" ] then @@ -535,27 +660,27 @@ function media_checks() { then echo "Multiple files found. Which one do you want to use ?" select this_media in $( echo "${media}" ) - do + do ls -al "${this_media}" media="${this_media}" break done fi echo_debug "You selected \"${media}\"." - + if [[ -z "${media}" || ! -s "${media}" ]] then echo_error "Could not find the media \"${media}/\"" fi - + shownotes_srt="${media%.*}.srt" if [[ -z "${shownotes_srt}" || ! 
-s "${shownotes_srt}" ]] then echo_error "Could not find the subtitles for media \"${media}\"" fi - + # Find duration - duration=$( mediainfo --full --Output=JSON "${media}" | jq --raw-output '.media.track | .[] | select(."@type"=="Audio") | .Duration' | awk -F '.' '{print $1}' ) + duration=$( mediainfo --full --Output=JSON "${media}" | jq --raw-output '.media.track | .[] | select(."@type"=="Audio") | .Duration' | awk -F '.' '{print $1}' ) if [[ -z "${duration}" || "${duration}" -lt "30" || "${duration}" -gt "30000" ]] then echo_error "Invalid duration found in \"${media}\"" >&2 @@ -573,15 +698,15 @@ function media_checks() { # Gernerate the Spectrum and Waveform image ffmpeg -hide_banner -loglevel error -y -i "${media}" -lavfi "showspectrumpic=s=960x540" "${working_dir}/processing/${media_basename%.*}_spectrum.png" audio2image.bash "${media}" && mv -v "${media%.*}.png" "${working_dir}/processing/${media_basename%.*}_waveform.png" - - # Getting metadata - + + # Getting metadata + mediainfo "${media}" > "${working_dir}/processing/${media_basename%.*}_mediainfo.txt" - + exiftool "${media}" > "${working_dir}/processing/${media_basename%.*}_exiftool.txt" for check_file in spectrum.png waveform.png mediainfo.txt exiftool.txt - do + do if [ ! 
-s "${working_dir}/processing/${media_basename%.*}_${check_file}" ] then echo_error "The ${check_file} file was not generated for the \"${working_dir}/processing/${media_basename%.*}_${check_file}\"" >&2 @@ -591,13 +716,13 @@ function media_checks() { ffprobe="$( ffprobe ${media} 2>&1 | grep Audio: | sed 's/^.\s//g' )" file_mime="$( file --brief --mime ${media} )" file_mime_type="$( file --brief --mime-type ${media} )" - + if [[ -z "${ffprobe}" || -z ${file_mime} || -s ${file_mime_type} ]] then echo "ffprobe: ${ffprobe}, file_mime: ${file_mime},file_mime_type: ${file_mime_type}" echo_error "Wasn't able to find mime metadata from \"${media}/\"" fi - + } ################################################# @@ -606,9 +731,9 @@ function media_checks() { function generate_initial_report() { echo_debug "Generating the initial report. generate_initial_report()" - + # TODO list the images. - + echo " @@ -727,20 +852,20 @@ function manual_shownotes_review() { ls -al "${shownotes_html}" "${working_dir}/processing/${media_basename%.*}_media_report.html" echo_error "The files needed for to generate the inital report information are not available." fi - + if [ -s "${shownotes_edited}" ] then echo_debug "There is already an edited version of the shownotes at \"${shownotes_edited}\"." return fi - + cp -v "${shownotes_html}" "${shownotes_edited}" if [ ! -s "${shownotes_edited}" ] then echo_error "The edited shownotes are missing \"${shownotes_edited}\"." fi - + kate "${shownotes_edited}" >/dev/null 2>&1 & librewolf "${working_dir}/processing/${media_basename%.*}_media_report.html" >/dev/null 2>&1 & seamonkey "${shownotes_edited}" >/dev/null 2>&1 & @@ -753,7 +878,7 @@ function manual_shownotes_review() { then echo_error "The final review was not approved." 
fi - + # remove extra wrappers that seamonkey adds grep --invert-match --perl-regexp '|head>|' "${shownotes_edited}" | sponge "${shownotes_edited}" @@ -779,15 +904,15 @@ function post_show_to_hpr_db() { echo "WARN: The Episode hpr${ep_num} has already been posted" return fi - + if [ ! -s "${shownotes_edited}" ] then echo_error "Failed to find the extracted shownote html file \"${shownotes_edited}\"" fi notes="$( cat "${shownotes_edited}" | jq --slurp --raw-input @uri | sed -e 's/%0A"$//g' -e 's/^"//g' )" host_profile_encoded="$( echo "${host_profile}" | jq --slurp --raw-input @uri | sed -e 's/%0A"$//g' -e 's/^"//g' )" - - + + post_show_json="${working_dir}/post_show.json" echo "Sending:" @@ -828,20 +953,57 @@ function post_show_to_hpr_db() { \"host_profile\": \"${host_profile_encoded}\", \"notes\": \"${notes}\" }" > "${post_show_json}" - + jq '.' "${post_show_json}" - + if [ $? -ne 0 ] then echo_error "The file \"${post_show_json}\" is not valid json." fi - + curl --netrc --include --request POST "https://hub.hackerpublicradio.org/cms/add_show_json.php" --header "Content-Type: application/json" --data-binary "@${post_show_json}" if [ "$( curl --silent --netrc --write-out '%{http_code}' https://hub.hackerpublicradio.org/cms/say.php?id=${ep_num} --output /dev/null )" != 200 ] then echo_error "The Episode hpr${ep_num} has not been posted" fi + +} + +################################################# +# Get the + +function get_variables_from_episode_summary_json() { + + echo_debug "Creating Text to Speech summary. get_variables_from_episode_summary_json()" + + check_variable_is_correct ep_num working_dir + + if [ -z "${episode_summary_json}" ] + then + episode_summary_json="${working_dir}/episode_summary.json" + fi + + if [ ! -s "${episode_summary_json}" ] + then + echo_debug "The \"episode_summary_json\" variable/file is missing." 
+ if [ "$( curl --silent --netrc --write-out '%{http_code}' https://hub.hackerpublicradio.org/cms/say.php?id=${ep_num} --output "${episode_summary_json}" )" != 200 ] + then + echo_error "The Episode hpr${ep_num} has not been posted" + fi + fi + check_variable_is_correct episode_summary_json + + for episode_summary_key in $( jq --raw-output '. | keys | @tsv' "${episode_summary_json}" ) + do + episode_summary_value="$( jq --raw-output ".${episode_summary_key}" "${episode_summary_json}" | sed -e 's/ \././g' -e 's/\.\./\./g' -e 's/ / /g' )" + declare "${episode_summary_key}=${episode_summary_value}" + echo_debug "Setting \"${episode_summary_key}\" to \"${episode_summary_value}\" from \"$( basename ${episode_summary_json} )\"" + check_variable_is_correct ${episode_summary_key} + done + + duration_iso8601="$( \date -d@${duration} -u +%H:%M:%S )" + check_variable_is_correct duration_iso8601 } @@ -852,38 +1014,17 @@ function create_tts_summary() { echo_debug "Creating Text to Speech summary. create_tts_summary()" - if [ "$( curl --silent --netrc --write-out '%{http_code}' https://hub.hackerpublicradio.org/cms/say.php?id=${ep_num} --output "${working_dir}/episode_summary.json" )" != 200 ] - then - echo_error "The Episode hpr${ep_num} has not been posted" - fi - - if [ ! -s "${working_dir}/episode_summary.json" ] - then - echo_error "Failed to find the extracted shownote html file \"episode_summary.json\"" - fi - - duration="$( jq --raw-output '.duration' "${working_dir}/episode_summary.json" | sed -e 's/ \././g' -e 's/\.\./\./g' -e 's/ / /g' )" - if [[ -z "${duration}" || "${duration}" -lt "30" || "${duration}" -gt "30000" ]] - then - echo_error "Invalid duration found in \"episode_summary.json\"" >&2 - fi - - synopsis="$( jq --raw-output '.synopsis' "${working_dir}/episode_summary.json" | sed -e 's/ \././g' -e 's/\.\./\./g' -e 's/ / /g' )" - - if [[ -z "${synopsis}" || "${synopsis}" == "null" ]] - then - echo_error "Could not retrieve the synopsis for the text to speech." 
- fi + check_variable_is_correct working_dir duration synopsis piper_bin piper_voice echo_debug "Converting text synopsis \"${synopsis}\" to speech." - + echo "${synopsis}" | "${piper_bin}" --model "${piper_voice}" --output_file "${working_dir}/processing/episode_tts.wav" - + if [ ! -s "${working_dir}/processing/episode_tts.wav" ] then echo_error "The text to speech episode summary was not created \"${working_dir}/processing/episode_tts.wav\"." fi - + } ################################################# @@ -892,22 +1033,22 @@ function create_tts_summary() { function generate_intro() { echo_debug "Generating the intro. generate_intro()" - + if [[ ! -s "${working_dir}/processing/episode_tts.wav" || ! -s "${theme}" || ! -s "${media}" || ! -s "${outro_flac}" || ! -d "${working_dir}/processing/" ]] then echo_error "The files for the intro are not available." ls -al "${working_dir}/processing/episode_tts.wav" "${theme}" "${media}" "${outro_flac}" "${working_dir}/processing/" fi - + # Everything needs to be in the same format for the intro, 1 channel (mono) Sampling rate 44.1 kHz ffmpeg -hide_banner -loglevel error -y -i "${working_dir}/processing/episode_tts.wav" -ar 44100 -ac 1 "${working_dir}/processing//episode_tts.flac" - + # A level of silence is added at the beginning of the text to speech sox -V2 "${silence}" "${working_dir}/processing//episode_tts.flac" "${working_dir}/processing/episode_summary.flac" - + # The tracks are merged together resulting in the theme playing first, then after a period of silence the text to speech enters sox -V2 -m "${working_dir}/processing/episode_summary.flac" "${theme}" "${working_dir}/processing/episode_intro.flac" - + if [[ ! -s "${working_dir}/processing//episode_tts.flac" || ! -s "${working_dir}/processing/episode_summary.flac" || ! -s "${working_dir}/processing/episode_intro.flac" ]] then echo_error "The files for the theme audio sandwich are not available." 
@@ -922,22 +1063,22 @@ function generate_intro() { function generate_parent_audio() { echo_debug "Generating the parent audio - the sandwitch. generate_parent_audio()" - + if [[ ! -s "${working_dir}/processing/episode_intro.flac" || ! -s "${media}" || ! -s "${outro_flac}" ]] then echo_error "The files for the sandwich are not available." - ls -al + ls -al fi - + # Everything needs to be in the same format so the text to speech needs to be converted to 2 channel Sampling rate 44.1 kHz ffmpeg -hide_banner -loglevel error -y -i "${media}" -ar 44100 -ac 1 "${working_dir}/processing/episode_body.flac" - + # Combine the components together sox -V2 "${working_dir}/processing/episode_intro.flac" "${working_dir}/processing/episode_body.flac" "${outro_flac}" "${working_dir}/processing/episode_sandwitch.flac" - - # Normalise the audio + + # Normalise the audio ffmpeg -hide_banner -loglevel error -y -i "${working_dir}/processing/episode_sandwitch.flac" -af loudnorm=I=-16:LRA=11:TP=-1.5 "${working_dir}/processing/episode_final.flac" - + } ################################################# @@ -946,19 +1087,19 @@ function generate_parent_audio() { function generate_derived_media() { echo_debug "Generating derived audio. generate_derived_media()" - + if [[ ! -s "${working_dir}/processing/episode_final.flac" ]] then ls -al echo_error "The final cut is not available." fi - - episode_comment="$( jq --raw-output '.comment' "${working_dir}/episode_summary.json" )" + + episode_comment="$( jq --raw-output '.comment' "${episode_summary_json}" )" episode_year="$( echo "${ep_date}" | cut -c -4 )" - + # https://wiki.multimedia.cx/index.php?title=FFmpeg_Metadata - for extension in flac wav mp3 ogg opus + for extension in flac wav mp3 ogg opus do echo_debug "Generating \"hpr${ep_num}.${extension}\"." 
ffmpeg -hide_banner -loglevel error -y -i "${working_dir}/processing/episode_final.flac" \ @@ -975,17 +1116,17 @@ function generate_derived_media() { "${working_dir}/hpr${ep_num}.${extension}" fix_tags -album="Hacker Public Radio" -artist="${host_name}" -comment="${episode_comment} The license is ${episode_license}" -genre="Podcast" -title="${title}" -track="${ep_num}" -year="${episode_year}" "${working_dir}/hpr${ep_num}.${extension}" - + if [[ ! -s "${working_dir}/hpr${ep_num}.${extension}" ]] then echo_error "Failed to generate \"${working_dir}/hpr${ep_num}.${extension}\"." ls -al "${working_dir}/hpr${ep_num}.${extension}" fi - + done - + cp -v "${media}" "${working_dir}/hpr${ep_num}_source.${media##*.}" - + if [[ ! -s "${working_dir}/hpr${ep_num}_source.${media##*.}" ]] then echo_error "Failed to copy \"${working_dir}/hpr${ep_num}_source.${media##*.}\"." @@ -1000,30 +1141,30 @@ function generate_derived_media() { function generate_show_transcript() { echo_debug "Generate show transcript and subtitles. generate_show_transcript()" - + # TODO Currently processed elsewhere by hpr-get-and-transcode.bash and uploaded to hpr:upload/ to be synced with media above if [[ ! -s "${media}" || ! -s "${media%.*}.srt" || ! -s "${intro_srt}" || ! -s "${outro_srt}" || ! -s "${working_dir}/processing/episode_intro.flac" || ! -s "${working_dir}/processing/episode_body.flac" ]] then ls -al "${media}" "${media%.*}.srt" "${intro_srt}" "${outro_srt}" "${working_dir}/processing/episode_intro.flac" "${working_dir}/processing/episode_body.flac" echo_error "The transcriptions files are not available." 
- fi - - # Copy in the intro subtitle template and replace each line with the text with the summary + fi - date="$( jq --raw-output '.date' "${working_dir}/episode_summary.json" | sed -e 's/ \././g' -e 's/\.\./\./g' -e 's/ / /g' )" - title="$( jq --raw-output '.title' "${working_dir}/episode_summary.json" | sed -e 's/ \././g' -e 's/\.\./\./g' -e 's/ / /g' )" - duration="$( jq --raw-output '.duration' "${working_dir}/episode_summary.json" | sed -e 's/ \././g' -e 's/\.\./\./g' -e 's/ / /g' )" + # Copy in the intro subtitle template and replace each line with the text with the summary + + date="$( jq --raw-output '.date' "${episode_summary_json}" | sed -e 's/ \././g' -e 's/\.\./\./g' -e 's/ / /g' )" + title="$( jq --raw-output '.title' "${episode_summary_json}" | sed -e 's/ \././g' -e 's/\.\./\./g' -e 's/ / /g' )" + duration="$( jq --raw-output '.duration' "${episode_summary_json}" | sed -e 's/ \././g' -e 's/\.\./\./g' -e 's/ / /g' )" duration_iso8601="$( \date -d@${duration} -u +%H:%M:%S )" - artist="$( jq --raw-output '.artist' "${working_dir}/episode_summary.json" | sed -e 's/ \././g' -e 's/\.\./\./g' -e 's/ / /g' )" - explicit="$( jq --raw-output '.explicit' "${working_dir}/episode_summary.json" | sed -e 's/ \././g' -e 's/\.\./\./g' -e 's/ / /g' )" - license="$( jq --raw-output '.license' "${working_dir}/episode_summary.json" | sed -e 's/ \././g' -e 's/\.\./\./g' -e 's/ / /g' )" - summary="$( jq --raw-output '.summary' "${working_dir}/episode_summary.json" | sed -e 's/ \././g' -e 's/\.\./\./g' -e 's/ / /g' )" - + artist="$( jq --raw-output '.artist' "${episode_summary_json}" | sed -e 's/ \././g' -e 's/\.\./\./g' -e 's/ / /g' )" + explicit="$( jq --raw-output '.explicit' "${episode_summary_json}" | sed -e 's/ \././g' -e 's/\.\./\./g' -e 's/ / /g' )" + license="$( jq --raw-output '.license' "${episode_summary_json}" | sed -e 's/ \././g' -e 's/\.\./\./g' -e 's/ / /g' )" + summary="$( jq --raw-output '.summary' "${episode_summary_json}" | sed -e 's/ \././g' -e 
's/\.\./\./g' -e 's/ / /g' )" + if [[ -z "${date}" || "${date}" == "null" || -z "${title}" || "${title}" == "null" || -z "${duration_iso8601}" || "${duration_iso8601}" == "null" || -z "${artist}" || "${artist}" == "null" || -z "${explicit}" || "${explicit}" == "null" || -z "${license}" || "${license}" == "null" || -z "${summary}" || "${summary}" == "null" ]] then echo_error "Could not retrieve the synopsis for the text to speech." - ls -al "${working_dir}/episode_summary.json" + ls -al "${episode_summary_json}" fi REPLACE_LINE_1="This is Hacker Public Radio Episode ${ep_num}, for ${date}" @@ -1031,50 +1172,50 @@ function generate_show_transcript() { REPLACE_LINE_3="The host is ${artist} and the duration is ${duration_iso8601}" REPLACE_LINE_4="The flag is ${explicit}, and the license is ${license}" REPLACE_LINE_5="The summary is \"${summary}\"" - + cp -v ${intro_srt} "${working_dir}/processing/episode_intro.srt" cp -v ${outro_srt} "${working_dir}/processing/episode_outro.srt" - + sed -e "s~REPLACE_LINE_1~${REPLACE_LINE_1}~g" -e "s~REPLACE_LINE_2~${REPLACE_LINE_2}~g" -e "s~REPLACE_LINE_3~${REPLACE_LINE_3}~g" -e "s~REPLACE_LINE_4~${REPLACE_LINE_4}~g" -e "s~REPLACE_LINE_5~${REPLACE_LINE_5}~g" -i "${working_dir}/processing/episode_intro.srt" - + if [ "$( grep --count REPLACE_LINE "${working_dir}/processing/episode_intro.srt" )" -ne "0" ] then echo_error "The intro subtitles were not correctly generated \"${working_dir}/processing/episode_intro.srt\"." fi - + # Time shift the media subtitles on by the duration of the intro wav file # https://trac.ffmpeg.org/wiki/UnderstandingItsoffset - + itsoffset_intro="$( mediainfo --full --Output=JSON "${working_dir}/processing/episode_intro.flac" | jq --raw-output '.media.track | .[] | select(."@type"=="Audio") | .Duration' | awk -F '.' '{print $1}' )" - + if [[ -z "${itsoffset_intro}" || "${itsoffset_intro}" == "null" ]] then echo_error "Could not retrieve the itsoffset_intro to correct the timing of the subtitles." 
fi - + ffmpeg -hide_banner -loglevel error -y -itsoffset "${itsoffset_intro}" -i "${media%.*}.srt" -c copy "${working_dir}/processing/episode_body.srt" - # Timeshift the outro by the duration of the intro and the supplied media - + # Timeshift the outro by the duration of the intro and the supplied media + itsoffset_body="$( mediainfo --full --Output=JSON "${working_dir}/processing/episode_body.flac" | jq --raw-output '.media.track | .[] | select(."@type"=="Audio") | .Duration' | awk -F '.' '{print $1}' )" - + if [[ -z "${itsoffset_body}" || "${itsoffset_body}" == "null" ]] then echo_error "Could not retrieve the itsoffset_body to correct the timing of the subtitles." fi - + itsoffset_body=$((itsoffset_intro + $itsoffset_body)) - + ffmpeg -hide_banner -loglevel error -y -itsoffset "${itsoffset_body}" -i "${working_dir}/processing/episode_outro.srt" -c copy "${working_dir}/processing/episode_outro_shifted.srt" - + # Combine the intro, timeshifted media subtitles, and the timeshifted outro subtitles. - + cat "${working_dir}/processing/episode_intro.srt" "${working_dir}/processing/episode_body.srt" "${working_dir}/processing/episode_outro_shifted.srt" > "${working_dir}/processing/episode.srt" - + # Parse the resulting subtitle file fixing the numberic counter # https://en.wikipedia.org/wiki/SubRip - + count=1 cat "${working_dir}/processing/episode.srt" | while read this_line @@ -1087,28 +1228,28 @@ function generate_show_transcript() { echo "${this_line}" fi done > "${working_dir}/hpr${ep_num}.srt" - + # extract the txt version - + grep -Pv -- '-->|^$|^[0-9]+$' "${working_dir}/hpr${ep_num}.srt" > "${working_dir}/hpr${ep_num}.txt" - + if [[ ! -s "${working_dir}/hpr${ep_num}.srt" || ! -s "${working_dir}/hpr${ep_num}.txt" ]] then echo_error "The transcriptions files were not generated." 
ls -al "${working_dir}/hpr${ep_num}.srt" "${working_dir}/hpr${ep_num}.txt" fi - + } ################################################# -## Generate Final Report +## Generate Final Report function generate_final_report() { echo_debug "Generating the final report. generate_final_report()" - + final_report="${working_dir}/processing/hpr${ep_num}_report.html" - + for this_file_extension_to_check in flac mp3 ogg opus srt txt wav do if [[ ! -s "${working_dir}/hpr${ep_num}.${this_file_extension_to_check}" ]] @@ -1123,9 +1264,9 @@ function generate_final_report() { ls -al "${working_dir}/processing/${media_basename%.*}_media_report.html" echo_error "The initial report is not available.\"${working_dir}/processing/${media_basename%.*}_media_report.html\"" fi - + grep -Pv '|' "${working_dir}/processing/${media_basename%.*}_media_report.html" > "${final_report}" - + echo "$( echo "${synopsis}" ) @@ -1138,22 +1279,22 @@ $( echo "${synopsis}" ) ${working_dir}/processing//episode_tts.flac
@@ -1202,7 +1343,7 @@ $( cat "${working_dir}/processing/hpr${ep_num}_${this_file_extension_to_check}_f
" >> "${final_report}"
done
-
+
echo "
@@ -1248,7 +1389,7 @@ function manual_final_review() {
ls -al "${final_report}"
echo_error "The files needed for to generate the final report information are not available."
fi
-
+
librewolf "${final_report}" >/dev/null 2>&1 &
# # # # bluefish "${shownotes_edited}" >/dev/null 2>&1 &
# https://markdowntohtml.com/
@@ -1267,23 +1408,23 @@ function manual_final_review() {
# Register the assets with the hpr database
function register_assets() {
-
+
echo_debug "Registering the assets with the hpr database. register_assets()"
-
+
if [[ -s "${working_dir}/hpr${ep_num}_assets.csv" ]]
then
echo_debug "Removing \"${working_dir}/hpr${ep_num}_assets.csv\"."
rm -v "${working_dir}/hpr${ep_num}_assets.csv"
fi
-
+
echo '"episode_id","filename","extension","size", "sha1sum", "mime_type", "file_type"' | tee "${working_dir}/hpr${ep_num}_assets.csv"
-
+
for this_asset_filename in hpr${ep_num}.flac hpr${ep_num}.wav hpr${ep_num}.mp3 hpr${ep_num}.ogg hpr${ep_num}.opus hpr${ep_num}.srt hpr${ep_num}.txt $( find "${working_dir}/" -maxdepth 1 -type f -iname "hpr${ep_num}_image_*.*" )
do
this_asset_filename="$( basename "${this_asset_filename}" )"
echo_debug "Registering \"${this_asset_filename}\"."
this_asset="${working_dir}/${this_asset_filename}"
-
+
if [[ ! -s "${this_asset}" ]]
then
echo_error "Failed to register missing file \"${this_asset}\"."
@@ -1295,12 +1436,12 @@ function register_assets() {
this_asset_sha1sum="$( sha1sum "${this_asset}" | awk '{print $1}' )"
this_asset_mime_type=$( file --dereference --brief --mime "${this_asset}" )
this_asset_file_type=$( file --dereference --brief "${this_asset}" )
-
+
if [ "$( echo ${this_asset_file_type} | wc --chars )" -gt "130" ]
then
this_asset_file_type="${this_asset_mime_type}"
fi
-
+
variables=( ep_num this_asset_basename this_asset_extension this_asset_size this_asset_sha1sum this_asset_mime_type this_asset_file_type working_dir ep_num )
for variable in "${variables[@]}"
@@ -1312,7 +1453,7 @@ function register_assets() {
echo_debug "The variable \"${variable}\" is set to \"${!variable}\"";
fi
done
-
+
echo "${ep_num},\"${this_asset_basename}\",\"${this_asset_extension}\",\"${this_asset_size}\",\"${this_asset_sha1sum}\",\"${this_asset_mime_type}\",\"${this_asset_file_type}\"" | tee --append "${working_dir}/hpr${ep_num}_assets.csv"
done
@@ -1326,7 +1467,7 @@ function register_assets() {
then
echo_error "The asset json file \"${working_dir}/hpr${ep_num}_assets.json\" is missing.";
fi
-
+
response="$( curl --silent --netrc-file $HOME/.netrc --write-out '%{http_code}' --output /dev/null --request POST https://hub.hackerpublicradio.org/cms/assets.php --data-ascii @"${working_dir}/hpr${ep_num}_assets.json" --header "Content-Type: application/json" )"
if [[ -z "${response}" || "${response}" != "200" ]]
then
@@ -1342,44 +1483,44 @@ function register_assets() {
function copy_files_to_origin_server() {
echo_debug "Copying the files to the origin server. copy_files_to_origin_server()"
-
+
check_variable_is_correct working_dir ep_num
-
+
# TODO get a origin server capable of storing all the files
for this_asset in hpr${ep_num}.mp3 hpr${ep_num}.ogg hpr${ep_num}.opus hpr${ep_num}.srt hpr${ep_num}.txt $( find "${working_dir}/" -type f -iname "hpr${ep_num}_image_*.*" )
do
this_asset="$( basename ${this_asset} )"
this_file="${working_dir}/${this_asset}"
-
+
echo_debug "Copying \"${this_file}\" to the origin server."
-
+
if [[ ! -s "${this_file}" ]]
then
echo_error "Failed to transfer missing file \"${this_file}\"."
ls -al "${this_file}"
fi
-
+
if [ "$( ssh rsync.net ls hpr/eps/hpr${ep_num}/ 2>/dev/null | wc --lines )" -eq "0" ]
then
echo_debug "Creating \"hpr/eps/hpr${ep_num}/\" on the origin server."
ssh rsync.net mkdir hpr/eps/hpr${ep_num}/ 2>/dev/null
fi
-
+
rsync --archive --quiet --partial --progress "${this_file}" rsync.net:hpr/eps/hpr${ep_num}/${this_asset}
-
+
origin_sha1sum="$( echo $( ssh rsync.net "sha1 hpr/eps/hpr${ep_num}/${this_asset}" 2> /dev/null ) | awk '{print $NF}' )"
this_asset_sha1sum="$( sha1sum "${this_file}" | awk '{print $1}' )"
-
+
if [[ -z "${origin_sha1sum}" || -z "${this_asset_sha1sum}" ]]
then
echo_error "Could not determine the local/origin sha1sum for file \"${this_file}\"."
fi
-
+
if [ "${origin_sha1sum}" != "${this_asset_sha1sum}" ]
then
echo_error "The local sha1sum \"${origin_sha1sum}\" and origin \"${this_asset_sha1sum}\" are mismatched for file \"${this_file}\"."
fi
-
+
done
}
@@ -1388,9 +1529,9 @@ function copy_files_to_origin_server() {
function copy_derived_files_to_borg() {
echo_debug "Sending the derived files to the admin server borg. copy_derived_files_to_borg()"
-
+
check_variable_is_correct working_dir ep_num shownotes_html
-
+
for extension in flac mp3 ogg opus wav
do
if [[ ! -s "${working_dir}/hpr${ep_num}.${extension}" ]]
@@ -1399,7 +1540,7 @@ function copy_derived_files_to_borg() {
ls -al "${working_dir}/hpr${ep_num}.${extension}"
fi
done
-
+
echo_debug "rsync -ave ssh --partial --progress \"${working_dir}/hpr${ep_num}.{flac,mp3,ogg,opus,wav}\" hpradmin@borg:/data/IA/uploads/"
rsync -ave ssh --partial --progress "${working_dir}/hpr${ep_num}".{flac,mp3,ogg,opus,wav} hpradmin@borg:/data/IA/uploads/
rsync_error="${?}"
@@ -1407,7 +1548,7 @@ function copy_derived_files_to_borg() {
then
echo_error "rsync to \"hpradmin@borg:/data/IA/uploads/\" failed with error ${rsync_error}"
fi
-
+
rsync -ave ssh --partial --progress "${working_dir}/hpr${ep_num}".{txt,srt} hpradmin@borg:/data/IA/uploads/hpr${ep_num}/
rsync_error="${?}"
if [ "${rsync_error}" -ne "0" ]
@@ -1435,19 +1576,19 @@ function copy_derived_files_to_borg() {
grep --perl-regexp ",${ep_num},.*SHOW_POSTED," "${processing_dir}/status.csv"
echo_debug "The show \"${ep_num}\" hasn't the correct status of \"SHOW_POSTED\" in the database."
fi
-
+
# Try and change the online db status to MEDIA_TRANSCODED
if [ "$( curl --silent --netrc-file ${HOME}/.netrc --write-out '%{http_code}' "https://hub.hackerpublicradio.org/cms/status.php?ep_num=${ep_num}&status=MEDIA_TRANSCODED" )" != 200 ]
then
echo_error "Could not change the status of \"${ep_num}\" to \"MEDIA_TRANSCODED\""
- fi
-
+ fi
+
# Get the current status
if [ "$( curl --silent --netrc-file ${HOME}/.netrc --write-out '%{http_code}' https://hub.hackerpublicradio.org/cms/status.php --output "${processing_dir}/status.csv" )" != 200 ]
then
echo_error "Could not get a list of the queue status from \"https://hub.hackerpublicradio.org/cms/status.php\""
fi
-
+
# Check the current status is correct MEDIA_TRANSCODED
response=$( grep --perl-regexp ",${ep_num},.*MEDIA_TRANSCODED," "${processing_dir}/status.csv" | head -1 | sed 's/,/ /g' )
if [ -z "${response}" ]
@@ -1455,11 +1596,11 @@ function copy_derived_files_to_borg() {
grep --perl-regexp ",${ep_num},.*MEDIA_TRANSCODED," "${processing_dir}/status.csv"
echo_error "The show \"${ep_num}\" hasn't the correct status of \"MEDIA_TRANSCODED\" in the database."
fi
-
+
echo_debug "The show \"${ep_num}\" has the correct status of \"MEDIA_TRANSCODED\" in the database."
-
+
#TODO images
-
+
#
}
@@ -1470,21 +1611,50 @@ function copy_derived_files_to_borg() {
function wait_for_the_internet_archive_to_process() {
echo_debug "Waiting for the Internet Archive to finish processing. wait_for_the_internet_archive_to_process()"
-
- if [ -z "${ep_num}x" ]
- then
- echo_error "Could not find the episode number while uploading to the Internet Archive"
- fi
+
+ check_variable_is_correct ep_num
+
+ while [ "$( ia tasks "hpr${ep_num}" | jq --slurp '[.[] | if .category == "catalog" then .status else empty end] | length' )" -ne "0" ]
+ do
+ echo_debug "Waiting for the Internet Archive to finish processing \"hpr${ep_num}\"."
+ date
+ sleep 1m
+ done
+
}
#################################################
# Send the derived files to the server borg to be sent to the Internet Archive
-function upload_to_the_internet_archive() {
- echo_debug "Sending the derived files to Internet Archive. upload_to_the_internet_archive()"
-
+function create_item_on_the_internet_archive() {
+ echo_debug "Sending the derived files to Internet Archive. create_item_on_the_internet_archive()"
+
+ check_variable_is_correct ep_num
+
+ if [ ! -s "${working_dir}/hpr${ep_num}.txt" ]
+ then
+ echo_error "The Internet Archive \"${working_dir}/hpr${ep_num}.txt\" is missing."
+ fi
+
+
+ echo ia upload hpr${ep_num} "${working_dir}/hpr${ep_num}.txt" --metadata=mediatype:audio --metadata="contributor:HackerPublicRadio" --metadata="creator:HPR Volunteers" --metadata="date:2025-05-05" --metadata="description:This show is a placeholder and will be updated soon." --metadata="language:eng" --metadata="licenseurl:http://creativecommons.org/licenses/by-sa/4.0" --metadata="title:A placeholder for hpr${ep_num}." --metadata=reviews-allowed:none --header x-archive-keep-old-version:0 --retries=5 --no-derive --no-backup
+
+# curl --silent --netrc --write-out '%{http_code}' https://hub.hackerpublicradio.org/cms/say.php?id=${ep_num} --output "${episode_summary_json}"
+# curl --netrc-file ${HOME}/.netrc --write-out '%{http_code}' https://hub.hackerpublicradio.org/cms/shownotes.php?id=${ep_num} --output "${working_dir}/shownotes.json"
+
+
+ wait_for_the_internet_archive_to_process
+
+}
+
+#################################################
+# Upload the derived files from the working directory directly to the Internet Archive
+
+function upload_files_to_the_internet_archive() {
+ echo_debug "Sending the derived files to Internet Archive. upload_files_to_the_internet_archive()"
+
check_variable_is_correct working_dir ep_num shownotes_html
-
+
# hpr4371.flac
# hpr4371.mp3
# hpr4371.ogg
@@ -1493,42 +1663,46 @@ function upload_to_the_internet_archive() {
# hpr4371.srt
# hpr4371.txt
# hpr4371.wav
-#
+#
- find ${working_dir} -mindepth 1 -maxdepth 1 -type f \( -iname "hpr${ep_num}.flac" -or -iname "hpr${ep_num}.mp3" -or -iname "hpr${ep_num}.ogg" -or -iname "hpr${ep_num}.opus" -or -iname "hpr${ep_num}_source.*" -or -iname "hpr${ep_num}.srt" -or -iname "hpr${ep_num}.txt" -or -iname "hpr${ep_num}.wav" \)
-
-
- for extension in flac mp3 ogg opus _source.* srt txt wav
+ if [ "$( find ${working_dir} -mindepth 1 -maxdepth 1 -type f \( -iname "hpr${ep_num}.flac" -or -iname "hpr${ep_num}.mp3" -or -iname "hpr${ep_num}.ogg" -or -iname "hpr${ep_num}.opus" -or -iname "hpr${ep_num}_source.*" -or -iname "hpr${ep_num}.srt" -or -iname "hpr${ep_num}.txt" -or -iname "hpr${ep_num}.wav" \) | wc --lines )" -ne "8" ]
+ then
+ echo_error "There are not 8 derived files for the Internet Archive."
+ fi
+
+ while read this_ia_file
do
- if [[ ! -s "${working_dir}/hpr${ep_num}.${extension}" ]]
+ if [[ ! -s "${this_ia_file}" ]]
then
- echo_error "The derived files to the Internet Archive are missing \"hpr${ep_num}.${extension}\"."
- ls -al "${working_dir}/hpr${ep_num}.${extension}"
+ ls -al "${this_ia_file}"
+ echo_error "The derived files to the Internet Archive are missing \"${this_ia_file}\"."
fi
- done
-#
+ done < <( find ${working_dir} -mindepth 1 -maxdepth 1 -type f \( -iname "hpr${ep_num}.flac" -or -iname "hpr${ep_num}.mp3" -or -iname "hpr${ep_num}.ogg" -or -iname "hpr${ep_num}.opus" -or -iname "hpr${ep_num}_source.*" -or -iname "hpr${ep_num}.srt" -or -iname "hpr${ep_num}.txt" -or -iname "hpr${ep_num}.wav" \) )
+
+
+#
# "${working_dir}/hpr${ep_num}.${extension}"
-#
+#
# ep_num="4371"
# ia upload hpr${ep_num} "${working_dir}/hpr${ep_num}.txt" --metadata=mediatype:audio --metadata="contributor:HackerPublicRadio" --metadata="creator:HPR Volunteers" --metadata="date:2025-05-05" --metadata="description:This show is a placeholder and will be updated soon." --metadata="language:eng" --metadata="licenseurl:http://creativecommons.org/licenses/by-sa/4.0" --metadata="title:A placeholder for hpr${ep_num}." --metadata=reviews-allowed:none --header x-archive-keep-old-version:0 --retries=5 --no-derive --no-backup
-#
+#
# wait for ia to finish
# ia tasks "hpr${ep_num}" | jq --slurp '[.[] | if .category == "catalog" then .status else empty end] | length'
# locate placeholder
-
+
# ia upload hpr4321_a686b0995e77c32d9e6566596ed9e4a2 /home/ken/tmp/hpr/processing/1739652162_4321_2025-02-24_624fd1a95db1d05e693eaf06e2f69b8667b0fc42edbc9/hpr4321.txt --metadata=mediatype:audio --metadata="collection:test_collection" --metadata="contributor:HackerPublicRadio" --metadata="creator:Klaatu" --metadata="date:2025-02-24" --metadata="description:This show is a placeholder and will be updated soon." --metadata="language:eng" --metadata="licenseurl:http://creativecommons.org/licenses/by-sa/4.0" --metadata="title:A placeholder for hpr4321." --metadata=reviews-allowed:none --header x-archive-keep-old-version:0 --retries=5 --no-derive --no-backup
# locate placeholder
# for extension in .flac .mp3 .ogg .opus _source.ogg .srt .txt .wav
-# do
+# do
# echo ia upload hpr${ep_num} "${working_dir}/hpr${ep_num}${extension}" --header "x-archive-keep-old-version:0" --retries=5 --no-derive --no-backup
# done
-#
+#
# ia upload hpr hpr4283_image_* --header "x-archive-keep-old-version:0" --retries=5 --no-derive --no-backup
-
+
# # # # for ext in flac mp3 ogg opus wav
# # # # do
# # # # if [[ ! -s "${working_dir}/hpr${ep_num}.${extension}" ]]
@@ -1537,13 +1711,13 @@ function upload_to_the_internet_archive() {
# # # # ls -al "${working_dir}/hpr${ep_num}.${extension}"
# # # # fi
# # # # done
-# # # #
+# # # #
# # # # I don't know if you noticed in the documentation, but there are some key things you need to bear in mind when creating a new IA item:
-# # # #
+# # # #
# # # # You MUST upload the metadata first, and the way things seem to be organised, your item MUST have a file with it. That's why my test uploaded that 1 second WAV file. This is a constraint in the ia tool and was in the days of using the S3-like interface, so I imagine it's built-in to the IA software.
# # # # When creating the metadata you MUST define the mediatype, and since this controls some of what you can upload, using audio seems to be vital for HPR stuff.
# # # # I discovered (by accident) that if the metadata phase fails and the next phase (uploading files) continues to run, an item will be created but it will be a default thing which YOU CAN'T CHANGE! You have to plead with the people at info@archive.org to reset this item (they will never delete) because you can't change the defaults to the right values. If this happens and is not fixed, then HPR has effectively lost a slot (at least I tell myself this so that I don't inadvertently screw up an upload).
-# # #
+# # #
# # # ia upload hpr_761c0f557b87090db3f8d4d9bce7fc70 \
# # # test.wav \
# # # --metadata="mediatype:audio" \
@@ -1577,7 +1751,7 @@ function upload_to_the_internet_archive() {
# # # # --metadata="title:hpr4295 :: Three Holiday Hacks from 2023" \
# # # # --header x-archive-keep-old-version:0 \
# # # # --retries=5 --no-derive --no-backup
-# # # #
+# # # #
# ia upload hpr4295 /data/IA/uploads/hpr4295.wav \
@@ -1601,24 +1775,24 @@ function upload_to_the_internet_archive() {
#################################################
# Main exceution starts here
-#
+#
# This tool will process the HPR shows allowing the janitor to review the media and fix shownotes.
-#
+#
# TODO Add support for reserve queue - process validate and move to reserve dir
-# TODO Add support to reprocess processed shows - when given onlay new media reprocess it, update duration on hub, generate and download shownotes.{html,json} from db
+# TODO Add support to reprocess processed shows - when given only new media reprocess it, update duration on hub, generate and download shownotes.{html,json} from db
# TODO Add support for community news - reusing ^^^
# TODO Add support for stereo for some episodes that request it
# TODO Include links in extract_images_brute_force
# TODO take screenshots of the rendered episode on the hpr website
# TODO audio_channels default to mono - stereo as an option
-# TODO Add chapter support
+# TODO Add chapter support
# TODO incorporate direct upload to the IA
# TODO copy the files to the backup disk
# Get supplied working dir and ep_num if provided
if [ $# -gt 0 ]
then
- declare -A hash
+# declare -A hash
for argument
do
if [[ $argument =~ ^[^=]+=.*$ ]]
@@ -1635,35 +1809,46 @@ fi
# program_checks # We know that all the programs and variables are set
-#
+#
# get_working_dir $@ # We have a working directory and a valid json file
-#
+#
# get_episode_metadata $@ # We have all the metadata we need to process the show.
-#
+#
# extract_images_brute_force # We have extracted the images by brute force
-#
-# media_checks #
-#
+#
+# media_checks #
+#
# generate_initial_report # Generate Initial Report for review by the Janitors
-#
+#
# manual_shownotes_review # Janitors review audio and shownote. Skips if done.
-#
+#
# post_show_to_hpr_db # Posts the episode to HPR. Skips if it is already posted.
-#
+#
+
+#########################################################################################
+# Using DB info from here
+
+
+
+get_variables_from_episode_summary_json # Get the episode from HPR.
+
+
+
+#
# create_tts_summary # Generate text to speech summary
-#
+#
# generate_intro # Generate Intro from the intro theme with overlay of a lead in silence then the tts summary
-#
+#
# generate_parent_audio # Combines the intro, the episode, and the outro to a final cut.
-#
+#
# generate_derived_media # Generate the flac wav mp3 ogg opus files
-#
-# generate_show_transcript
-#
+#
+# generate_show_transcript
+#
# generate_final_report
-#
+#
# manual_final_review
-#
+#
# register_assets
# copy_files_to_origin_server
@@ -1672,16 +1857,18 @@ fi
# copy_derived_files_to_borg
-upload_to_the_internet_archive
+#create_item_on_the_internet_archive
+
+#upload_files_to_the_internet_archive
#for i in {4301..4305};do echo ${i};/home/ken/sourcecode/personal/bin/hpr-check-ccdn-links.bash ${i};done
-echo_debug "The End"
+echo_debug "The End"
exit 0
# if ${force_overwrite}
-# then
+# then
# echo_debug "The setting \"force_overwrite\" is set to true, so files will be overwritten."
-# else
+# else
# echo_debug "The setting \"force_overwrite\" is set to false, so when files exist the program will skip files if they exist."
# fi