#!/usr/bin/env bash
# Copyright Ken Fallon - Released into the public domain. http://creativecommons.org/publicdomain/
#============================================================

# git clone https://github.com/davmo/fix_tags.git
# git clone https://repo.anhonesthost.net/HPR/hpr-tools.git

#################################################
# Variables

debug="1"                                         # Any value other than "0" enables echo_debug output
force_overwrite=true                              # Sets the behaviour when files exist or not
processing_dir="$HOME/tmp/hpr/processing"         # The directory where the files will be copied to for processing
theme="${processing_dir}/theme.flac"              # The hpr theme
silence="${processing_dir}/silence.flac"          # A segment of silence to offset the tts in the intro
outro_flac="${processing_dir}/outro.flac"         # The outro edited media
outro_srt="${processing_dir}/outro.srt"           # The outro subtitle file
intro_srt="${processing_dir}/intro.srt"           # The intro subtitle template file
piper_bin="/opt/bin/piper/piper/piper"            # The location of the text to speech engine
piper_voice="/opt/bin/piper/piper/piper-voices/en/en_US/lessac/medium/en_US-lessac-medium.onnx"

#################################################
# Display Error message, display help and exit
# Arguments: the message text.
# Outputs:   "ERROR: <message>" on stdout (callers may redirect it).
# Exits:     always, with status 1; nothing after a call to it runs.

function echo_error() {
  echo -e "ERROR: $*" #1>&2
  display_help_and_exit
  exit 1
}

#################################################
# Display Information message
# Arguments: the message text.
# Outputs:   "INFO: <message>" on stdout unless ${debug} is "0".

function echo_debug() {
  if [[ "${debug}" != "0" ]]; then
    echo -e "INFO: $*" #1>&2
  fi
}

#################################################
# Display Help
# Prints a pointer to the documentation (via echo_debug) and exits with 1.

function display_help_and_exit() {
  echo_debug "For more information see https://repo.anhonesthost.net/HPR/hpr_documentation"
  exit 1
}

#################################################
# Internal helpers

# Percent-encode stdin with jq's @uri filter; the single trailing newline
# (encoded as %0A) that echo/printf/cat add is removed again.
function _uri_encode() {
  jq --slurp --raw-input --raw-output '@uri' | sed -e 's/%0A$//'
}

# Escape the characters that are special in HTML text (&, < and >) on stdin.
function _html_escape() {
  sed -e 's/&/\&amp;/g' -e 's/</\&lt;/g' -e 's/>/\&gt;/g'
}

# Abort unless the variable named by $1 is set to something other than "null"
# ("null" is what jq --raw-output prints for a missing key).
function _require_value() {
  local name="$1"
  if [[ -z "${!name}" || "${!name}" == "null" ]]; then
    echo_error "The \"${name}\" variable is missing."
  fi
}

# Abort unless the variable named by $1 holds the path of a non-empty file
# whose "file --mime-type" output matches $2.
function _require_file_of_type() {
  local name="$1" expected_mime="$2"
  local path="${!name}"
  if [[ -z "${path}" || ! -s "${path}" ]]; then
    echo_error "The \"${name}\" variable/file is missing."
  fi
  if [[ "$( file --brief --mime-type "${path}" | grep --count "${expected_mime}" )" -ne "1" ]]; then
    echo_error "The \"${name}\" variable has not a valid \"${expected_mime}\" mime type."
  fi
}

# Create "<stem>_tn.<ext>" (400 pixels wide) when "<stem>.<ext>" is wider than
# 400 pixels according to mediainfo.
# Arguments: $1 image path without extension, $2 extension,
#            $3 the word used in the log message ("embedded" or "external").
function _thumbnail_if_wide() {
  local image_stem="$1" image_ext="$2" image_kind="$3"
  local image_width
  image_width="$( mediainfo "${image_stem}.${image_ext}" | grep Width | awk -F ': | pixels' '{print $2}' | sed 's/ //g' )"
  # An empty or non-numeric width (mediainfo could not read the file) simply
  # means no thumbnail instead of an "integer expression expected" error.
  if [[ "${image_width}" =~ ^[0-9]+$ ]] && (( 10#${image_width} > 400 )); then
    echo_debug "Generating thumbnail for ${image_kind} image \"${image_stem}.${image_ext}\"."
    magick "${image_stem}.${image_ext}" -resize 400x "${image_stem}_tn.${image_ext}"
  fi
}

# Copy stdin to the file $3, replacing the placeholder $1 on each line that
# contains it with the name of the next extracted image "<$2><n>.*" found under
# ${working_dir} (n counts from 1), or with its "_tn" thumbnail when one exists.
# Runs in the calling shell when fed by redirection, so echo_error really
# stops the script. Lines are read verbatim (IFS= read -r).
function _relink_images() {
  local placeholder="$1" name_stem="$2" output_file="$3"
  local image_index="1" this_line image_path image_name thumbnail_path thumbnail_name

  touch "${output_file}"
  while IFS= read -r this_line || [[ -n "${this_line}" ]]; do
    if [[ "${this_line}" != *"${placeholder}"* ]]; then
      printf '%s\n' "${this_line}" >> "${output_file}"
      continue
    fi

    image_path="$( find "${working_dir}/" -type f -iname "${name_stem}${image_index}.*" )"
    if [[ -z "${image_path}" || ! -s "${image_path}" ]]; then
      echo_error "Unable to find an image for \"${image_index}\", \"${image_path}\"."
    fi
    image_name="$( basename "${image_path}" )"

    thumbnail_path="$( find "${working_dir}/" -type f -iname "${image_name%.*}_tn.*" )"
    if [[ -z "${thumbnail_path}" || ! -s "${thumbnail_path}" ]]; then
      printf '%s\n' "${this_line}" | sed "s@${placeholder}@${image_name}@g" >> "${output_file}"
    else
      thumbnail_name="$( basename "${thumbnail_path}" )"
      # NOTE(review): the two empty lines around the thumbnail are kept as
      # found; presumably they once carried markup linking the thumbnail to
      # the full size image - confirm.
      echo "" >> "${output_file}"
      printf '%s\n' "${this_line}" | sed "s@${placeholder}@${thumbnail_name}@g" >> "${output_file}"
      echo "" >> "${output_file}"
    fi
    (( image_index = image_index + 1 ))
  done
}

#################################################
# Program Checks
# Verifies the processing directory, the theme files and the external programs.
# Arguments: the command line arguments (only inspected for -h / --help).

function program_checks() {
  echo_debug "Completing program checks. program_checks()"

  if [[ ! -d "${processing_dir}" ]]; then
    echo_error "The \"${processing_dir}\" is required but is not defined."
  fi

  if [[ ! -s "${theme}" || ! -s "${silence}" || ! -s "${outro_flac}" || ! -s "${outro_srt}" || ! -s "${intro_srt}" ]]; then
    # The listing has to come first: echo_error never returns.
    ls -al "${theme}" "${silence}" "${outro_flac}" "${outro_srt}" "${intro_srt}"
    echo_error "The files for the theme are not available."
  fi

  function is_installed() {
    local this_program
    for this_program in "$@"; do
      if ! command -v "${this_program}" >/dev/null 2>&1; then
        echo_error "The application \"${this_program}\" is required but is not installed."
      fi
    done
  }

  # exiftool, ffmpeg, fix_tags, librewolf and sox are called further down in
  # this file, so they are checked here as well.
  is_installed audio2image.bash awk base64 cat csvtojson curl date detox eval \
    exiftool extract_images ffmpeg ffprobe file find fix_tags grep head jq kate \
    librewolf magick mediainfo mv realpath remove-image.pl rsync seamonkey sed \
    sort sox sponge ssh touch wget

  local arg
  for arg in "$@"; do
    # Anchored so that arguments which merely contain "-h" do not trigger it.
    if [[ "${arg,,}" =~ ^(-h|--help)$ ]]; then
      echo_debug "Process the next SHOW_SUBMITTED show from the queue."
      echo_debug "If a directory is provided then the shownotes.json will be used."
    fi
  done
}

#################################################
# Program Checks
# Validates the global variables whose names are given as arguments and
# aborts through echo_error on the first one that is missing or invalid.
# Arguments: one or more variable names; a name without a rule is an error.

function check_variable_is_correct() {
  #echo_debug "Checking variables ${*}. check_variable_is_correct()"
  local argument
  local ep_num_regex='^(0{0,3}[1-9][0-9]{0,2}|[1-9][0-9]{0,3})$'

  for argument in "$@"; do
    case "${argument}" in
      working_dir)
        if [[ -z "${working_dir}" || ! -d "${working_dir}" ]]; then
          echo_error "The \"working dir\" variable is missing."
        fi
        ;;
      ep_num)
        if [[ -z "${ep_num}" ]]; then
          echo_error "The \"ep_num\" variable is missing."
        fi
        if [[ ! "${ep_num}" =~ ${ep_num_regex} ]]; then
          echo_error "The \"ep_num\" variable is not a valid number between 1 and 9999."
        fi
        ;;
      shownotes_edited)
        _require_file_of_type "${argument}" 'text/html'
        ;;
      episode_summary_json | assets_json)
        _require_file_of_type "${argument}" 'application/json'
        ;;
      assets_csv)
        _require_file_of_type "${argument}" 'text/csv'
        ;;
      album)
        _require_value "${argument}"
        if [[ "${album}" != "Hacker Public Radio" ]]; then
          echo_error "The \"album\" variable is not \"Hacker Public Radio\"."
        fi
        ;;
      duration)
        _require_value "${argument}"
        # Checked as digits first; 10# stops a leading zero being read as octal.
        if [[ ! "${duration}" =~ ^[0-9]+$ ]] || (( 10#${duration} < 30 || 10#${duration} > 30000 )); then
          echo_error "Invalid duration missing or outside range 30 to 30000." >&2
        fi
        ;;
      artist | comment | date | duration_iso8601 | explicit | genre | hostid | license | license_url | summary | synopsis | tags | title | track | year)
        _require_value "${argument}"
        ;;
      piper_bin)
        # create_tts_summary asks for this check; without a rule here it
        # would always end in the "unknown variable" error below.
        if [[ -z "${piper_bin}" || ! -x "${piper_bin}" ]]; then
          echo_error "The \"piper_bin\" text to speech engine \"${piper_bin}\" is missing or not executable."
        fi
        ;;
      piper_voice)
        if [[ -z "${piper_voice}" || ! -s "${piper_voice}" ]]; then
          echo_error "The \"piper_voice\" voice model \"${piper_voice}\" is missing."
        fi
        ;;
      *)
        echo_error "An unknown variable \"${argument}\" was provided."
        ;;
    esac
  done
}

#################################################
# Get the next show in the queue
# Downloads the queue status, takes the first SHOW_SUBMITTED entry and rsyncs
# its upload directory from the "hpr" ssh host into ${processing_dir}.
# Sets: response timestamp_epoc ep_num ep_date key email email_unpadded
#       hpr_upload_dir source_dir dest_dir working_dir

function get_ep_num_from_hpr_hub() {
  echo_debug "Processing the next HPR Show in the queue. get_ep_num_from_hpr_hub()"

  if [[ "$( curl --silent --netrc-file "${HOME}/.netrc" --write-out '%{http_code}' "https://hub.hackerpublicradio.org/cms/status.php" --output "${processing_dir}/status.csv" )" != "200" ]]; then
    echo_error "Could not get a list of the queue status from \"https://hub.hackerpublicradio.org/cms/status.php\""
  fi
  if [[ ! -s "${processing_dir}/status.csv" ]]; then
    echo_error "Failed to retrieve \"${processing_dir}/status.csv\" from server."
  fi

  response="$( grep ',SHOW_SUBMITTED,' "${processing_dir}/status.csv" | head -1 | sed 's/,/ /g' )"
  if [[ -z "${response}" ]]; then
    echo_debug "Getting a list of all the reservations."
    curl --silent --netrc-file "${HOME}/.netrc" "https://hub.hackerpublicradio.org/cms/status.php" | sort -n
    echo_error "There appear to be no more shows with the status \"SHOW_SUBMITTED\"."
  fi

  # Whitespace separated fields: 1 timestamp, 2 episode, 3 date, 4 key, 6 email.
  read -r timestamp_epoc ep_num ep_date key _ email _ <<< "${response}"
  email_unpadded="$( printf '%s\n' "${email}" | sed 's/.nospam@nospam./@/g' )"

  hpr_upload_dir="/home/hpr/upload/${timestamp_epoc}_${ep_num}_${ep_date}_${key}"
  source_dir="hpr:${hpr_upload_dir}"
  dest_dir="${timestamp_epoc}_${ep_num}_${ep_date}_${key}"
  working_dir="${processing_dir}/${dest_dir}"

  echo_debug "Downloading hpr${ep_num} from ${email_unpadded}"
  echo_debug ""
  echo_debug "rsync -ave ssh --partial --progress ${source_dir}/ ${working_dir}/"
  ssh hpr -t "detox -v ${hpr_upload_dir}/"
  rsync -ave ssh --partial --progress "${source_dir}/" "${working_dir}/"

  if [[ ! -s "${working_dir}/shownotes.json" ]]; then
    echo_error "The working dir is missing the shownotes file \"${working_dir}/shownotes.json\""
  fi
  if [[ "$( file --brief --mime-type "${working_dir}/shownotes.json" | grep --count "application/json" )" -eq 0 ]]; then
    echo_error "The \"${working_dir}/shownotes.json\" is not a \"application/json\" file"
  fi
}

#################################################
# Get the show information from a local directory
# When ${working_dir} has no shownotes.json the episode number is taken from
# ${ep_num}, or from a "_NNNN_" part of the directory name, and the shownotes
# are downloaded from the hub.

function get_ep_num_from_local_dir() {
  echo_debug "Processing a local directory. get_ep_num_from_local_dir()"

  check_variable_is_correct working_dir

  if [[ ! -s "${working_dir}/shownotes.json" ]]; then
    echo_debug "Could not find a \"shownotes.json\" in the working directory \"${working_dir}/\""

    if [[ -z "${ep_num}" ]]; then
      echo_debug "Attempting to get episode number from the \"${working_dir}\""
      if [[ "${working_dir}" =~ _([0-9]{4})_ ]]; then
        ep_num="${BASH_REMATCH[1]}"
      fi
    fi
    if [[ -z "${ep_num}" ]]; then
      echo_error "Could not find the episode number - please rerun with \"$0 ep_num=9876\""
    fi

    echo_debug "Attempting to download information for episode \"${ep_num}\""
    # The URL is quoted: the "?" in it is a glob character.
    if [[ "$( curl --silent --netrc-file "${HOME}/.netrc" --write-out '%{http_code}' "https://hub.hackerpublicradio.org/cms/shownotes.php?id=${ep_num}" --output "${working_dir}/shownotes.json" )" != "200" ]]; then
      echo_error "The Episode hpr${ep_num} has not been posted."
    fi
    if [[ ! -s "${working_dir}/shownotes.json" ]]; then
      echo_error "The Episode information for hpr${ep_num} failed to download."
    fi
  fi

  if [[ -s "${working_dir}/shownotes.json" && "$( file --brief --mime-type "${working_dir}/shownotes.json" | grep --count "application/json" )" -eq 0 ]]; then
    echo_error "\"${working_dir}/shownotes.json\" is not a \"application/json\" file"
  fi
}

#################################################
# Get the show either the next in the queue
# or from a local queue directory
# Arguments: the command line arguments; with none the hub queue is used.
# Creates "${working_dir}/processing/" when it does not exist yet.

function get_working_dir() {
  echo_debug "Getting working directory and populating show information. get_working_dir()"

  if [[ $# -eq 0 ]]; then
    get_ep_num_from_hpr_hub
  fi

  check_variable_is_correct working_dir

  if [[ -z "${ep_num}" ]]; then
    get_ep_num_from_local_dir "$@"
    check_variable_is_correct ep_num
  fi

  if [[ ! -s "${working_dir}/shownotes.json" ]]; then
    echo_error "The working dir \"${working_dir}\" could not be found."
  fi
  echo_debug "Found working directory as \"${working_dir}\""

  if [[ ! -d "${working_dir}/processing/" ]]; then
    mkdir -v "${working_dir}/processing/"
    if [[ ! -d "${working_dir}/processing/" ]]; then
      echo_error "Could not create the processing directory \"${working_dir}/processing/\"."
    fi
  fi
}

#################################################
# Provides all the metadata we need to process the show.
# Reads ${working_dir}/shownotes.json into global variables, writes the show
# notes to shownotes.html and then applies any "name=value" arguments as
# overrides (the value is stored percent-encoded).
# Arguments: the command line arguments.

function get_episode_metadata() {
  echo_debug "Extracting the episode metadata. get_episode_metadata()"

  if [[ -s "${working_dir}/shownotes.json" && "$( file --brief --mime-type "${working_dir}/shownotes.json" | grep --count "application/json" )" -eq 0 ]]; then
    echo_error "\"${working_dir}/shownotes.json\" is not a \"application/json\" file"
  fi

  shownotes_json="${working_dir}/shownotes.json"
  shownotes_html="${working_dir}/shownotes.html"
  shownotes_edited="${working_dir}/shownotes_edited.html"

  hostid="$( jq --raw-output '.host.Host_ID' "${shownotes_json}" )"
  host_name="$( jq --raw-output '.host.Host_Name' "${shownotes_json}" )"
  email="$( jq --raw-output '.host.Host_Email' "${shownotes_json}" )"
  email_padded="$( printf '%s\n' "${email}" | sed 's/@/.nospam@nospam./g' )"
  title="$( jq --raw-output '.episode.Title' "${shownotes_json}" )"
  summary="$( jq --raw-output '.episode.Summary' "${shownotes_json}" )"
  series_id="$( jq --raw-output '.episode.Series' "${shownotes_json}" )"
  series_name="$( jq --raw-output '.episode.Series_Name' "${shownotes_json}" )"
  explicit="$( jq --raw-output '.episode.Explicit' "${shownotes_json}" )"
  episode_license="$( jq --raw-output '.episode.Show_License' "${shownotes_json}" )"
  ep_date="$( jq --raw-output '.metadata.Episode_Date' "${shownotes_json}" )"
  ep_num="$( jq --raw-output '.metadata.Episode_Number' "${shownotes_json}" )"
  key="$( jq --raw-output '.metadata.Key' "${shownotes_json}" )"
  tags="$( jq --raw-output '.episode.Tags' "${shownotes_json}" )"
  host_license="$( jq --raw-output '.host.Host_License' "${shownotes_json}" )"
  host_profile="$( jq --raw-output '.host.Host_Profile' "${shownotes_json}" )"
  remote_media="$( jq --raw-output '.metadata.url' "${shownotes_json}" )"
  shownotes_json_sanatised="$( jq 'del(.episode.Show_Notes, .metadata.Host_IP)' "${shownotes_json}" )"

  echo_debug "Extracting shownotes html from json file."
  jq --raw-output '.episode.Show_Notes' "${shownotes_json}" > "${shownotes_html}"
  if [[ ! -s "${shownotes_html}" ]]; then
    echo_error "Failed to extract the shownote html file \"${shownotes_html}\""
  fi

  # NOTE(review): the lines put before and after the notes are (almost) empty
  # as found; presumably they were meant to carry wrapper markup - confirm.
  {
    echo ''
    cat "${shownotes_html}"
    echo ' '
  } | sponge "${shownotes_html}"

  variables=( shownotes_json shownotes_html hostid host_name email title summary series_id series_name explicit episode_license ep_date ep_num tags host_license host_profile remote_media shownotes_json_sanatised )
  local variable
  for variable in "${variables[@]}"; do
    # indirect expansion here; remote_media is the only optional entry
    if [[ -z "${!variable}" && "${variable}" != "remote_media" ]]; then
      echo_error "The variable \"${variable}\" is missing."
    else
      echo_debug "The variable \"${variable}\" is set to \"${!variable}\""
    fi
  done

  # Argument Override
  local argument this_key this_value
  for argument in "$@"; do
    if [[ "${argument}" =~ ^[^=]+=.*$ ]]; then
      # Split at the FIRST "=" so the value may itself contain "=".
      this_key="${argument%%=*}"
      this_value="$( printf '%s\n' "${argument#*=}" | _uri_encode )"
      if [[ ! "${this_key}" =~ ^[A-Za-z_][A-Za-z0-9_]*$ ]]; then
        echo_debug "Ignoring \"${argument}\" as \"${this_key}\" is not a valid variable name."
        continue
      fi
      # printf -v assigns without handing user input to eval.
      printf -v "${this_key}" '%s' "${this_value}"
      echo_debug "Replacing \"${this_key}\" with \"${this_value}\"."
    fi
  done

  # Hosts need to exist in the database
  if [[ "${hostid}" == '0' ]]; then
    echo_error "The hostid is 0. Create the host and use \"hostid=???\" to override"
  fi
}

#################################################
# Extract_images by brute force
# Saves base64 embedded images and downloads remote images referenced in
# ${shownotes_html}, creates thumbnails for wide ones, then rewrites the
# placeholders produced by remove-image.pl to point at the saved files.
# Does nothing when ${shownotes_edited} already exists.

function extract_images_brute_force() {
  echo_debug "Extracting images with grep. extract_images_brute_force()"

  if [[ -s "${shownotes_edited}" ]]; then
    echo_debug "There is already an edited version of the shownotes at \"${shownotes_edited}\", slipping image extraction."
    return
  fi

  if [[ -z "${shownotes_html}" || ! -s "${shownotes_html}" ]]; then
    echo_error "The shownotes_html file \"${shownotes_html}\" could not be found."
  fi

  ## TODO Temp fix until https://repo.anhonesthost.net/HPR/hpr-tools/issues/3 is available

  # Put every tag on its own line so the greps below see one image per line.
  sed "s#>#>\n#g" "${shownotes_html}" | sponge "${shownotes_html}"

  # Extract embedded images
  image_count_embedded="1"
  while IFS= read -r image; do
    this_image="${working_dir}/hpr${ep_num}_image_${image_count_embedded}"
    printf '%s' "${image}" | base64 -di > "${this_image}"
    this_ext="$( file --brief --mime-type "${this_image}" )"
    this_ext="${this_ext##*/}"
    mv -v "${this_image}" "${this_image}.${this_ext}"
    _thumbnail_if_wide "${this_image}" "${this_ext}" "embedded"
    (( image_count_embedded = image_count_embedded + 1 ))
  done < <( grep --color=never --perl-regexp --only-matching 'data:image/[^;]*;base64,\K[a-zA-Z0-9+/=]*' "${shownotes_html}" )

  # Download referenced images
  # NOTE(review): the pattern found here was empty, which makes
  # "grep --only-matching" print nothing, so no remote image was ever fetched.
  # It now selects <img> tags with an http(s) source - confirm this is the
  # intended set.
  image_count_external="1"
  while IFS= read -r image; do
    this_image="${working_dir}/hpr${ep_num}_image_ext_${image_count_external}"
    wget "${image}" --output-document="${this_image}"
    if [[ -s "${this_image}" ]]; then
      this_ext="$( file --brief --mime-type "${this_image}" )"
      this_ext="${this_ext##*/}"
      # The download has no extension yet, so it is moved under its full name
      # (stripping a ".*" suffix breaks when the directory contains a dot).
      mv -v "${this_image}" "${this_image}.${this_ext}"
      _thumbnail_if_wide "${this_image}" "${this_ext}" "external"
      (( image_count_external = image_count_external + 1 ))
    else
      echo_debug "Could not download external image \"${image}\"."
    fi
  done < <( grep --color=never --ignore-case --perl-regexp --only-matching '<img[^>]*src="https?://[^"]*"[^>]*>' "${shownotes_html}" | awk -F 'src=' '{print $2}' | awk -F '"' '{print $2}' )

  remove-image.pl < "${shownotes_html}" | sponge "${shownotes_html}"

  if [[ "${image_count_embedded}" -gt "1" ]]; then
    image_count_embedded="1"
    _relink_images "LOCAL_IMAGE_REMOVED" "hpr${ep_num}_image_" "${shownotes_html}.embedded_images" < "${shownotes_html}"
    mv -v "${shownotes_html}.embedded_images" "${shownotes_html}"
  else
    echo_debug "No embedded images found. ${image_count_embedded}"
  fi

  if [[ "${image_count_external}" -gt "1" ]]; then
    image_count_external="1"
    _relink_images "REMOTE_IMAGE_REMOVED" "hpr${ep_num}_image_ext_" "${shownotes_html}.external_images" < <( remove-image.pl < "${shownotes_html}" )
    mv -v "${shownotes_html}.external_images" "${shownotes_html}"
  else
    echo_debug "No external images found."
  fi

  ## TODO End Temp fix
}

#################################################
## Media Checks
# Finds the show media in ${working_dir} (fetching ${remote_media} first when
# set), asks which file to use when there are several, validates duration and
# channel count and writes the spectrum, waveform, mediainfo and exiftool
# files into ${working_dir}/processing/.
# Sets: media media_basename shownotes_srt duration supplied_channels
#       ffprobe file_mime file_mime_type

function media_checks() {
  echo_debug "Running media checks. media_checks()"

  if [[ -n "${remote_media}" && "${remote_media}" != "null" ]]; then
    echo_debug "Fetching remote media from \"${remote_media}\""
    if ! wget --timestamping --directory-prefix="${working_dir}/" "${remote_media}"; then
      echo_error "Could not get the remote media"
    fi
  fi

  media="$( find "${working_dir}/" -maxdepth 1 -type f -exec file --mime-type {} \; | grep -Ei ' audio/| video/' | awk -F ': ' '{print $1}' )"
  if [[ -z "${media}" ]]; then
    find "${working_dir}/" -type f
    echo_error "Can't find any media in \"${working_dir}/\""
  fi

  local -a media_candidates
  mapfile -t media_candidates <<< "${media}"
  if (( ${#media_candidates[@]} != 1 )); then
    echo "Multiple files found. Which one do you want to use ?"
    select this_media in "${media_candidates[@]}"; do
      ls -al "${this_media}"
      media="${this_media}"
      break
    done
  fi
  echo_debug "You selected \"${media}\"."

  if [[ -z "${media}" || ! -s "${media}" ]]; then
    echo_error "Could not find the media \"${media}/\""
  fi

  # Derived only now, so that it names the file that was actually selected.
  media_basename="$( basename "${media}" )"
  if [[ -z "${media_basename}" ]]; then
    echo_error "Could not create the media_basename \"${media_basename}/\""
  fi

  shownotes_srt="${media%.*}.srt"
  if [[ -z "${shownotes_srt}" || ! -s "${shownotes_srt}" ]]; then
    echo_error "Could not find the subtitles for media \"${media}\""
  fi

  # Find duration
  duration="$( mediainfo --full --Output=JSON "${media}" | jq --raw-output '.media.track | .[] | select(."@type"=="Audio") | .Duration' | awk -F '.' '{print $1}' )"
  if [[ ! "${duration}" =~ ^[0-9]+$ ]] || (( 10#${duration} < 30 || 10#${duration} > 30000 )); then
    echo_error "Invalid duration found in \"${media}\"" >&2
  fi
  echo_debug "The Duration is \"${duration}\" seconds from \"${media}\""

  # Find number of channels ( 1=mono or 2=stereo)
  supplied_channels="$( mediainfo --full --Output=JSON "${media}" | jq --raw-output '.media.track | .[] | select(."@type"=="Audio") | .Channels' )"
  if [[ ! "${supplied_channels}" =~ ^[0-9]+$ ]] || (( 10#${supplied_channels} < 1 || 10#${supplied_channels} > 2 )); then
    echo_error "Invalid number of audio channels \"${supplied_channels}\" found in \"${media}\"" >&2
  fi
  echo_debug "The number of audio channels is \"${supplied_channels}\" from \"${media}\" ."

  # Generate the Spectrum and Waveform image
  ffmpeg -hide_banner -loglevel error -y -i "${media}" -lavfi "showspectrumpic=s=960x540" "${working_dir}/processing/${media_basename%.*}_spectrum.png"
  audio2image.bash "${media}" && mv -v "${media%.*}.png" "${working_dir}/processing/${media_basename%.*}_waveform.png"

  # Getting metadata
  mediainfo "${media}" > "${working_dir}/processing/${media_basename%.*}_mediainfo.txt"
  exiftool "${media}" > "${working_dir}/processing/${media_basename%.*}_exiftool.txt"

  local check_file
  for check_file in spectrum.png waveform.png mediainfo.txt exiftool.txt; do
    if [[ ! -s "${working_dir}/processing/${media_basename%.*}_${check_file}" ]]; then
      echo_error "The ${check_file} file was not generated for the \"${working_dir}/processing/${media_basename%.*}_${check_file}\"" >&2
    fi
  done

  ffprobe="$( ffprobe "${media}" 2>&1 | grep Audio: | sed 's/^.\s//g' )"
  file_mime="$( file --brief --mime "${media}" )"
  file_mime_type="$( file --brief --mime-type "${media}" )"

  # All three must be non-empty ("-s" on the mime type tested for a file of
  # that name instead of an empty string).
  if [[ -z "${ffprobe}" || -z "${file_mime}" || -z "${file_mime_type}" ]]; then
    echo "ffprobe: ${ffprobe}, file_mime: ${file_mime},file_mime_type: ${file_mime_type}"
    echo_error "Wasn't able to find mime metadata from \"${media}/\""
  fi
}

#################################################
## Generate Initial Report for review by the Janitors
# Writes ${working_dir}/processing/<media name>_media_report.html from the
# metadata variables and the files produced by media_checks.
# NOTE(review): the report text found here carried no markup at all although
# it is written to a .html file; the headings, table, <pre> blocks, image tags
# and the "#derived_media" link target below are a reconstruction - confirm
# against the intended layout.

function generate_initial_report() {
  echo_debug "Generating the initial report. generate_initial_report()"

  # TODO list the images.

  local media_stem="${media_basename%.*}"
  local report_file="${working_dir}/processing/${media_stem}_media_report.html"
  local field_name field_rows=""
  local json_html mediainfo_html exiftool_html ffprobe_html mime_html media_html srt_html

  for field_name in hostid host_name title summary series_id series_name explicit episode_license tags host_license host_profile; do
    field_rows+="<tr><td>${field_name}</td><td>$( printf '%s\n' "${!field_name}" | _html_escape )</td></tr>"$'\n'
  done

  # Escaped so that "<", ">" and "&" in tool output are shown, not interpreted.
  json_html="$( printf '%s\n' "${shownotes_json_sanatised}" | _html_escape )"
  mediainfo_html="$( _html_escape < "${working_dir}/processing/${media_stem}_mediainfo.txt" )"
  exiftool_html="$( _html_escape < "${working_dir}/processing/${media_stem}_exiftool.txt" )"
  ffprobe_html="$( printf '%s\n' "${ffprobe}" | _html_escape )"
  mime_html="$( printf '%s\n' "${file_mime}" | _html_escape )"
  media_html="$( printf '%s\n' "${media}" | _html_escape )"
  srt_html="$( _html_escape < "${shownotes_srt}" )"

  cat > "${report_file}" <<EOF
<!DOCTYPE HTML>
<html lang="en">
<head>
<meta charset="utf-8" />
<title>Hacker Public Radio ~ The Technology Community Podcast</title>
</head>
<body>

<p><a href="#derived_media">Skip to Derived Media</a></p>

<h2>Field Mapping</h2>

<table>
<tr><th>Field</th><th>Value</th></tr>
${field_rows}</table>

<h2>Raw shownotes.json</h2>

<pre>
${json_html}
</pre>

<h2>Audio</h2>

<h3>mediainfo report</h3>

<pre>
${mediainfo_html}
</pre>

<h3>exiftool report</h3>

<pre>
${exiftool_html}
</pre>

<h3>Audio Spectrum</h3>

<p><img src="${media_stem}_spectrum.png" alt="Spectrum" /></p>

<h3>Audio Waveform</h3>

<p><img src="${media_stem}_waveform.png" alt="Waveform" /></p>

<pre>
${ffprobe_html}
${mime_html}
</pre>

<p>
${media_html}
</p>

<h2>Transcript</h2>

<pre>
${srt_html}
</pre>

</body>
</html>
EOF
}

#################################################
## Manually edit the shownotes to fix issues
# Copies the shownotes to ${shownotes_edited}, opens them and the media report
# in kate, librewolf and seamonkey and waits for a y/Y confirmation.
# Returns early when an edited version already exists.

function manual_shownotes_review() {
  echo_debug "Validating the initial report. manual_shownotes_review()"

  if [[ -z "${shownotes_html}" || ! -s "${shownotes_html}" || ! -s "${working_dir}/processing/${media_basename%.*}_media_report.html" ]]; then
    echo "shownotes_html: ${shownotes_html}"
    ls -al "${shownotes_html}" "${working_dir}/processing/${media_basename%.*}_media_report.html"
    echo_error "The files needed for to generate the inital report information are not available."
  fi

  if [[ -s "${shownotes_edited}" ]]; then
    echo_debug "There is already an edited version of the shownotes at \"${shownotes_edited}\"."
    return
  fi

  cp -v "${shownotes_html}" "${shownotes_edited}"
  if [[ ! -s "${shownotes_edited}" ]]; then
    echo_error "The edited shownotes are missing \"${shownotes_edited}\"."
  fi

  kate "${shownotes_edited}" >/dev/null 2>&1 &
  librewolf "${working_dir}/processing/${media_basename%.*}_media_report.html" >/dev/null 2>&1 &
  seamonkey "${shownotes_edited}" >/dev/null 2>&1 &
  # # # # bluefish "${shownotes_edited}" >/dev/null 2>&1 &
  # https://markdowntohtml.com/

  read -p "Does the metadata 'look ok ? (N|y) ? " -n 1 -r
  echo # (optional) move to a new line
  if [[ ! ${REPLY} =~ ^[yY]$ ]]; then
    echo_error "The final review was not approved."
  fi

  # remove extra wrappers that seamonkey adds
  # NOTE(review): the pattern found here had empty alternatives, so it matched
  # every line and --invert-match emptied the edited shownotes. It now drops
  # only lines made up entirely of document wrapper tags - confirm the list.
  grep --invert-match --ignore-case --perl-regexp \
    '^\s*((<!DOCTYPE[^>]*>|</?(html|head|body)[^>]*>|<meta[^>]*>|<title>[^<]*</title>)\s*)+$' \
    "${shownotes_edited}" | sponge "${shownotes_edited}"

  # Check to see if images have been linked TODO make a loop for found images
  if [[ "$( find "${working_dir}" -type f -iname "*_image_*" | wc --lines )" -ne "0" ]]; then
    if [[ "$( grep --count "_image_" "${shownotes_edited}" )" -eq "0" ]]; then
      echo_error "The extracted images were not linked in the shownotes \"${shownotes_edited}\"."
    fi
  fi
}

#################################################
# Post show to HPR
# Sends the metadata and the percent-encoded edited shownotes to the hub as
# JSON, unless say.php already answers 200 for this episode.

function post_show_to_hpr_db() {
  echo_debug "Posting the show to the HPR DB. post_show_to_hpr_db()"

  if [[ "$( curl --silent --netrc --write-out '%{http_code}' "https://hub.hackerpublicradio.org/cms/say.php?id=${ep_num}" --output /dev/null )" == "200" ]]; then
    echo "WARN: The Episode hpr${ep_num} has already been posted"
    return
  fi

  if [[ ! -s "${shownotes_edited}" ]]; then
    echo_error "Failed to find the extracted shownote html file \"${shownotes_edited}\""
  fi

  notes="$( _uri_encode < "${shownotes_edited}" )"
  host_profile_encoded="$( printf '%s\n' "${host_profile}" | _uri_encode )"
  post_show_json="${working_dir}/post_show.json"

  echo "Sending:"
  printf ' %s\n' \
    "key=${key}" "ep_num=${ep_num}" "ep_date=${ep_date}" "email=${email_padded}" \
    "title=${title}" "duration=${duration}" "summary=${summary}" \
    "series_id=${series_id}" "series_name=${series_name}" "explicit=${explicit}" \
    "episode_license=${episode_license}" "tags=${tags}" "hostid=${hostid}" \
    "host_name=${host_name}" "host_license=${host_license}" \
    "host_profile=${host_profile_encoded}" "notes=REMOVED"

  # jq builds the document so that quotes, backslashes and newlines in a title
  # or summary are escaped instead of producing invalid JSON. Every value is
  # sent as a string, as before.
  jq --null-input \
    --arg key "${key}" \
    --arg ep_num "${ep_num}" \
    --arg ep_date "${ep_date}" \
    --arg email "${email_padded}" \
    --arg title "${title}" \
    --arg duration "${duration}" \
    --arg summary "${summary}" \
    --arg series_id "${series_id}" \
    --arg series_name "${series_name}" \
    --arg explicit "${explicit}" \
    --arg episode_license "${episode_license}" \
    --arg tags "${tags}" \
    --arg hostid "${hostid}" \
    --arg host_name "${host_name}" \
    --arg host_license "${host_license}" \
    --arg host_profile "${host_profile_encoded}" \
    --arg notes "${notes}" \
    '{
      key: $key,
      ep_num: $ep_num,
      ep_date: $ep_date,
      email: $email,
      title: $title,
      duration: $duration,
      summary: $summary,
      series_id: $series_id,
      series_name: $series_name,
      explicit: $explicit,
      episode_license: $episode_license,
      tags: $tags,
      hostid: $hostid,
      host_name: $host_name,
      host_license: $host_license,
      host_profile: $host_profile,
      notes: $notes
    }' > "${post_show_json}"

  if ! jq '.' "${post_show_json}"; then
    echo_error "The file \"${post_show_json}\" is not valid json."
  fi

  curl --netrc --include --request POST "https://hub.hackerpublicradio.org/cms/add_show_json.php" --header "Content-Type: application/json" --data-binary "@${post_show_json}"

  if [[ "$( curl --silent --netrc --write-out '%{http_code}' "https://hub.hackerpublicradio.org/cms/say.php?id=${ep_num}" --output /dev/null )" != "200" ]]; then
    echo_error "The Episode hpr${ep_num} has not been posted"
  fi
}

#################################################
# Get the variables from the episode summary
# Downloads say.php?id=<ep_num> to ${episode_summary_json} when that file is
# missing, then exports every top level key of it as a variable of the same
# name and validates it with check_variable_is_correct.
# Sets: episode_summary_json duration_iso8601 and one variable per JSON key.

function get_variables_from_episode_summary_json() {
  echo_debug "Creating Text to Speech summary. get_variables_from_episode_summary_json()"

  check_variable_is_correct ep_num working_dir

  if [[ -z "${episode_summary_json}" ]]; then
    episode_summary_json="${working_dir}/episode_summary.json"
  fi

  if [[ ! -s "${episode_summary_json}" ]]; then
    echo_debug "The \"episode_summary_json\" variable/file is missing."
    if [[ "$( curl --silent --netrc --write-out '%{http_code}' "https://hub.hackerpublicradio.org/cms/say.php?id=${ep_num}" --output "${episode_summary_json}" )" != "200" ]]; then
      echo_error "The Episode hpr${ep_num} has not been posted"
    fi
  fi
  check_variable_is_correct episode_summary_json

  for episode_summary_key in $( jq --raw-output '. | keys | @tsv' "${episode_summary_json}" ); do
    # Tidy the text: no space before a full stop, no doubled full stops, and
    # runs of spaces squeezed to one.
    # NOTE(review): the last expression was found as 's/ / /g', which changes
    # nothing; squeezing repeated spaces is the presumed intent - confirm.
    episode_summary_value="$( jq --raw-output --arg k "${episode_summary_key}" '.[$k]' "${episode_summary_json}" | sed -e 's/ \././g' -e 's/\.\./\./g' -e 's/  */ /g' )"
    export "${episode_summary_key}=${episode_summary_value}"
    echo_debug "Setting \"${episode_summary_key}\" to \"${episode_summary_value}\" from \"$( basename "${episode_summary_json}" )\""
    check_variable_is_correct "${episode_summary_key}"
  done

  duration_iso8601="$( \date -d"@${duration}" -u +%H:%M:%S )"
  check_variable_is_correct duration_iso8601
}

#################################################
# Generate text to speech summary
# Pipes ${synopsis} through ${piper_bin} with the ${piper_voice} model into
# ${working_dir}/processing/episode_tts.wav.

function create_tts_summary() {
  echo_debug "Creating Text to Speech summary. create_tts_summary()"

  check_variable_is_correct working_dir duration synopsis piper_bin piper_voice

  echo_debug "Converting text synopsis \"${synopsis}\" to speech."
  printf '%s\n' "${synopsis}" | "${piper_bin}" --model "${piper_voice}" --output_file "${working_dir}/processing/episode_tts.wav"

  if [[ ! -s "${working_dir}/processing/episode_tts.wav" ]]; then
    echo_error "The text to speech episode summary was not created \"${working_dir}/processing/episode_tts.wav\"."
  fi
}

#################################################
# Generate Intro
# Builds episode_intro.flac in ${working_dir}/processing/ by offsetting the
# text to speech summary with ${silence} and mixing it over ${theme}.

function generate_intro() {
  echo_debug "Generating the intro. generate_intro()"

  if [[ ! -s "${working_dir}/processing/episode_tts.wav" || ! -s "${theme}" || ! -s "${media}" || ! -s "${outro_flac}" || ! -d "${working_dir}/processing/" ]]; then
    ls -al "${working_dir}/processing/episode_tts.wav" "${theme}" "${media}" "${outro_flac}" "${working_dir}/processing/"
    echo_error "The files for the intro are not available."
  fi

  # Everything needs to be in the same format for the intro, 1 channel (mono) Sampling rate 44.1 kHz
  ffmpeg -hide_banner -loglevel error -y -i "${working_dir}/processing/episode_tts.wav" -ar 44100 -ac 1 "${working_dir}/processing/episode_tts.flac"

  # A level of silence is added at the beginning of the text to speech
  sox -V2 "${silence}" "${working_dir}/processing/episode_tts.flac" "${working_dir}/processing/episode_summary.flac"

  # The tracks are merged together resulting in the theme playing first, then after a period of silence the text to speech enters
  sox -V2 -m "${working_dir}/processing/episode_summary.flac" "${theme}" "${working_dir}/processing/episode_intro.flac"

  if [[ ! -s "${working_dir}/processing/episode_tts.flac" || ! -s "${working_dir}/processing/episode_summary.flac" || ! -s "${working_dir}/processing/episode_intro.flac" ]]; then
    ls -al "${working_dir}/processing/episode_tts.flac" "${working_dir}/processing/episode_summary.flac" "${theme}" "${working_dir}/processing/episode_intro.flac"
    echo_error "The files for the theme audio sandwich are not available."
  fi
}

#################################################
# Generate parent audio - the sandwitch
# Joins intro, show and outro into episode_sandwitch.flac and normalises the
# result into episode_final.flac (both in ${working_dir}/processing/).

function generate_parent_audio() {
  echo_debug "Generating the parent audio - the sandwitch. generate_parent_audio()"

  if [[ ! -s "${working_dir}/processing/episode_intro.flac" || ! -s "${media}" || ! -s "${outro_flac}" ]]; then
    ls -al "${working_dir}/processing/episode_intro.flac" "${media}" "${outro_flac}"
    echo_error "The files for the sandwich are not available."
  fi

  # Everything needs to be in the same format, so the show is converted to
  # 1 channel (mono) Sampling rate 44.1 kHz like the intro
  ffmpeg -hide_banner -loglevel error -y -i "${media}" -ar 44100 -ac 1 "${working_dir}/processing/episode_body.flac"

  # Combine the components together
  sox -V2 "${working_dir}/processing/episode_intro.flac" "${working_dir}/processing/episode_body.flac" "${outro_flac}" "${working_dir}/processing/episode_sandwitch.flac"

  # Normalise the audio
  ffmpeg -hide_banner -loglevel error -y -i "${working_dir}/processing/episode_sandwitch.flac" -af loudnorm=I=-16:LRA=11:TP=-1.5 "${working_dir}/processing/episode_final.flac"
}

#################################################
# Generate derived media
# Encodes episode_final.flac to flac, wav, mp3, ogg and opus as
# ${working_dir}/hpr<ep_num>.<ext> with tags, and keeps a copy of the source
# media as hpr<ep_num>_source.<ext>.

function generate_derived_media() {
  echo_debug "Generating derived audio. generate_derived_media()"

  if [[ ! -s "${working_dir}/processing/episode_final.flac" ]]; then
    ls -al
    echo_error "The final cut is not available."
  fi

  episode_comment="$( jq --raw-output '.comment' "${episode_summary_json}" )"
  episode_year="$( printf '%s\n' "${ep_date}" | cut -c -4 )"

  # https://wiki.multimedia.cx/index.php?title=FFmpeg_Metadata

  for extension in flac wav mp3 ogg opus; do
    echo_debug "Generating \"hpr${ep_num}.${extension}\"."
    ffmpeg -hide_banner -loglevel error -y -i "${working_dir}/processing/episode_final.flac" \
      -metadata title="${title}" \
      -metadata artist="${host_name}" \
      -metadata author="${host_name}" \
      -metadata album="Hacker Public Radio" \
      -metadata comment="${episode_comment} The license is ${episode_license}" \
      -metadata year="${episode_year}" \
      -metadata track="${ep_num}" \
      -metadata genre="Podcast" \
      -metadata language="English" \
      -metadata copyright="${episode_license}" \
      "${working_dir}/hpr${ep_num}.${extension}"

    fix_tags -album="Hacker Public Radio" -artist="${host_name}" -comment="${episode_comment} The license is ${episode_license}" -genre="Podcast" -title="${title}" -track="${ep_num}" -year="${episode_year}" "${working_dir}/hpr${ep_num}.${extension}"

    if [[ ! -s "${working_dir}/hpr${ep_num}.${extension}" ]]; then
      ls -al "${working_dir}/hpr${ep_num}.${extension}"
      echo_error "Failed to generate \"${working_dir}/hpr${ep_num}.${extension}\"."
    fi
  done

  cp -v "${media}" "${working_dir}/hpr${ep_num}_source.${media##*.}"

  if [[ ! -s "${working_dir}/hpr${ep_num}_source.${media##*.}" ]]; then
    ls -al "${working_dir}/hpr${ep_num}_source.${media##*.}"
    echo_error "Failed to copy \"${working_dir}/hpr${ep_num}_source.${media##*.}\"."
  fi
}

# NOTE(review): the next line is the opening of generate_show_transcript, which continues beyond the reviewed section; it is passed through unchanged.
################################################# # Generate Subtitles function generate_show_transcript() { echo_debug "Generate show transcript and subtitles. generate_show_transcript()" # TODO Currently processed elsewhere by hpr-get-and-transcode.bash and uploaded to hpr:upload/ to be synced with media above if [[ ! -s "${media}" || ! -s "${media%.*}.srt" || ! -s "${intro_srt}" || ! -s "${outro_srt}" || ! -s "${working_dir}/processing/episode_intro.flac" || ! -s "${working_dir}/processing/episode_body.flac" ]] then ls -al "${media}" "${media%.*}.srt" "${intro_srt}" "${outro_srt}" "${working_dir}/processing/episode_intro.flac" "${working_dir}/processing/episode_body.flac" echo_error "The transcriptions files are not available." 
fi # Copy in the intro subtitle template and replace each line with the text with the summary date="$( jq --raw-output '.date' "${episode_summary_json}" | sed -e 's/ \././g' -e 's/\.\./\./g' -e 's/ / /g' )" title="$( jq --raw-output '.title' "${episode_summary_json}" | sed -e 's/ \././g' -e 's/\.\./\./g' -e 's/ / /g' )" duration="$( jq --raw-output '.duration' "${episode_summary_json}" | sed -e 's/ \././g' -e 's/\.\./\./g' -e 's/ / /g' )" duration_iso8601="$( \date -d@${duration} -u +%H:%M:%S )" artist="$( jq --raw-output '.artist' "${episode_summary_json}" | sed -e 's/ \././g' -e 's/\.\./\./g' -e 's/ / /g' )" explicit="$( jq --raw-output '.explicit' "${episode_summary_json}" | sed -e 's/ \././g' -e 's/\.\./\./g' -e 's/ / /g' )" license="$( jq --raw-output '.license' "${episode_summary_json}" | sed -e 's/ \././g' -e 's/\.\./\./g' -e 's/ / /g' )" summary="$( jq --raw-output '.summary' "${episode_summary_json}" | sed -e 's/ \././g' -e 's/\.\./\./g' -e 's/ / /g' )" if [[ -z "${date}" || "${date}" == "null" || -z "${title}" || "${title}" == "null" || -z "${duration_iso8601}" || "${duration_iso8601}" == "null" || -z "${artist}" || "${artist}" == "null" || -z "${explicit}" || "${explicit}" == "null" || -z "${license}" || "${license}" == "null" || -z "${summary}" || "${summary}" == "null" ]] then echo_error "Could not retrieve the synopsis for the text to speech." 
ls -al "${episode_summary_json}" fi REPLACE_LINE_1="This is Hacker Public Radio Episode ${ep_num}, for ${date}" REPLACE_LINE_2="Today's show is entitled, \"${title}\"" REPLACE_LINE_3="The host is ${artist} and the duration is ${duration_iso8601}" REPLACE_LINE_4="The flag is ${explicit}, and the license is ${license}" REPLACE_LINE_5="The summary is \"${summary}\"" cp -v ${intro_srt} "${working_dir}/processing/episode_intro.srt" cp -v ${outro_srt} "${working_dir}/processing/episode_outro.srt" sed -e "s~REPLACE_LINE_1~${REPLACE_LINE_1}~g" -e "s~REPLACE_LINE_2~${REPLACE_LINE_2}~g" -e "s~REPLACE_LINE_3~${REPLACE_LINE_3}~g" -e "s~REPLACE_LINE_4~${REPLACE_LINE_4}~g" -e "s~REPLACE_LINE_5~${REPLACE_LINE_5}~g" -i "${working_dir}/processing/episode_intro.srt" if [ "$( grep --count REPLACE_LINE "${working_dir}/processing/episode_intro.srt" )" -ne "0" ] then echo_error "The intro subtitles were not correctly generated \"${working_dir}/processing/episode_intro.srt\"." fi # Time shift the media subtitles on by the duration of the intro wav file # https://trac.ffmpeg.org/wiki/UnderstandingItsoffset itsoffset_intro="$( mediainfo --full --Output=JSON "${working_dir}/processing/episode_intro.flac" | jq --raw-output '.media.track | .[] | select(."@type"=="Audio") | .Duration' | awk -F '.' '{print $1}' )" if [[ -z "${itsoffset_intro}" || "${itsoffset_intro}" == "null" ]] then echo_error "Could not retrieve the itsoffset_intro to correct the timing of the subtitles." fi ffmpeg -hide_banner -loglevel error -y -itsoffset "${itsoffset_intro}" -i "${media%.*}.srt" -c copy "${working_dir}/processing/episode_body.srt" # Timeshift the outro by the duration of the intro and the supplied media itsoffset_body="$( mediainfo --full --Output=JSON "${working_dir}/processing/episode_body.flac" | jq --raw-output '.media.track | .[] | select(."@type"=="Audio") | .Duration' | awk -F '.' 
'{print $1}' )" if [[ -z "${itsoffset_body}" || "${itsoffset_body}" == "null" ]] then echo_error "Could not retrieve the itsoffset_body to correct the timing of the subtitles." fi itsoffset_body=$((itsoffset_intro + $itsoffset_body)) ffmpeg -hide_banner -loglevel error -y -itsoffset "${itsoffset_body}" -i "${working_dir}/processing/episode_outro.srt" -c copy "${working_dir}/processing/episode_outro_shifted.srt" # Combine the intro, timeshifted media subtitles, and the timeshifted outro subtitles. cat "${working_dir}/processing/episode_intro.srt" "${working_dir}/processing/episode_body.srt" "${working_dir}/processing/episode_outro_shifted.srt" > "${working_dir}/processing/episode.srt" # Parse the resulting subtitle file fixing the numberic counter # https://en.wikipedia.org/wiki/SubRip count=1 cat "${working_dir}/processing/episode.srt" | while read this_line do if [ "$( echo "${this_line}" | grep -c --perl-regexp '^[0-9]+$' )" -eq "1" ] then echo "${count}" count=$((count+1)) else echo "${this_line}" fi done > "${working_dir}/hpr${ep_num}.srt" # extract the txt version grep -Pv -- '-->|^$|^[0-9]+$' "${working_dir}/hpr${ep_num}.srt" > "${working_dir}/hpr${ep_num}.txt" if [[ ! -s "${working_dir}/hpr${ep_num}.srt" || ! -s "${working_dir}/hpr${ep_num}.txt" ]] then echo_error "The transcriptions files were not generated." ls -al "${working_dir}/hpr${ep_num}.srt" "${working_dir}/hpr${ep_num}.txt" fi } ################################################# ## Generate Final Report function generate_final_report() { echo_debug "Generating the final report. generate_final_report()" final_report="${working_dir}/processing/hpr${ep_num}_report.html" for this_file_extension_to_check in flac mp3 ogg opus srt txt wav do if [[ ! -s "${working_dir}/hpr${ep_num}.${this_file_extension_to_check}" ]] then ls -al "${working_dir}/hpr${ep_num}.${this_file_extension_to_check}" echo_error "The generated media is missing \"${this_file_extension_to_check}\"." fi done if [[ ! 
-s "${working_dir}/processing/${media_basename%.*}_media_report.html" ]] then ls -al "${working_dir}/processing/${media_basename%.*}_media_report.html" echo_error "The initial report is not available.\"${working_dir}/processing/${media_basename%.*}_media_report.html\"" fi grep -Pv '|' "${working_dir}/processing/${media_basename%.*}_media_report.html" > "${final_report}" echo "

Text To Speech

$( echo "${synopsis}" )


${working_dir}/processing//episode_tts.flac


" >> "${final_report}" for this_file_extension_to_check in flac mp3 ogg opus wav do ffmpeg -hide_banner -loglevel error -y -i "${working_dir}/hpr${ep_num}.${this_file_extension_to_check}" -lavfi "showspectrumpic=s=960x540" "${working_dir}/processing/hpr${ep_num}_${this_file_extension_to_check}_spectrum.png" audio2image.bash "${working_dir}/hpr${ep_num}.${this_file_extension_to_check}" && mv -v "${working_dir}/hpr${ep_num}.png" "${working_dir}/processing/hpr${ep_num}_${this_file_extension_to_check}_waveform.png" mediainfo "${working_dir}/hpr${ep_num}.${this_file_extension_to_check}" > "${working_dir}/processing/hpr${ep_num}_${this_file_extension_to_check}_mediainfo.txt" exiftool "${working_dir}/hpr${ep_num}.${this_file_extension_to_check}" > "${working_dir}/processing/hpr${ep_num}_${this_file_extension_to_check}_exiftool.txt" ffprobe "${working_dir}/hpr${ep_num}.${this_file_extension_to_check}" 2>&1 | grep Audio: | sed 's/^.\s//g' > "${working_dir}/processing/hpr${ep_num}_${this_file_extension_to_check}_ffprobe.txt" file --brief --mime "${working_dir}/hpr${ep_num}.${this_file_extension_to_check}" >> "${working_dir}/processing/hpr${ep_num}_${this_file_extension_to_check}_ffprobe.txt" this_file_mime_type="$( file --brief --mime-type "${working_dir}/hpr${ep_num}.${this_file_extension_to_check}" )" for this_file_to_check in spectrum.png waveform.png mediainfo.txt exiftool.txt ffprobe.txt do if [[ ! -s "${working_dir}/processing/hpr${ep_num}_${this_file_extension_to_check}_${this_file_to_check}" ]] then ls -al "${working_dir}/processing/hpr${ep_num}_${this_file_extension_to_check}_${this_file_to_check}" echo_error "The inital report information is missing \"${this_file_to_check}\"." fi done echo "

Report for derived media file \"hpr${ep_num}.${this_file_extension_to_check}\"

mediainfo report

$( cat "${working_dir}/processing/hpr${ep_num}_${this_file_extension_to_check}_mediainfo.txt" )

exiftool report

$( cat "${working_dir}/processing/hpr${ep_num}_${this_file_extension_to_check}_exiftool.txt" )

Audio Spectrum

\"Spectrum\"

Audio Waveform

\"Waveform\"

$( cat "${working_dir}/processing/hpr${ep_num}_${this_file_extension_to_check}_ffprobe.txt" )


${working_dir}/hpr${ep_num}.${this_file_extension_to_check}

" >> "${final_report}" done echo "

Rendered shownotes.html


$(cat "${shownotes_edited}" )

Subtitle File


$(cat "${working_dir}/hpr${ep_num}.srt" )

Transcript File


$(cat "${working_dir}/hpr${ep_num}.txt" )

" >> "${final_report}" } ################################################# ## Manually edit the shownotes to fix issues function manual_final_review() { echo_debug "Validating the final report. manual_final_review()" if [[ -z "${final_report}" || ! -s "${final_report}" ]] then ls -al "${final_report}" echo_error "The files needed for to generate the final report information are not available." fi librewolf "${final_report}" >/dev/null 2>&1 & # # # # bluefish "${shownotes_edited}" >/dev/null 2>&1 & # https://markdowntohtml.com/ read -p "Does the metadata 'look ok ? (N|y) ? " -n 1 -r echo # (optional) move to a new line if [[ ! $REPLY =~ ^[yY]$ ]] then echo "skipping...." echo_error "The final review was not approved." fi } ################################################# # Register the assets with the hpr database function register_assets() { echo_debug "Registering the assets with the hpr database. register_assets()" check_variable_is_correct working_dir ep_num assets_csv="${working_dir}/hpr${ep_num}_assets.csv" assets_json="${working_dir}/hpr${ep_num}_assets.json" if [[ -s "${assets_csv}" ]] then echo_debug "Removing \"${assets_csv}\"." rm -v "${assets_csv}" fi echo '"episode_id","filename","extension","size", "sha1sum", "mime_type", "file_type"' | tee "${assets_csv}" for this_asset_filename in hpr${ep_num}.flac hpr${ep_num}.wav hpr${ep_num}.mp3 hpr${ep_num}.ogg hpr${ep_num}.opus hpr${ep_num}.srt hpr${ep_num}.txt $( find "${working_dir}/" -maxdepth 1 -type f -iname "hpr${ep_num}_image_*.*" ) do this_asset_filename="$( basename "${this_asset_filename}" )" echo_debug "Registering \"${this_asset_filename}\"." this_asset="${working_dir}/${this_asset_filename}" if [[ ! -s "${this_asset}" ]] then echo_error "Failed to register missing file \"${this_asset}\"." 
ls -al "${this_asset}" fi this_asset_basename=$( basename "${this_asset}" ) this_asset_extension="${this_asset_basename##*.}" this_asset_size="$( ls -al "${this_asset}" | awk '{print $5}' )" this_asset_sha1sum="$( sha1sum "${this_asset}" | awk '{print $1}' )" this_asset_mime_type=$( file --dereference --brief --mime "${this_asset}" ) this_asset_file_type=$( file --dereference --brief "${this_asset}" ) if [ "$( echo ${this_asset_file_type} | wc --chars )" -gt "130" ] then this_asset_file_type="${this_asset_mime_type}" fi variables=( ep_num this_asset_basename this_asset_extension this_asset_size this_asset_sha1sum this_asset_mime_type this_asset_file_type working_dir ep_num ) for variable in "${variables[@]}" do if [ -z "${!variable}" ] then # indirect expansion here echo_error "The variable \"${variable}\" is missing."; else echo_debug "The variable \"${variable}\" is set to \"${!variable}\""; fi done echo "${ep_num},\"${this_asset_basename}\",\"${this_asset_extension}\",\"${this_asset_size}\",\"${this_asset_sha1sum}\",\"${this_asset_mime_type}\",\"${this_asset_file_type}\"" | tee --append "${assets_csv}" done if [ -s "${assets_csv}" ] then cat "${assets_csv}" | csvtojson | jq '{"assets":[.[]]}' | tee "assets_json" fi if [ ! -s "assets_json" ] then echo_error "The asset json file \"assets_json\" is missing."; fi response="$( curl --silent --netrc-file $HOME/.netrc --write-out '%{http_code}' --output /dev/null --request POST https://hub.hackerpublicradio.org/cms/assets.php --data-ascii @"assets_json" --header "Content-Type: application/json" )" if [[ -z "${response}" || "${response}" != "200" ]] then echo_error "The assets for episode hpr${ep_num} has not been registered. The response was \"${response}\"" else echo_debug "The assets for episode hpr${ep_num} have been registered. 
The response was \"${response}\"" fi } ################################################# # Register the assets with the hpr database function copy_files_to_origin_server() { echo_debug "Copying the files to the origin server. copy_files_to_origin_server()" check_variable_is_correct working_dir ep_num # TODO get a origin server capable of storing all the files for this_asset in hpr${ep_num}.mp3 hpr${ep_num}.ogg hpr${ep_num}.opus hpr${ep_num}.srt hpr${ep_num}.txt $( find "${working_dir}/" -type f -iname "hpr${ep_num}_image_*.*" ) do this_asset="$( basename ${this_asset} )" this_file="${working_dir}/${this_asset}" echo_debug "Copying \"${this_file}\" to the origin server." if [[ ! -s "${this_file}" ]] then echo_error "Failed to transfer missing file \"${this_file}\"." ls -al "${this_file}" fi if [ "$( ssh rsync.net ls hpr/eps/hpr${ep_num}/ 2>/dev/null | wc --lines )" -eq "0" ] then echo_debug "Creating \"hpr/eps/hpr${ep_num}/\" on the origin server." ssh rsync.net mkdir hpr/eps/hpr${ep_num}/ 2>/dev/null fi rsync --archive --quiet --partial --progress "${this_file}" rsync.net:hpr/eps/hpr${ep_num}/${this_asset} origin_sha1sum="$( echo $( ssh rsync.net "sha1 hpr/eps/hpr${ep_num}/${this_asset}" 2> /dev/null ) | awk '{print $NF}' )" this_asset_sha1sum="$( sha1sum "${this_file}" | awk '{print $1}' )" if [[ -z "${origin_sha1sum}" || -z "${this_asset_sha1sum}" ]] then echo_error "Could not determine the local/origin sha1sum for file \"${this_file}\"." fi if [ "${origin_sha1sum}" != "${this_asset_sha1sum}" ] then echo_error "The local sha1sum \"${origin_sha1sum}\" and origin \"${this_asset_sha1sum}\" are mismatched for file \"${this_file}\"." fi done } ################################################# # Send the derived files to the server borg to be sent to borg function copy_derived_files_to_borg() { echo_debug "Sending the derived files to the admin server borg. 
copy_derived_files_to_borg()" check_variable_is_correct working_dir ep_num shownotes_html for extension in flac mp3 ogg opus wav do if [[ ! -s "${working_dir}/hpr${ep_num}.${extension}" ]] then echo_error "The derived files to the admin server borg is missing \"hpr${ep_num}.${extension}\"." ls -al "${working_dir}/hpr${ep_num}.${extension}" fi done echo_debug "rsync -ave ssh --partial --progress \"${working_dir}/hpr${ep_num}.{flac,mp3,ogg,opus,wav}\" hpradmin@borg:/data/IA/uploads/" rsync -ave ssh --partial --progress "${working_dir}/hpr${ep_num}".{flac,mp3,ogg,opus,wav} hpradmin@borg:/data/IA/uploads/ rsync_error="${?}" if [ "${rsync_error}" -ne "0" ] then echo_error "rsync to \"hpradmin@borg:/data/IA/uploads/\" failed with error ${rsync_error}" fi rsync -ave ssh --partial --progress "${working_dir}/hpr${ep_num}".{txt,srt} hpradmin@borg:/data/IA/uploads/hpr${ep_num}/ rsync_error="${?}" if [ "${rsync_error}" -ne "0" ] then echo_error "rsync to \"hpradmin@borg:/data/IA/uploads/hpr${ep_num}/\" failed with error ${rsync_error}" fi rsync -ave ssh --partial --progress "${shownotes_edited}" hpradmin@borg:/data/IA/uploads/hpr${ep_num}/shownotes.html rsync_error="${?}" if [ "${rsync_error}" -ne "0" ] then echo_error "rsync to \"hpradmin@borg:/data/IA/uploads/hpr${ep_num}/shownotes.html\" failed with error ${rsync_error}" fi # Get the current status if [ "$( curl --silent --netrc-file ${HOME}/.netrc --write-out '%{http_code}' https://hub.hackerpublicradio.org/cms/status.php --output "${processing_dir}/status.csv" )" != 200 ] then echo_error "Could not get a list of the queue status from \"https://hub.hackerpublicradio.org/cms/status.php\"" fi # Check the current status is correct SHOW_POSTED response=$( grep --perl-regexp ",${ep_num},.*SHOW_POSTED," "${processing_dir}/status.csv" | head -1 | sed 's/,/ /g' ) if [ -z "${response}" ] then grep --perl-regexp ",${ep_num},.*SHOW_POSTED," "${processing_dir}/status.csv" echo_debug "The show \"${ep_num}\" hasn't the correct status of 
\"SHOW_POSTED\" in the database." fi # Try and change the online db status to MEDIA_TRANSCODED if [ "$( curl --silent --netrc-file ${HOME}/.netrc --write-out '%{http_code}' "https://hub.hackerpublicradio.org/cms/status.php?ep_num=${ep_num}&status=MEDIA_TRANSCODED" )" != 200 ] then echo_error "Could not change the status of \"${ep_num}\" to \"MEDIA_TRANSCODED\"" fi # Get the current status if [ "$( curl --silent --netrc-file ${HOME}/.netrc --write-out '%{http_code}' https://hub.hackerpublicradio.org/cms/status.php --output "${processing_dir}/status.csv" )" != 200 ] then echo_error "Could not get a list of the queue status from \"https://hub.hackerpublicradio.org/cms/status.php\"" fi # Check the current status is correct MEDIA_TRANSCODED response=$( grep --perl-regexp ",${ep_num},.*MEDIA_TRANSCODED," "${processing_dir}/status.csv" | head -1 | sed 's/,/ /g' ) if [ -z "${response}" ] then grep --perl-regexp ",${ep_num},.*MEDIA_TRANSCODED," "${processing_dir}/status.csv" echo_error "The show \"${ep_num}\" hasn't the correct status of \"MEDIA_TRANSCODED\" in the database." fi echo_debug "The show \"${ep_num}\" has the correct status of \"MEDIA_TRANSCODED\" in the database." #TODO images # Picture 1 shows the broken dog    walking accessory. } ################################################# # Wait for the Internet Archive to finish processing function wait_for_the_internet_archive_to_process() { echo_debug "Waiting for the Internet Archive to finish processing. wait_for_the_internet_archive_to_process()" check_variable_is_correct ep_num while [ "$( ia tasks "hpr${ep_num}" | jq --slurp '[.[] | if .category == "catalog" then .status else empty end] | length' )" -ne "0" ] do echo_debug "Waiting for the Internet Archive to finish processing \"hpr${ep_num}\"." 
date sleep 1m done } ################################################# # Send the derived files to the server borg to be sent to the Internet Archive function create_item_on_the_internet_archive() { echo_debug "Sending the derived files to Internet Archive. create_item_on_the_internet_archive()" check_variable_is_correct working_dir ep_num artist date synopsis title license_url if [ ! -s "${working_dir}/hpr${ep_num}.txt" ] then echo_error "The Internet Archive \"${working_dir}/hpr${ep_num}.txt\" is missing." fi ia list "hpr${ep_num}" > /dev/null if [ $? -ne 0 ] then echo_debug "The episode has not been posted to the Internet Archive" ia upload hpr${ep_num} "${working_dir}/hpr${ep_num}.txt" --metadata=mediatype:audio --metadata="contributor:HackerPublicRadio" --metadata="creator:${artist}" --metadata="date:${date}" --metadata="description:${synopsis}." --metadata="language:eng" --metadata="licenseurl:${license_url}" --metadata="title:${title}." --metadata=reviews-allowed:none --header x-archive-keep-old-version:0 --retries=5 --no-derive --no-backup else echo_debug "The episode has been posted to the Internet Archive" fi xdg-open https://archive.org/details/hpr${ep_num} wait_for_the_internet_archive_to_process } ################################################# # Send the derived files to the server borg to be sent to the Internet Archive function upload_files_to_the_internet_archive() { echo_debug "Sending the derived files to Internet Archive. upload_files_to_the_internet_archive()" check_variable_is_correct working_dir ep_num if [ -z "${assets_json}" ] then assets_json="${working_dir}/episode_summary.json" fi if [ ! -s "${assets_json}" ] then echo_debug "The \"assets_json\" variable/file is missing." 
if [ "$( curl --silent --netrc --write-out '%{http_code}' https://hub.hackerpublicradio.org/cms/say.php?id=${ep_num} --output "${assets_json}" )" != 200 ] then echo_error "The Episode hpr${ep_num} has not been posted" fi fi check_variable_is_correct assets_json # if [ "$( find ${working_dir} -mindepth 1 -maxdepth 1 -type f \( -iname "hpr${ep_num}.flac" -or -iname "hpr${ep_num}.mp3" -or -iname "hpr${ep_num}.ogg" -or -iname "hpr${ep_num}.opus" -or -iname "hpr${ep_num}_source.*" -or -iname "hpr${ep_num}.srt" -or -iname "hpr${ep_num}.txt" -or -iname "hpr${ep_num}.wav" \) | wc --lines )" -ne "8" ] # then # echo_error "There are not 8 derived files for the Internet Archive." # fi # # while read this_ia_file # do # if [[ ! -s "${this_ia_file}" ]] # then # ls -al "${this_ia_file}" # echo_error "The derived files to the Internet Archive are missing \"${this_ia_file}\"." # fi # # echo_debug "Sending \"${this_ia_file}\" to the Internet Archive" # # # # origin_sha1sum="$( echo $( ssh rsync.net "sha1 hpr/eps/hpr${ep_num}/${this_asset}" 2> /dev/null ) | awk '{print $NF}' )" # # this_asset_sha1sum="$( sha1sum "${this_file}" | awk '{print $1}' )" # # # # if [[ -z "${origin_sha1sum}" || -z "${this_asset_sha1sum}" ]] # # then # # echo_error "Could not determine the local/origin sha1sum for file \"${this_file}\"." 
# # fi # # # # ia download hpr_761c0f557b87090db3f8d4d9bce7fc70 hpr_761c0f557b87090db3f8d4d9bce7fc70_files.xml # # echo ia upload hpr${ep_num} "${this_ia_file}" --header "x-archive-keep-old-version:0" --retries=5 --no-derive --no-backup # # # # done < <( find ${working_dir} -mindepth 1 -maxdepth 1 -type f \( -iname "hpr${ep_num}.flac" -or -iname "hpr${ep_num}.mp3" -or -iname "hpr${ep_num}.ogg" -or -iname "hpr${ep_num}.opus" -or -iname "hpr${ep_num}_source.*" -or -iname "hpr${ep_num}.srt" -or -iname "hpr${ep_num}.txt" -or -iname "hpr${ep_num}.wav" \) ) } ################################################# # Main exceution starts here # # This tool will process the HPR shows allowing the janitor to review the media and fix shownotes. # # TODO Add support for reserve queue - process validate and move to reserve dir # TODO Add support to reprocess processed shows - when given onlay new media reprocess it, update duration on hub, generate and download shownotes.{html,json} from db # TODO Add support for community news - reusing ^^^ # TODO Add support for stereo for some episodes that request it # TODO Include links in extract_images_brute_force # TODO take screenshots of the rendered episode on the hpr website # TODO audio_channels default to mono - stereo as an option # TODO Add chapter support # TODO incorporate direct upload to the IA # TODO copy the files to the backup disk # Get supplied working dir and ep_num if provided if [ $# -gt 0 ] then # declare -A hash for argument do if [[ $argument =~ ^[^=]+=.*$ ]] then key="${argument%=*}" value="${argument#*=}" eval "${key}=${value}" echo_debug "Using supplied \"${key}\" of \"${value}\"" check_variable_is_correct ${key} fi done fi # program_checks # We know that all the programs and variables are set # # get_working_dir $@ # We have a working directory and a valid json file # # get_episode_metadata $@ # We have all the metadata we need to process the show. 
# # extract_images_brute_force # We have extracted the images by brute force # # media_checks # # # generate_initial_report # Generate Initial Report for review by the Janitors # # manual_shownotes_review # Janitors review audio and shownote. Skips if done. # # post_show_to_hpr_db # Posts the episode to HPR. Skips if it is already posted. # ######################################################################################### # Using DB info from here get_variables_from_episode_summary_json # Get the episode from HPR. # # create_tts_summary # Generate text to speech summary # # generate_intro # Generate Intro from the intro theme with overlay of a lead in silence then the tts summary # # generate_parent_audio # Combines the intro, the episode, and the outro to a final cut. # # generate_derived_media # Generate the flac wav mp3 ogg opus files # # generate_show_transcript # # generate_final_report # # manual_final_review # # register_assets # copy_files_to_origin_server #check_variable_is_correct working_dir ep_num shownotes_edited # copy_derived_files_to_borg # create_item_on_the_internet_archive upload_files_to_the_internet_archive #for i in {4301..4305};do echo ${i};/home/ken/sourcecode/personal/bin/hpr-check-ccdn-links.bash ${i};done echo_debug "The End" exit 0 # if ${force_overwrite} # then # echo_debug "The setting \"force_overwrite\" is set to true, so files will be overwritten." # else # echo_debug "The setting \"force_overwrite\" is set to false, so when files exist the program will skip files if they exist." # fi # curl --silent --netrc --write-out '%{http_code}' https://hub.hackerpublicradio.org/cms/say.php?id=${ep_num} --output "${episode_summary_json}" # curl --netrc-file ${HOME}/.netrc --write-out '%{http_code}' https://hub.hackerpublicradio.org/cms/shownotes.php?id=${ep_num} --output "${working_dir}/shownotes.json"