#!/usr/bin/env bash
# Copyright Ken Fallon - Released into the public domain. http://creativecommons.org/publicdomain/
#============================================================
# git clone https://github.com/davmo/fix_tags.git
# git clone https://repo.anhonesthost.net/HPR/hpr-tools.git
#################################################
# Variables
# Global configuration. These are read by the functions below.
debug="1" # Any value other than "0" makes echo_debug print INFO messages
force_overwrite=true # Sets the behaviour when files exist or not
processing_dir="${HOME}/tmp/hpr/processing" # The directory where the files will be copied to for processing
theme="${processing_dir}/theme.flac" # The hpr theme
silence="${processing_dir}/silence.flac" # A segment of silence to offset the tts in the intro
outro_flac="${processing_dir}/outro.flac" # The outro edited media
outro_srt="${processing_dir}/outro.srt" # The outro subtitle file
intro_srt="${processing_dir}/intro.srt" # The intro subtitle template file
piper_bin="/opt/bin/piper/piper/piper" # The location of the text to speech engine
piper_voice="/opt/bin/piper/piper/piper-voices/en/en_US/lessac/medium/en_US-lessac-medium.onnx" # The voice model passed to the text to speech engine
#################################################
# Display Error message, display help and exit
function echo_error() {
  # Print all arguments as a single "ERROR:" line, then terminate the script.
  # Arguments: the message words; they are joined with spaces.
  # Output goes to stdout (the stderr redirect is deliberately left disabled).
  # "$*" is used because "$@" inside a larger string expands to several words.
  echo -e "ERROR: $*" #1>&2
  display_help_and_exit
  # Safety net in case display_help_and_exit ever returns.
  exit 1
}
#################################################
# Display Information message
function echo_debug() {
  # Print all arguments as a single "INFO:" line unless the global "debug"
  # is set to "0". An unset or empty "debug" therefore still prints.
  # Arguments: the message words; they are joined with spaces.
  if [ "${debug}" != "0" ]
  then
    echo -e "INFO: $*" #1>&2
  fi
}
#################################################
# Display Help
display_help_and_exit() {
  # Point the user at the project documentation (shown through echo_debug,
  # so only when debugging is enabled) and stop the script with status 1.
  local -r help_message="For more information see https://repo.anhonesthost.net/HPR/hpr_documentation"
  echo_debug "${help_message}"
  exit 1
}
#################################################
# Program Checks
function program_checks() {
  # Verify the environment before any processing starts:
  #   - the processing directory exists,
  #   - the theme, silence, outro and subtitle template files are non-empty,
  #   - the required external programs can be found,
  # then print usage hints if a help flag was passed and report the
  # force_overwrite setting.
  # Arguments: the command line arguments of the script.
  # Globals read: processing_dir theme silence outro_flac outro_srt intro_srt force_overwrite
  # Side effect: defines the global helper function is_installed.
  if [ ! -d "${processing_dir}" ]
  then
    echo_error "The \"${processing_dir}\" is required but is not defined."
  fi

  if [[ ! -s "${theme}" || ! -s "${silence}" || ! -s "${outro_flac}" || ! -s "${outro_srt}" || ! -s "${intro_srt}" ]]
  then
    # List the files first: echo_error exits, so nothing after it would run.
    ls -al "${theme}" "${silence}" "${outro_flac}" "${outro_srt}" "${intro_srt}"
    echo_error "The files for the theme are not available."
  fi

  # Report an error for every named program that cannot be found.
  function is_installed() {
    local this_program
    for this_program in "$@"
    do
      # Redirect stdout first and then stderr so that both are silenced.
      if ! command -v "${this_program}" >/dev/null 2>&1
      then
        echo_error "The application \"${this_program}\" is required but is not installed."
      fi
    done
  }

  is_installed audio2image.bash awk base64 cat csvtojson curl date detox eval extract_images ffprobe file find grep head jq kate magick mediainfo mv realpath remove-image.pl rsync seamonkey sed sort sponge ssh touch wget

  local arg
  for arg in "$@"
  do
    # Case-insensitive substring match; "--help" also contains "-h".
    if [[ "${arg,,}" == *-h* ]]
    then
      echo_debug "Process the next SHOW_SUBMITTED show from the queue."
      echo_debug "If a directory is provided then the shownotes.json will be used."
    fi
  done

  if [[ "${force_overwrite}" == "true" ]]
  then
    echo_debug "The setting \"force_overwrite\" is set to true, so files will be overwritten."
  else
    echo_debug "The setting \"force_overwrite\" is set to false, so when files exist the program will skip files if they exist."
  fi
}
#################################################
# Get the next show in the queue
function get_working_dir_from_hpr_hub() {
  # Fetch the queue status from the HPR hub, take the first show with the
  # status SHOW_SUBMITTED and rsync its upload directory from the "hpr" ssh
  # host into the processing directory.
  # Globals read: processing_dir HOME
  # Globals written: response timestamp_epoc ep_num ep_date key email
  #                  email_unpadded hpr_upload_dir source_dir dest_dir working_dir
  echo_debug "Processing the next HPR Show in the queue"

  local status_csv="${processing_dir}/status.csv"

  if [ "$( curl --silent --netrc-file "${HOME}/.netrc" --write-out '%{http_code}' "https://hub.hackerpublicradio.org/cms/status.php" --output "${status_csv}" )" != 200 ]
  then
    echo_error "Could not get a list of the queue status from \"https://hub.hackerpublicradio.org/cms/status.php\""
  fi

  if [ ! -s "${status_csv}" ]
  then
    echo_error "Failed to retrieve \"${processing_dir}/status.csv\" from server."
  fi

  # First submitted show, with the comma separators turned into spaces.
  response="$( grep ',SHOW_SUBMITTED,' "${status_csv}" | head -1 | sed 's/,/ /g' )"
  if [ -z "${response}" ]
  then
    echo_debug "Getting a list of all the reservations."
    curl --silent --netrc-file "${HOME}/.netrc" "https://hub.hackerpublicradio.org/cms/status.php" | sort -n
    echo_error "There appear to be no more shows with the status \"SHOW_SUBMITTED\"."
  fi

  # Fields 1-4 and 6 of the status line; field 5 and anything after the
  # sixth field are discarded.
  read -r timestamp_epoc ep_num ep_date key _ email _ <<< "${response}"
  # Literal replacement: the dots are not treated as wildcards.
  email_unpadded="${email//.nospam@nospam./@}"

  hpr_upload_dir="/home/hpr/upload/${timestamp_epoc}_${ep_num}_${ep_date}_${key}"
  source_dir="hpr:${hpr_upload_dir}"
  dest_dir="${timestamp_epoc}_${ep_num}_${ep_date}_${key}"
  working_dir="${processing_dir}/${dest_dir}"

  echo_debug "Downloading hpr${ep_num} from ${email_unpadded}"
  echo_debug ""
  echo_debug "rsync -ave ssh --partial --progress ${source_dir}/ ${working_dir}/"
  # Clean up the file names on the server before copying them.
  ssh hpr -t "detox -v ${hpr_upload_dir}/"
  rsync -ave ssh --partial --progress "${source_dir}/" "${working_dir}/"

  if [ ! -s "${working_dir}/shownotes.json" ]
  then
    echo_error "The working dir is missing the shownotes file \"${working_dir}/shownotes.json\""
  fi

  if ! file --brief --mime-type "${working_dir}/shownotes.json" | grep --quiet "application/json"
  then
    echo_error "The \"${working_dir}/shownotes.json\" is not a \"application/json\" file"
  fi
}
#################################################
# Get the show information from a local directory
function get_working_dir_from_local_dir() {
  # Use a directory given on the command line as the working directory.
  # When it has no shownotes.json the episode information is downloaded
  # from the HPR hub.
  # Arguments: any mix of
  #   - an existing directory (the last one given is used),
  #   - "ep_num=NNNN" to supply the episode number.
  # Globals written: working_dir ep_num
  echo_debug "Processing a local directory"

  # Get supplied working dir and ep_num if provided
  local argument
  for argument in "$@"
  do
    if [ -d "${argument}" ]
    then
      working_dir="$( realpath "${argument}" )"
    fi
    if [[ "${argument}" == *ep_num=* ]]
    then
      # First whitespace-delimited word after the last "ep_num=".
      read -r ep_num _ <<< "${argument##*ep_num=}"
    fi
  done

  if [[ -z "${working_dir}" || ! -d "${working_dir}" ]]
  then
    echo_error "The working dir is missing. Please supply a working directory."
  fi

  if [ ! -s "${working_dir}/shownotes.json" ]
  then
    echo_debug "Could not find a \"shownotes.json\" in the working directory \"${working_dir}/\""

    if [ -z "${ep_num}" ]
    then
      echo_debug "Attempting to get episode number from the \"${working_dir}\""
      # Only the first "_NNNN_" group is used so ep_num is always one line.
      if [[ "${working_dir}" =~ _([0-9]{4})_ ]]
      then
        ep_num="${BASH_REMATCH[1]}"
      fi
    fi

    if [ -z "${ep_num}" ]
    then
      echo_error "Could not find the episode number - please rerun with \"$0 ep_num=9876\""
    fi

    echo_debug "Attempting to download information for episode \"${ep_num}\""

    # The URL is quoted so the "?" is never treated as a glob character.
    if [ "$( curl --silent --netrc-file "${HOME}/.netrc" --write-out '%{http_code}' "https://hub.hackerpublicradio.org/cms/shownotes.php?id=${ep_num}" --output "${working_dir}/shownotes.json" )" != 200 ]
    then
      echo_error "The Episode hpr${ep_num} has not been posted."
    fi

    if [ ! -s "${working_dir}/shownotes.json" ]
    then
      echo_error "The Episode information for hpr${ep_num} failed to download."
    fi
  fi

  if [[ -s "${working_dir}/shownotes.json" ]] && ! file --brief --mime-type "${working_dir}/shownotes.json" | grep --quiet "application/json"
  then
    echo_error "\"${working_dir}/shownotes.json\" is not a \"application/json\" file"
  fi
}
#################################################
# Get the show: either the next one in the queue,
# or the one in a local directory given on the command line
function get_working_dir() {
  # Decide where the show comes from and make sure the working directory
  # is usable.
  # Arguments: none to take the next show from the HPR hub queue,
  #            otherwise they are passed on to get_working_dir_from_local_dir.
  # On return "${working_dir}/shownotes.json" is non-empty and
  # "${working_dir}/processing/" exists.
  echo_debug "Getting working directory and populating show information. $#"

  if [ $# -eq 0 ]
  then
    get_working_dir_from_hpr_hub
  else
    # Quoted so that directory names containing spaces stay one argument.
    get_working_dir_from_local_dir "$@"
  fi

  if [ ! -s "${working_dir}/shownotes.json" ]
  then
    echo_error "The working dir \"${working_dir}\" could not be found."
  fi

  echo_debug "Found working directory as \"${working_dir}\""

  if [ ! -d "${working_dir}/processing/" ]
  then
    mkdir -v "${working_dir}/processing/"
    if [ ! -d "${working_dir}/processing/" ]
    then
      echo_error "Could not create the processing directory \"${working_dir}/processing/\"."
    fi
  fi
}
#################################################
# Provides all the metadata we need to process the show.
function get_episode_metadata() {
  # Read the show metadata from "${working_dir}/shownotes.json" into global
  # variables, write the show notes HTML to "${working_dir}/shownotes.html",
  # and then apply any "name=value" overrides given as arguments.
  # Arguments: optional "name=value" pairs. The name must be a valid shell
  #            variable name; the value is URI-encoded before it is stored.
  # Globals written: shownotes_json shownotes_html hostid host_name email
  #   email_padded title summary series_id series_name explicit
  #   episode_license ep_date ep_num key tags host_license host_profile
  #   remote_media shownotes_json_sanatised, plus any overridden names.
  echo_debug "Extracting the episode metadata"

  if [[ -s "${working_dir}/shownotes.json" ]] && ! file --brief --mime-type "${working_dir}/shownotes.json" | grep --quiet "application/json"
  then
    echo_error "\"${working_dir}/shownotes.json\" is not a \"application/json\" file"
  fi

  shownotes_json="${working_dir}/shownotes.json"
  shownotes_html="${working_dir}/shownotes.html"

  hostid="$( jq --raw-output '.host.Host_ID' "${shownotes_json}" )"
  host_name="$( jq --raw-output '.host.Host_Name' "${shownotes_json}" )"
  email="$( jq --raw-output '.host.Host_Email' "${shownotes_json}" )"
  email_padded="${email//@/.nospam@nospam.}"
  title="$( jq --raw-output '.episode.Title' "${shownotes_json}" )"
  summary="$( jq --raw-output '.episode.Summary' "${shownotes_json}" )"
  series_id="$( jq --raw-output '.episode.Series' "${shownotes_json}" )"
  series_name="$( jq --raw-output '.episode.Series_Name' "${shownotes_json}" )"
  explicit="$( jq --raw-output '.episode.Explicit' "${shownotes_json}" )"
  episode_license="$( jq --raw-output '.episode.Show_License' "${shownotes_json}" )"
  ep_date="$( jq --raw-output '.metadata.Episode_Date' "${shownotes_json}" )"
  ep_num="$( jq --raw-output '.metadata.Episode_Number' "${shownotes_json}" )"
  key="$( jq --raw-output '.metadata.Key' "${shownotes_json}" )"
  tags="$( jq --raw-output '.episode.Tags' "${shownotes_json}" )"
  host_license="$( jq --raw-output '.host.Host_License' "${shownotes_json}" )"
  host_profile="$( jq --raw-output '.host.Host_Profile' "${shownotes_json}" )"
  remote_media="$( jq --raw-output '.metadata.url' "${shownotes_json}" )"
  # Copy of the json without the show notes and the submitter's IP address.
  shownotes_json_sanatised="$( jq 'del(.episode.Show_Notes, .metadata.Host_IP)' "${shownotes_json}" )"

  echo_debug "Extracting shownotes html from json file."

  jq --raw-output '.episode.Show_Notes' "${shownotes_json}" > "${shownotes_html}"

  if [ ! -s "${shownotes_html}" ]
  then
    echo_error "Failed to extract the shownote html file \"${shownotes_html}\""
  fi

  # Every listed variable except remote_media must be non-empty.
  local variable
  local -a variables=( shownotes_json shownotes_html hostid host_name email title summary series_id series_name explicit episode_license ep_date ep_num tags host_license host_profile remote_media shownotes_json_sanatised )
  for variable in "${variables[@]}"
  do
    if [[ -z "${!variable}" && "${variable}" != "remote_media" ]]
    then # indirect expansion here
      echo_error "The variable \"${variable}\" is missing."
    else
      echo_debug "The variable \"${variable}\" is set to \"${!variable}\""
    fi
  done

  # Argument Override
  local argument this_key this_value
  for argument in "$@"
  do
    # Split at the FIRST "=" so "title=a=b" gives key "title", value "a=b".
    if [[ "${argument}" =~ ^([^=]+)=(.*)$ ]]
    then
      this_key="${BASH_REMATCH[1]}"
      this_value="${BASH_REMATCH[2]}"
      if [[ ! "${this_key}" =~ ^[A-Za-z_][A-Za-z0-9_]*$ ]]
      then
        echo_debug "Ignoring \"${this_key}\" as it is not a valid variable name."
        continue
      fi
      this_value="$( printf '%s' "${this_value}" | jq --slurp --raw-input --raw-output @uri )"
      # printf -v assigns without eval, so nothing in the argument is
      # ever executed or expanded by the shell.
      printf -v "${this_key}" '%s' "${this_value}"
      echo_debug "Replacing \"${this_key}\" with \"${this_value}\"."
    fi
  done

  # Hosts need to exist in the database
  if [ "${hostid}" == '0' ]
  then
    echo_error "The hostid is 0. Create the host and use \"hostid=???\" to override"
  fi
}
#################################################
# Extract_images by brute force
# Helper: rename a freshly written image to carry the extension taken from
# its detected mime type and, when it is wider than 400 pixels, create a
# 400 pixel wide "_tn" thumbnail beside it.
# Arguments: $1 - image path without extension
#            $2 - the word used in the debug message ("embedded"/"external")
function _hpr_finalise_image() {
  local image_path="${1}"
  local image_kind="${2}"
  local this_ext this_width

  this_ext="$( file --brief --mime-type "${image_path}" | awk -F '/' '{print $NF}' )"
  mv -v "${image_path}" "${image_path}.${this_ext}"
  this_width="$( mediainfo "${image_path}.${this_ext}" | grep Width | awk -F ': | pixels' '{print $2}' | sed 's/ //g' )"
  # Only compare when a numeric width was actually reported.
  if [[ "${this_width}" =~ ^[0-9]+$ && "${this_width}" -gt 400 ]]
  then
    echo_debug "Generating thumbnail for ${image_kind} image \"${image_path}.${this_ext}\"."
    magick "${image_path}.${this_ext}" -resize 400x "${image_path}_tn.${this_ext}"
  fi
}

# Helper: copy the html on stdin to a file, replacing the Nth line that
# contains the marker with a reference to the Nth extracted image. When a
# thumbnail exists the thumbnail is shown and wrapped in a link to the
# full image.
# Arguments: $1 - marker text to look for in a line
#            $2 - image file name prefix; the counter and ".*" are appended
#            $3 - output file (truncated first)
function _hpr_relink_images() {
  local marker="${1}"
  local image_prefix="${2}"
  local output_file="${3}"
  local image_count=1
  local this_line this_image this_image_tn

  : > "${output_file}"
  # IFS= and -r keep indentation and backslashes in the show notes intact.
  while IFS= read -r this_line || [[ -n "${this_line}" ]]
  do
    if [[ "${this_line}" != *"${marker}"* ]]
    then
      printf '%s\n' "${this_line}" >> "${output_file}"
      continue
    fi

    this_image="$( find "${working_dir}/" -type f -iname "${image_prefix}${image_count}.*" )"
    if [[ -z "${this_image}" || ! -s "${this_image}" ]]
    then
      echo_error "Unable to find an image for \"${image_count}\", \"${this_image}\"."
    fi
    this_image="$( basename "${this_image}" )"

    this_image_tn="$( find "${working_dir}/" -type f -iname "${this_image%.*}_tn.*" )"
    if [[ -z "${this_image_tn}" || ! -s "${this_image_tn}" ]]
    then
      printf '%s\n' "${this_line}" | sed "s@${marker}@${this_image}@g" >> "${output_file}"
    else
      this_image_tn="$( basename "${this_image_tn}" )"
      echo "<a href=\"${this_image}\">" >> "${output_file}"
      printf '%s\n' "${this_line}" | sed "s@${marker}@${this_image_tn}@g" >> "${output_file}"
      echo "</a>" >> "${output_file}"
    fi
    (( image_count = image_count + 1 ))
  done
}

function extract_images_brute_force() {
  # Pull images out of the show notes html: base64 embedded images are
  # decoded to files, remotely hosted images are downloaded, and the html
  # is rewritten to reference the local copies.
  # Nothing is done when an edited copy of the show notes already exists.
  # Globals read: shownotes_html working_dir ep_num
  # Globals written: image_count_embedded image_count_external this_image
  echo_debug "Extracting images with grep."

  if [ -s "${shownotes_html%.*}_edited.html" ]
  then
    echo_debug "There is already an edited version of the shownotes at \"${shownotes_html%.*}_edited.html\", slipping image extraction."
    return
  fi

  if [[ -z "${shownotes_html}" || ! -s "${shownotes_html}" ]]
  then
    echo_error "The shownotes_html file \"${shownotes_html}\" could not be found."
  fi

  ## TODO Temp fix until https://repo.anhonesthost.net/HPR/hpr-tools/issues/3 is available

  # Put a line break after every tag so each image sits on its own line.
  sed "s#>#>\n#g" "${shownotes_html}" | sponge "${shownotes_html}"

  # Extract embedded images
  local image
  local -a found_images=()
  image_count_embedded="1"
  mapfile -t found_images < <( grep --color=never --perl-regexp --only-matching 'data:image/[^;]*;base64,\K[a-zA-Z0-9+/=]*' "${shownotes_html}" )
  for image in "${found_images[@]}"
  do
    this_image="${working_dir}/hpr${ep_num}_image_${image_count_embedded}"
    printf '%s' "${image}" | base64 -di > "${this_image}"
    _hpr_finalise_image "${this_image}" "embedded"
    (( image_count_embedded = image_count_embedded + 1 ))
  done

  # Download referenced images
  found_images=()
  image_count_external="1"
  mapfile -t found_images < <( grep --color=never --perl-regexp --only-matching '<img.*src.*http.*>' "${shownotes_html}" | awk -F 'src=' '{print $2}' | awk -F '"' '{print $2}' )
  for image in "${found_images[@]}"
  do
    this_image="${working_dir}/hpr${ep_num}_image_ext_${image_count_external}"
    wget "${image}" --output-document="${this_image}"
    # The file was written without an extension, so it is renamed from
    # exactly that path (not from a "%.*" trimmed one).
    _hpr_finalise_image "${this_image}" "external"
    (( image_count_external = image_count_external + 1 ))
  done

  remove-image.pl < "${shownotes_html}" | sponge "${shownotes_html}"

  if [ "${image_count_embedded}" -gt "1" ]
  then
    image_count_embedded="1"
    # The loop runs in this shell (no pipeline), so echo_error can abort.
    _hpr_relink_images "LOCAL_IMAGE_REMOVED" "hpr${ep_num}_image_" "${shownotes_html}.embedded_images" < "${shownotes_html}"
    mv -v "${shownotes_html}.embedded_images" "${shownotes_html}"
  else
    echo_debug "No embedded images found. ${image_count_embedded}"
  fi

  if [ "${image_count_external}" -gt "1" ]
  then
    image_count_external="1"
    _hpr_relink_images "REMOTE_IMAGE_REMOVED" "hpr${ep_num}_image_ext_" "${shownotes_html}.external_images" < <( remove-image.pl < "${shownotes_html}" )
    mv -v "${shownotes_html}.external_images" "${shownotes_html}"
  else
    echo_debug "No external images found."
  fi

  ## TODO End Temp fix
}
#################################################
## Media Checks
function media_checks() {
  # Locate the show's media file, validate it and collect reports about it.
  # Steps: fetch remote media if a url was supplied, find the audio/video
  # file in the working directory (asking the user when there are several),
  # require a matching .srt subtitle file, check duration and channel count,
  # then generate spectrum/waveform images and mediainfo/exiftool reports.
  # Globals read: remote_media working_dir
  # Globals written: media media_basename shownotes_srt duration
  #                  supplied_channels ffprobe file_mime file_mime_type
  if [ -n "${remote_media}" ]
  then
    echo_debug "Fetching remote media from \"${remote_media}\""
    if ! wget --timestamping --directory-prefix="${working_dir}/" "${remote_media}"
    then
      echo_error "Could not get the remote media"
    fi
  fi

  media=$( find "${working_dir}/" -maxdepth 1 -type f -exec file --mime-type {} \; | grep -Ei ' audio/| video/' | awk -F ': ' '{print $1}' )
  if [ -z "${media}" ]
  then
    find "${working_dir}/" -type f
    echo_error "Can't find any media in \"${working_dir}/\""
  fi

  if [ "$( echo "${media}" | wc -l )" -ne 1 ]
  then
    echo "Multiple files found. Which one do you want to use ?"
    # One array element per line so paths with spaces stay intact.
    local -a media_candidates
    mapfile -t media_candidates <<< "${media}"
    select this_media in "${media_candidates[@]}"
    do
      ls -al "${this_media}"
      media="${this_media}"
      break
    done
  fi
  echo_debug "You selected \"${media}\"."

  if [[ -z "${media}" || ! -s "${media}" ]]
  then
    echo_error "Could not find the media \"${media}/\""
  fi

  # Derived only after the selection so it names the chosen file.
  media_basename="$( basename "${media}" )"
  if [ -z "${media_basename}" ]
  then
    echo_error "Could not create the media_basename \"${media_basename}/\""
  fi

  shownotes_srt="${media%.*}.srt"
  if [[ -z "${shownotes_srt}" || ! -s "${shownotes_srt}" ]]
  then
    echo_error "Could not find the subtitles for media \"${media}/\""
  fi

  # Find duration
  duration=$( mediainfo --full --Output=JSON "${media}" | jq --raw-output '.media.track | .[] | select(."@type"=="Audio") | .Duration' | awk -F '.' '{print $1}' )
  if [[ -z "${duration}" || "${duration}" -lt "30" || "${duration}" -gt "30000" ]]
  then
    echo_error "Invalid duration found in \"${media}\"" >&2
  fi
  echo_debug "The Duration is \"${duration}\" seconds from \"${media}\""

  # Find number of channels ( 1=mono or 2=stereo)
  supplied_channels=$( mediainfo --full --Output=JSON "${media}" | jq --raw-output '.media.track | .[] | select(."@type"=="Audio") | .Channels' )
  if [[ -z "${supplied_channels}" || "${supplied_channels}" -lt "1" || "${supplied_channels}" -gt "2" ]]
  then
    echo_error "Invalid number of audio channels \"${supplied_channels}\" found in \"${media}\"" >&2
  fi
  echo_debug "The number of audio channels is \"${supplied_channels}\" from \"${media}\" ."

  # Generate the Spectrum and Waveform image
  ffmpeg -hide_banner -loglevel error -y -i "${media}" -lavfi "showspectrumpic=s=960x540" "${working_dir}/processing/${media_basename%.*}_spectrum.png"
  audio2image.bash "${media}" && mv -v "${media%.*}.png" "${working_dir}/processing/${media_basename%.*}_waveform.png"

  # Getting metadata
  mediainfo "${media}" > "${working_dir}/processing/${media_basename%.*}_mediainfo.txt"
  exiftool "${media}" > "${working_dir}/processing/${media_basename%.*}_exiftool.txt"

  local check_file
  for check_file in spectrum.png waveform.png mediainfo.txt exiftool.txt
  do
    if [ ! -s "${working_dir}/processing/${media_basename%.*}_${check_file}" ]
    then
      echo_error "The ${check_file} file was not generated for the \"${working_dir}/processing/${media_basename%.*}_${check_file}\"" >&2
    fi
  done

  ffprobe="$( ffprobe "${media}" 2>&1 | grep Audio: | sed 's/^.\s//g' )"
  file_mime="$( file --brief --mime "${media}" )"
  file_mime_type="$( file --brief --mime-type "${media}" )"

  # All three values must be non-empty ("-z", not the file test "-s").
  if [[ -z "${ffprobe}" || -z "${file_mime}" || -z "${file_mime_type}" ]]
  then
    echo "ffprobe: ${ffprobe}, file_mime: ${file_mime},file_mime_type: ${file_mime_type}"
    echo_error "Wasn't able to find mime metadata from \"${media}/\""
  fi
}
#################################################
## Generate Initial Report for review by the Janitors
function generate_initial_report() {
  # Write "${working_dir}/processing/<media name>_media_report.html": a
  # single page showing the field mapping, the sanitised shownotes json,
  # the mediainfo and exiftool reports, the spectrum and waveform images,
  # an audio player and the transcript.
  # Globals read: working_dir media media_basename shownotes_srt ffprobe
  #   file_mime file_mime_type shownotes_json_sanatised and the metadata
  #   fields listed in the table.
  # NOTE(review): the values are inserted without HTML escaping.
  echo_debug "Generating the initial report."

  # TODO list the images.

  cat > "${working_dir}/processing/${media_basename%.*}_media_report.html" <<EOF
<!DOCTYPE HTML>
<html lang="en">
<head>
<title>Hacker Public Radio ~ The Technology Community Podcast</title>
<meta charset="utf-8" />
<style>
body {
font-family: sans-serif;
}
table {
border-collapse: collapse;
text-align: left;
vertical-align: middle;
}
th, td {
border: 1px solid black;
}
</style>
</head>
<body>
<h3>Field Mapping</h3>
<table>
<thead>
<tr>
<th>Field</th><th>Value</th>
</tr>
</thead>
<tbody>
<tr><th>hostid</th><td>${hostid}</td></tr>
<tr><th>host_name</th><td>${host_name}</td></tr>
<tr><th>title</th><td>${title}</td></tr>
<tr><th>summary</th><td>${summary}</td></tr>
<tr><th>series_id</th><td>${series_id}</td></tr>
<tr><th>series_name</th><td>${series_name}</td></tr>
<tr><th>explicit</th><td>${explicit}</td></tr>
<tr><th>episode_license</th><td>${episode_license}</td></tr>
<tr><th>tags</th><td>${tags}</td></tr>
<tr><th>host_license</th><td>${host_license}</td></tr>
<tr><th>host_profile</th><td>${host_profile}</td></tr>
</tbody>
</table>
<h3>Raw shownotes.json</h3>
<code>
<pre>
${shownotes_json_sanatised}
</pre>
</code>
<h2>Audio</h2>
<h3>mediainfo report</h3>
<code>
<pre>
$( cat "${working_dir}/processing/${media_basename%.*}_mediainfo.txt" )
</pre>
</code>
<h3>exiftool report</h3>
<code>
<pre>
$( cat "${working_dir}/processing/${media_basename%.*}_exiftool.txt" )
</pre>
</code>
<h3>Audio Spectrum</h3>
<p>
<img src="${working_dir}/processing/${media_basename%.*}_spectrum.png" alt="Spectrum" />
</p>
<h3>Audio Waveform</h3>
<p>
<img src="${working_dir}/processing/${media_basename%.*}_waveform.png" alt="Waveform" />
</p>
<pre>
${ffprobe}
${file_mime}
</pre>
<p>
<audio controls="" preload="none" style="width:600px;">
<source src="${media}" type="${file_mime_type}">
</audio>
<br />
<a href="${media}">${media}</a>
</p>
<h3>Transcript</h3>
<hr />
<pre>
$( cat "${shownotes_srt}" )
</pre>
<hr />
</body>
</html>
EOF
}
#################################################
## Manually edit the shownotes to fix issues
function manual_shownotes_review() {
  # Interactive review step. A copy of the show notes is made as
  # "<shownotes>_edited.html" and opened in kate and seamonkey, the media
  # report is opened in librewolf, and the user is asked to approve.
  # Anything other than "y" or "Y" aborts. Returns early when an edited
  # copy already exists.
  # Globals read: shownotes_html working_dir media_basename
  echo_debug "Validating the initial report."

  local edited_html="${shownotes_html%.*}_edited.html"
  local media_report="${working_dir}/processing/${media_basename%.*}_media_report.html"

  if [[ -z "${shownotes_html}" || ! -s "${shownotes_html}" || ! -s "${media_report}" ]]
  then
    echo "shownotes_html: ${shownotes_html}"
    ls -al "${shownotes_html}" "${media_report}"
    echo_error "The files needed for to generate the inital report information are not available."
  fi

  if [ -s "${edited_html}" ]
  then
    echo_debug "There is already an edited version of the shownotes at \"${shownotes_html%.*}_edited.html\"."
    return
  fi

  cp -v "${shownotes_html}" "${edited_html}"

  if [ ! -s "${edited_html}" ]
  then
    echo_error "The edited shownotes are missing \"${shownotes_html%.*}_edited.html\"."
  fi

  # The editors and the browser run in the background while we wait for
  # the answer below.
  kate "${edited_html}" >/dev/null 2>&1 &
  librewolf "${media_report}" >/dev/null 2>&1 &
  seamonkey "${edited_html}" >/dev/null 2>&1 &
  # # # # bluefish "${shownotes_html%.*}_edited.html" >/dev/null 2>&1 &
  # https://markdowntohtml.com/

  read -p "Does the metadata 'look ok ? (N|y) ? " -n 1 -r
  echo # (optional) move to a new line
  if [[ ! ${REPLY} =~ ^[yY]$ ]]
  then
    echo_error "The final review was not approved."
  fi

  # remove extra wrappers that seamonkey adds
  grep --invert-match --perl-regexp '<!DOCTYPE|html>|head>|<meta|body>' "${edited_html}" | sponge "${edited_html}"

  # Check to see if images have been linked TODO make a loop for found images
  if [ "$( find "${working_dir}" -type f -iname "*_image_*" | wc -l )" -ne "0" ]
  then
    if [ "$( grep --count "_image_" "${edited_html}" )" -eq "0" ]
    then
      echo_error "The extracted images were not linked in the shownotes \"${shownotes_html%.*}_edited.html\"."
    fi
  fi
}
#################################################
# Post show to HPR
function post_show_to_hpr_db() {
  # Post the show metadata and the edited show notes to the HPR hub as
  # json, then confirm that the hub knows the episode.
  # Returns early with a warning when the episode is already posted.
  # Globals read: ep_num ep_date key email_padded title duration summary
  #   series_id series_name explicit episode_license tags hostid host_name
  #   host_license host_profile shownotes_html working_dir
  # Globals written: notes host_profile_encoded post_show_json
  if [ "$( curl --silent --netrc --write-out '%{http_code}' "https://hub.hackerpublicradio.org/cms/say.php?id=${ep_num}" --output /dev/null )" == 200 ]
  then
    echo "WARN: The Episode hpr${ep_num} has already been posted"
    return
  fi

  if [ ! -s "${shownotes_html%.*}_edited.html" ]
  then
    echo_error "Failed to find the extracted shownote html file \"${shownotes_html%.*}_edited.html\""
  fi

  # The notes and the host profile are sent URI-encoded.
  notes="$( jq --slurp --raw-input @uri < "${shownotes_html%.*}_edited.html" | sed -e 's/%0A"$//g' -e 's/^"//g' )"
  host_profile_encoded="$( echo "${host_profile}" | jq --slurp --raw-input @uri | sed -e 's/%0A"$//g' -e 's/^"//g' )"

  post_show_json="${working_dir}/post_show.json"

  echo "Sending:"
  printf '%s\n' \
    "key=${key}" \
    "ep_num=${ep_num}" \
    "ep_date=${ep_date}" \
    "email=${email_padded}" \
    "title=${title}" \
    "duration=${duration}" \
    "summary=${summary}" \
    "series_id=${series_id}" \
    "series_name=${series_name}" \
    "explicit=${explicit}" \
    "episode_license=${episode_license}" \
    "tags=${tags}" \
    "hostid=${hostid}" \
    "host_name=${host_name}" \
    "host_license=${host_license}" \
    "host_profile=${host_profile_encoded}" \
    "notes=REMOVED"

  # jq builds the document so quotes, backslashes and newlines inside any
  # value are escaped correctly. Every value is sent as a json string.
  jq --null-input \
    --arg key "${key}" \
    --arg ep_num "${ep_num}" \
    --arg ep_date "${ep_date}" \
    --arg email "${email_padded}" \
    --arg title "${title}" \
    --arg duration "${duration}" \
    --arg summary "${summary}" \
    --arg series_id "${series_id}" \
    --arg series_name "${series_name}" \
    --arg explicit "${explicit}" \
    --arg episode_license "${episode_license}" \
    --arg tags "${tags}" \
    --arg hostid "${hostid}" \
    --arg host_name "${host_name}" \
    --arg host_license "${host_license}" \
    --arg host_profile "${host_profile_encoded}" \
    --arg notes "${notes}" \
    '{
      key: $key,
      ep_num: $ep_num,
      ep_date: $ep_date,
      email: $email,
      title: $title,
      duration: $duration,
      summary: $summary,
      series_id: $series_id,
      series_name: $series_name,
      explicit: $explicit,
      episode_license: $episode_license,
      tags: $tags,
      hostid: $hostid,
      host_name: $host_name,
      host_license: $host_license,
      host_profile: $host_profile,
      notes: $notes
    }' > "${post_show_json}"

  # Display the document and confirm that it parses.
  if ! jq '.' "${post_show_json}"
  then
    echo_error "The file \"${post_show_json}\" is not valid json."
  fi

  curl --netrc --include --request POST "https://hub.hackerpublicradio.org/cms/add_show_json.php" --header "Content-Type: application/json" --data-binary "@${post_show_json}"

  if [ "$( curl --silent --netrc --write-out '%{http_code}' "https://hub.hackerpublicradio.org/cms/say.php?id=${ep_num}" --output /dev/null )" != 200 ]
  then
    echo_error "The Episode hpr${ep_num} has not been posted"
  fi
}
#################################################
# Generate text to speech summary
function create_tts_summary {
  # Download the episode summary from the HPR hub and convert its synopsis
  # to speech with piper.
  # Globals read: ep_num working_dir piper_bin piper_voice
  # Globals written: synopsis
  # Files written: "${working_dir}/episode_summary.json"
  #                "${working_dir}/processing/episode_tts.wav"
  if [ "$( curl --silent --netrc --write-out '%{http_code}' "https://hub.hackerpublicradio.org/cms/say.php?id=${ep_num}" --output "${working_dir}/episode_summary.json" )" != 200 ]
  then
    echo_error "The Episode hpr${ep_num} has not been posted"
  fi

  # Tidy the text: drop a space before a full stop, reduce ".." to "." and
  # collapse runs of spaces into one.
  synopsis="$( jq --raw-output '.synopsis' "${working_dir}/episode_summary.json" | sed -e 's/ \././g' -e 's/\.\./\./g' -e 's/  */ /g' )"

  if [[ -z "${synopsis}" || "${synopsis}" == "null" ]]
  then
    echo_error "Could not retrieve the synopsis for the text to speech."
  fi

  echo_debug "Converting text synopsis \"${synopsis}\" to speech."

  echo "${synopsis}" | "${piper_bin}" --model "${piper_voice}" --output_file "${working_dir}/processing/episode_tts.wav"

  if [ ! -s "${working_dir}/processing/episode_tts.wav" ]
  then
    echo_error "The text to speech episode summary was not created \"${working_dir}/processing/episode_tts.wav\"."
  fi
}
#################################################
# Generate Intro
function generate_intro {
  # Build the episode intro: the text to speech summary, delayed by a
  # stretch of silence, mixed over the theme.
  # Globals read: working_dir theme silence media outro_flac
  # Files written in "${working_dir}/processing/": episode_tts.flac,
  #   episode_summary.flac and episode_intro.flac
  echo_debug "Generating the intro."

  local processing="${working_dir}/processing"

  if [[ ! -s "${processing}/episode_tts.wav" || ! -s "${theme}" || ! -s "${media}" || ! -s "${outro_flac}" || ! -d "${processing}/" ]]
  then
    # List first: echo_error exits, so nothing after it would run.
    ls -al "${processing}/episode_tts.wav" "${theme}" "${media}" "${outro_flac}" "${processing}/"
    echo_error "The files for the intro are not available."
  fi

  # Everything needs to be in the same format for the intro, 1 channel (mono) Sampling rate 44.1 kHz
  ffmpeg -hide_banner -loglevel error -y -i "${processing}/episode_tts.wav" -ar 44100 -ac 1 "${processing}/episode_tts.flac"

  # A level of silence is added at the beginning of the text to speech
  sox -V2 "${silence}" "${processing}/episode_tts.flac" "${processing}/episode_summary.flac"

  # The tracks are merged together resulting in the theme playing first, then after a period of silence the text to speech enters
  sox -V2 -m "${processing}/episode_summary.flac" "${theme}" "${processing}/episode_intro.flac"

  if [[ ! -s "${processing}/episode_tts.flac" || ! -s "${processing}/episode_summary.flac" || ! -s "${processing}/episode_intro.flac" ]]
  then
    ls -al "${processing}/episode_tts.flac" "${processing}/episode_summary.flac" "${theme}" "${processing}/episode_intro.flac"
    echo_error "The files for the theme audio sandwich are not available."
  fi
}
#################################################
# Generate parent audio - the sandwich (intro + show + outro)
function generate_parent_audio {
  # Join intro, show and outro into one file and normalise its loudness.
  # Globals read: working_dir media outro_flac
  # Files written in "${working_dir}/processing/": episode_body.flac,
  #   episode_sandwitch.flac and episode_final.flac
  echo_debug "Generating the parent audio - the sandwitch."

  local processing="${working_dir}/processing"

  if [[ ! -s "${processing}/episode_intro.flac" || ! -s "${media}" || ! -s "${outro_flac}" ]]
  then
    # List first: echo_error exits, so nothing after it would run.
    ls -al "${processing}/episode_intro.flac" "${media}" "${outro_flac}"
    echo_error "The files for the sandwich are not available."
  fi

  # Everything needs to be in the same format, so the show media is converted to 1 channel (mono) Sampling rate 44.1 kHz
  ffmpeg -hide_banner -loglevel error -y -i "${media}" -ar 44100 -ac 1 "${processing}/episode_body.flac"

  # Combine the components together
  sox -V2 "${processing}/episode_intro.flac" "${processing}/episode_body.flac" "${outro_flac}" "${processing}/episode_sandwitch.flac"

  # Normalise the audio
  ffmpeg -hide_banner -loglevel error -y -i "${processing}/episode_sandwitch.flac" -af loudnorm=I=-16:LRA=11:TP=-1.5 "${processing}/episode_final.flac"
}
#################################################
# Generate derived media
#
# Transcodes "${working_dir}/processing/episode_final.flac" to flac, wav, mp3,
# ogg and opus as "${working_dir}/hpr${ep_num}.<ext>", tags every file with
# ffmpeg metadata and fix_tags, and keeps a copy of the submitted media as
# "${working_dir}/hpr${ep_num}_source.<original extension>".
#
# Globals read:    working_dir, ep_num, ep_date, title, host_name,
#                  episode_license, media
# Globals written: episode_comment, episode_year
# Exits through echo_error when an input or a generated file is missing/empty.
function generate_derived_media {
  echo_debug "Generating derived audio."

  local final_flac="${working_dir}/processing/episode_final.flac"
  if [[ ! -s "${final_flac}" ]]
  then
    ls -al "${final_flac}"
    echo_error "The final cut is not available."
  fi

  episode_comment="$( jq --raw-output '.comment' "${working_dir}/episode_summary.json" )"
  # The first four characters of the episode date are used as the year.
  episode_year="${ep_date:0:4}"

  # https://wiki.multimedia.cx/index.php?title=FFmpeg_Metadata
  local -a metadata_args=(
    -metadata "title=${title}"
    -metadata "artist=${host_name}"
    -metadata "author=${host_name}"
    -metadata "album=Hacker Public Radio"
    -metadata "comment=${episode_comment} The license is ${episode_license}"
    -metadata "year=${episode_year}"
    -metadata "track=${ep_num}"
    -metadata "genre=Podcast"
    -metadata "language=English"
    -metadata "copyright=${episode_license}"
  )

  local ext derived_file
  for ext in flac wav mp3 ogg opus
  do
    derived_file="${working_dir}/hpr${ep_num}.${ext}"
    echo_debug "Generating \"hpr${ep_num}.${ext}\"."
    ffmpeg -hide_banner -loglevel error -y -i "${final_flac}" "${metadata_args[@]}" "${derived_file}"
    fix_tags -album="Hacker Public Radio" -artist="${host_name}" -comment="${episode_comment} The license is ${episode_license}" -genre="Podcast" -title="${title}" -track="${ep_num}" -year="${episode_year}" "${derived_file}"
    if [[ ! -s "${derived_file}" ]]
    then
      # echo_error exits the script, so the listing has to be printed first.
      ls -al "${derived_file}"
      echo_error "Failed to generate \"${derived_file}\"."
    fi
  done

  local source_copy="${working_dir}/hpr${ep_num}_source.${media##*.}"
  cp -v "${media}" "${source_copy}"
  if [[ ! -s "${source_copy}" ]]
  then
    ls -al "${source_copy}"
    echo_error "Failed to copy \"${source_copy}\"."
  fi
}
#################################################
# Print one field of "${working_dir}/episode_summary.json", tidied up in the
# same way for every field: " ." becomes ".", ".." becomes "." and a double
# space becomes a single space.
# Arguments: $1 - the name of the json field, for example "title"
function _transcript_summary_field() {
  jq --raw-output ".${1}" "${working_dir}/episode_summary.json" | sed -e 's/ \././g' -e 's/\.\./\./g' -e 's/  / /g'
}
#################################################
# Escape text so that it can be used as the replacement part of a sed
# "s~pattern~replacement~" command. A backslash, an ampersand and the "~"
# delimiter all have a special meaning there and are prefixed with a backslash.
# Arguments: $1 - the text to escape
function _transcript_sed_escape() {
  printf '%s' "${1}" | sed -e 's/[\\&~]/\\&/g'
}
#################################################
# Generate Subtitles
#
# Fills the intro subtitle template with the episode summary, shifts the
# supplied media subtitles by the duration of the intro, shifts the outro
# subtitles by the duration of the intro plus the body, joins the three parts,
# renumbers the subtitle counters and finally extracts a plain text transcript.
#
# Globals read:    working_dir, ep_num, media, intro_srt, outro_srt
# Globals written: date, title, duration, artist, explicit, license, summary,
#                  REPLACE_LINE_1..5, itsoffset_intro, itsoffset_body
# Files written:   "${working_dir}/hpr${ep_num}.srt" and ".txt", plus working
#                  files under "${working_dir}/processing/"
# Exits through echo_error when an input or a result is missing.
function generate_show_transcript() {
  echo_debug "Generate show transcript and subtitles."
  # TODO Currently processed elsewhere by hpr-get-and-transcode.bash and uploaded to hpr:upload/ to be synced with media above
  if [[ ! -s "${media}" || ! -s "${media%.*}.srt" || ! -s "${intro_srt}" || ! -s "${outro_srt}" || ! -s "${working_dir}/processing/episode_intro.flac" || ! -s "${working_dir}/processing/episode_body.flac" ]]
  then
    ls -al "${media}" "${media%.*}.srt" "${intro_srt}" "${outro_srt}" "${working_dir}/processing/episode_intro.flac" "${working_dir}/processing/episode_body.flac"
    echo_error "The transcriptions files are not available."
  fi

  # Collect the text that replaces the placeholders in the intro template
  date="$( _transcript_summary_field date )"
  title="$( _transcript_summary_field title )"
  duration="$( _transcript_summary_field duration )"
  # The duration is passed to date as an epoch offset and printed as HH:MM:SS.
  # The backslash bypasses any alias named "date".
  duration="$( \date -d "@${duration}" -u +%H:%M:%S )"
  artist="$( _transcript_summary_field artist )"
  explicit="$( _transcript_summary_field explicit )"
  license="$( _transcript_summary_field license )"
  summary="$( _transcript_summary_field summary )"

  if [[ -z "${date}" || "${date}" == "null" || -z "${title}" || "${title}" == "null" || -z "${duration}" || "${duration}" == "null" || -z "${artist}" || "${artist}" == "null" || -z "${explicit}" || "${explicit}" == "null" || -z "${license}" || "${license}" == "null" || -z "${summary}" || "${summary}" == "null" ]]
  then
    # echo_error exits the script, so the listing has to be printed first.
    ls -al "${working_dir}/episode_summary.json"
    echo_error "Could not retrieve the synopsis for the text to speech."
  fi

  REPLACE_LINE_1="This is Hacker Public Radio Episode ${ep_num}, for ${date}"
  REPLACE_LINE_2="Today's show is entitled, \"${title}\""
  REPLACE_LINE_3="The host is ${artist} and the duration is ${duration}"
  REPLACE_LINE_4="The flag is ${explicit}, and the license is ${license}"
  REPLACE_LINE_5="The summary is \"${summary}\""

  # Copy in the intro subtitle template and replace each placeholder line with the text of the summary
  cp -v "${intro_srt}" "${working_dir}/processing/episode_intro.srt"
  cp -v "${outro_srt}" "${working_dir}/processing/episode_outro.srt"
  # The text comes from the show metadata and may hold "&", "~" or "\", which
  # sed would otherwise interpret inside the replacement.
  sed -e "s~REPLACE_LINE_1~$( _transcript_sed_escape "${REPLACE_LINE_1}" )~g" \
      -e "s~REPLACE_LINE_2~$( _transcript_sed_escape "${REPLACE_LINE_2}" )~g" \
      -e "s~REPLACE_LINE_3~$( _transcript_sed_escape "${REPLACE_LINE_3}" )~g" \
      -e "s~REPLACE_LINE_4~$( _transcript_sed_escape "${REPLACE_LINE_4}" )~g" \
      -e "s~REPLACE_LINE_5~$( _transcript_sed_escape "${REPLACE_LINE_5}" )~g" \
      -i "${working_dir}/processing/episode_intro.srt"
  if [ "$( grep --count REPLACE_LINE "${working_dir}/processing/episode_intro.srt" )" -ne "0" ]
  then
    echo_error "The intro subtitles were not correctly generated \"${working_dir}/processing/episode_intro.srt\"."
  fi

  # Time shift the media subtitles on by the duration of the intro file
  # https://trac.ffmpeg.org/wiki/UnderstandingItsoffset
  # awk keeps only the part of the reported duration before the first "."
  itsoffset_intro="$( mediainfo --full --Output=JSON "${working_dir}/processing/episode_intro.flac" | jq --raw-output '.media.track | .[] | select(."@type"=="Audio") | .Duration' | awk -F '.' '{print $1}' )"
  if [[ -z "${itsoffset_intro}" || "${itsoffset_intro}" == "null" ]]
  then
    echo_error "Could not retrieve the itsoffset_intro to correct the timing of the subtitles."
  fi
  ffmpeg -hide_banner -loglevel error -y -itsoffset "${itsoffset_intro}" -i "${media%.*}.srt" -c copy "${working_dir}/processing/episode_body.srt"

  # Timeshift the outro by the duration of the intro and the supplied media
  itsoffset_body="$( mediainfo --full --Output=JSON "${working_dir}/processing/episode_body.flac" | jq --raw-output '.media.track | .[] | select(."@type"=="Audio") | .Duration' | awk -F '.' '{print $1}' )"
  if [[ -z "${itsoffset_body}" || "${itsoffset_body}" == "null" ]]
  then
    echo_error "Could not retrieve the itsoffset_body to correct the timing of the subtitles."
  fi
  itsoffset_body=$(( itsoffset_intro + itsoffset_body ))
  ffmpeg -hide_banner -loglevel error -y -itsoffset "${itsoffset_body}" -i "${working_dir}/processing/episode_outro.srt" -c copy "${working_dir}/processing/episode_outro_shifted.srt"

  # Combine the intro, timeshifted media subtitles, and the timeshifted outro subtitles.
  cat "${working_dir}/processing/episode_intro.srt" "${working_dir}/processing/episode_body.srt" "${working_dir}/processing/episode_outro_shifted.srt" > "${working_dir}/processing/episode.srt"

  # Parse the resulting subtitle file fixing the numeric counter
  # https://en.wikipedia.org/wiki/SubRip
  # "read -r" keeps backslashes in the subtitle text. The default IFS is kept
  # on purpose so that surrounding whitespace is trimmed, as it was before.
  local srt_counter=1 srt_line
  while read -r srt_line || [[ -n "${srt_line}" ]]
  do
    if [[ "${srt_line}" =~ ^[0-9]+$ ]]
    then
      printf '%s\n' "${srt_counter}"
      srt_counter=$(( srt_counter + 1 ))
    else
      printf '%s\n' "${srt_line}"
    fi
  done < "${working_dir}/processing/episode.srt" > "${working_dir}/hpr${ep_num}.srt"

  # Extract the txt version: drop the timing lines, the empty lines and the counters
  grep -Ev -- '-->|^$|^[0-9]+$' "${working_dir}/hpr${ep_num}.srt" > "${working_dir}/hpr${ep_num}.txt"
  if [[ ! -s "${working_dir}/hpr${ep_num}.srt" || ! -s "${working_dir}/hpr${ep_num}.txt" ]]
  then
    ls -al "${working_dir}/hpr${ep_num}.srt" "${working_dir}/hpr${ep_num}.txt"
    echo_error "The transcriptions files were not generated."
  fi
}
#################################################
## Generate Final Report
#
# Builds "${working_dir}/processing/hpr${ep_num}_report.html" from the initial
# media report (with its closing </body> and </html> lines removed), followed
# by the text to speech section, one section per derived audio format, the
# rendered shownotes, the subtitle file and the transcript.
#
# Globals read:    working_dir, ep_num, media_basename, synopsis, shownotes_html
# Globals written: final_report, this_file_extension_to_check,
#                  this_file_to_check, this_file_mime_type
# Exits through echo_error when a required input or generated file is missing.
function generate_final_report() {
  echo_debug "Generating the final report."
  final_report="${working_dir}/processing/hpr${ep_num}_report.html"

  local initial_report="${working_dir}/processing/${media_basename%.*}_media_report.html"
  local derived_file report_prefix

  # Every derived file has to exist before the report can describe it
  for this_file_extension_to_check in flac mp3 ogg opus srt txt wav
  do
    derived_file="${working_dir}/hpr${ep_num}.${this_file_extension_to_check}"
    if [[ ! -s "${derived_file}" ]]
    then
      ls -al "${derived_file}"
      echo_error "The generated media is missing \"${this_file_extension_to_check}\"."
    fi
  done

  if [[ ! -s "${initial_report}" ]]
  then
    ls -al "${initial_report}"
    echo_error "The initial report is not available.\"${initial_report}\""
  fi

  # Start from the initial report without its closing tags so more can be appended
  grep -Pv '</body>|</html>' "${initial_report}" > "${final_report}"

  cat >> "${final_report}" <<EOF
<h3>Text To Speech</h3>
<p>
$( echo "${synopsis}" )
</p>
<p>
<audio controls="" preload="none" style="width:600px;" >
<source src="${working_dir}/processing//episode_tts.flac" type="audio/flac">
</audio>
<br />
<a href="${working_dir}/processing//episode_tts.flac">${working_dir}/processing//episode_tts.flac</a>
</p>
<hr />
EOF

  for this_file_extension_to_check in flac mp3 ogg opus wav
  do
    derived_file="${working_dir}/hpr${ep_num}.${this_file_extension_to_check}"
    report_prefix="${working_dir}/processing/hpr${ep_num}_${this_file_extension_to_check}"

    # Collect the images and the tool reports for this format
    ffmpeg -hide_banner -loglevel error -y -i "${derived_file}" -lavfi "showspectrumpic=s=960x540" "${report_prefix}_spectrum.png"
    audio2image.bash "${derived_file}" && mv -v "${working_dir}/hpr${ep_num}.png" "${report_prefix}_waveform.png"
    mediainfo "${derived_file}" > "${report_prefix}_mediainfo.txt"
    exiftool "${derived_file}" > "${report_prefix}_exiftool.txt"
    ffprobe "${derived_file}" 2>&1 | grep Audio: | sed 's/^.\s//g' > "${report_prefix}_ffprobe.txt"
    file --brief --mime "${derived_file}" >> "${report_prefix}_ffprobe.txt"
    this_file_mime_type="$( file --brief --mime-type "${derived_file}" )"

    for this_file_to_check in spectrum.png waveform.png mediainfo.txt exiftool.txt ffprobe.txt
    do
      if [[ ! -s "${report_prefix}_${this_file_to_check}" ]]
      then
        ls -al "${report_prefix}_${this_file_to_check}"
        echo_error "The inital report information is missing \"${this_file_to_check}\"."
      fi
    done

    cat >> "${final_report}" <<EOF
<h2>Report for derived media file "hpr${ep_num}.${this_file_extension_to_check}"</h2>
<h3>mediainfo report</h3>
<code>
<pre>
$( cat "${report_prefix}_mediainfo.txt" )
</pre>
</code>
<h3>exiftool report</h3>
<code>
<pre>
$( cat "${report_prefix}_exiftool.txt" )
</pre>
</code>
<h3>Audio Spectrum</h3>
<p>
<img src="${report_prefix}_spectrum.png" alt="Spectrum" />
</p>
<h3>Audio Waveform</h3>
<p>
<img src="${report_prefix}_waveform.png" alt="Waveform" />
</p>
<pre>
$( cat "${report_prefix}_ffprobe.txt" )
</pre>
<p>
<audio controls="" preload="none" style="width:600px;" >
<source src="${derived_file}" type="${this_file_mime_type}" >
</audio>
<br />
<a href="${derived_file}">${derived_file}</a>
</p>
EOF
  done

  # Close the report with the shownotes, the subtitles and the transcript
  cat >> "${final_report}" <<EOF

<h3>Rendered shownotes.html</h3>
<hr />
$( cat "${shownotes_html%.*}_edited.html" )
<hr />
<h3>Subtitle File</h3>
<hr />
<pre>
$( cat "${working_dir}/hpr${ep_num}.srt" )
</pre>
<hr />
<h3>Transcript File</h3>
<hr />
<pre>
$( cat "${working_dir}/hpr${ep_num}.txt" )
</pre>
<hr />
</body>
</html>
EOF
}
#################################################
## Manual review of the final report
#
# Opens "${final_report}" in librewolf (in the background) and asks for a
# single key press. Anything other than "y" or "Y" stops the run through
# echo_error.
#
# Globals read: final_report
function manual_final_review() {
  echo_debug "Validating the final report."

  if ! [[ -n "${final_report}" && -s "${final_report}" ]]
  then
    ls -al "${final_report}"
    echo_error "The files needed for to generate the final report information are not available."
  fi

  librewolf "${final_report}" >/dev/null 2>&1 &
  # # # # bluefish "${shownotes_html%.*}_edited.html" >/dev/null 2>&1 &
  # https://markdowntohtml.com/

  # -n 1 returns after one key press, so the answer is never longer than one character
  read -p "Does the metadata 'look ok ? (N|y) ? " -n 1 -r
  echo # (optional) move to a new line

  case "${REPLY}" in
    y|Y)
      ;;
    *)
      echo "skipping...."
      echo_error "The final review was not approved."
      ;;
  esac
}
#################################################
# Register the assets with the hpr database
#
# Writes one csv row per asset (the derived audio, subtitle and transcript
# files plus every "hpr${ep_num}_image_*.*" file directly in the working
# directory) to "${working_dir}/hpr${ep_num}_assets.csv", converts the csv to
# json with csvtojson and jq, and posts the json to the assets.php endpoint of
# the hub.
#
# Globals read:    working_dir, ep_num, HOME
# Globals written: this_asset and the this_asset_* fields, variables, variable,
#                  response
# Exits through echo_error when an asset is missing, a field is empty, the json
# could not be produced, or the hub does not answer with http code 200.
function register_assets() {
  echo_debug "Registering the assets with the hpr database"

  local assets_csv="${working_dir}/hpr${ep_num}_assets.csv"
  local assets_json="${working_dir}/hpr${ep_num}_assets.json"

  if [[ -s "${assets_csv}" ]]
  then
    echo_debug "Removing \"${assets_csv}\"."
    rm -v "${assets_csv}"
  fi
  # A json file left over from an earlier run would satisfy the check further
  # down even when the conversion fails, so it is removed up front.
  rm -f -- "${assets_json}"

  echo '"episode_id","filename","extension","size", "sha1sum", "mime_type", "file_type"' | tee "${assets_csv}"

  # Collect the names in an array so that file names with spaces stay intact
  local -a asset_filenames=(
    "hpr${ep_num}.flac"
    "hpr${ep_num}.wav"
    "hpr${ep_num}.mp3"
    "hpr${ep_num}.ogg"
    "hpr${ep_num}.opus"
    "hpr${ep_num}.srt"
    "hpr${ep_num}.txt"
  )
  local image_path
  while IFS= read -r -d '' image_path
  do
    asset_filenames+=( "${image_path}" )
  done < <( find "${working_dir}/" -maxdepth 1 -type f -iname "hpr${ep_num}_image_*.*" -print0 )

  for this_asset_filename in "${asset_filenames[@]}"
  do
    this_asset_filename="$( basename "${this_asset_filename}" )"
    echo_debug "Registering \"${this_asset_filename}\"."
    this_asset="${working_dir}/${this_asset_filename}"
    if [[ ! -s "${this_asset}" ]]
    then
      # echo_error exits the script, so the listing has to be printed first.
      ls -al "${this_asset}"
      echo_error "Failed to register missing file \"${this_asset}\"."
    fi

    this_asset_basename="$( basename "${this_asset}" )"
    this_asset_extension="${this_asset_basename##*.}"
    # Count the bytes directly instead of parsing a column of "ls -al"
    this_asset_size="$( wc -c < "${this_asset}" )"
    this_asset_size="${this_asset_size//[[:space:]]/}"
    this_asset_sha1sum="$( sha1sum "${this_asset}" | awk '{print $1}' )"
    this_asset_mime_type="$( file --dereference --brief --mime "${this_asset}" )"
    this_asset_file_type="$( file --dereference --brief "${this_asset}" )"

    # Very long descriptions are replaced by the mime type. The previous
    # "echo | wc --chars" test counted the trailing newline, so "more than 130"
    # there equals "130 or more" characters of the text itself.
    if (( ${#this_asset_file_type} >= 130 ))
    then
      this_asset_file_type="${this_asset_mime_type}"
    fi

    variables=( ep_num this_asset_basename this_asset_extension this_asset_size this_asset_sha1sum this_asset_mime_type this_asset_file_type working_dir )
    for variable in "${variables[@]}"
    do
      if [ -z "${!variable}" ]
      then # indirect expansion here
        echo_error "The variable \"${variable}\" is missing."
      else
        echo_debug "The variable \"${variable}\" is set to \"${!variable}\""
      fi
    done

    echo "${ep_num},\"${this_asset_basename}\",\"${this_asset_extension}\",\"${this_asset_size}\",\"${this_asset_sha1sum}\",\"${this_asset_mime_type}\",\"${this_asset_file_type}\"" | tee --append "${assets_csv}"
  done

  if [ -s "${assets_csv}" ]
  then
    csvtojson < "${assets_csv}" | jq '{"assets":[.[]]}' | tee "${assets_json}"
  fi

  if [ ! -s "${assets_json}" ]
  then
    echo_error "The asset json file \"${assets_json}\" is missing."
  fi

  # Only the http status code is captured, the body of the reply is discarded
  response="$( curl --silent --netrc-file "${HOME}/.netrc" --write-out '%{http_code}' --output /dev/null --request POST https://hub.hackerpublicradio.org/cms/assets.php --data-ascii "@${assets_json}" --header "Content-Type: application/json" )"
  if [[ -z "${response}" || "${response}" != "200" ]]
  then
    echo_error "The assets for episode hpr${ep_num} has not been registered. The response was \"${response}\""
  fi
}
#################################################
# Copy the files to the origin server
#
# Sends the mp3, ogg, opus, srt and txt files plus every
# "hpr${ep_num}_image_*.*" file directly in the working directory to
# "rsync.net:hpr/eps/hpr${ep_num}/", then compares the sha1 reported by the
# origin server with the local sha1sum of each file.
#
# Globals read:    working_dir, ep_num
# Globals written: this_asset, this_file, origin_sha1sum, this_asset_sha1sum
# Exits through echo_error when a file is missing, the transfer fails, or the
# checksums are missing or different.
function copy_files_to_origin_server() {
  echo_debug "Copying the files to the origin server"
  # TODO get a origin server capable of storing all the files

  # Collect the names in an array first: it keeps file names with spaces
  # intact, and ssh inside the loop cannot swallow the list from stdin.
  local -a origin_assets=(
    "hpr${ep_num}.mp3"
    "hpr${ep_num}.ogg"
    "hpr${ep_num}.opus"
    "hpr${ep_num}.srt"
    "hpr${ep_num}.txt"
  )
  local image_path
  # Files are looked up as "${working_dir}/<name>" below, so only the top
  # level of the working directory is searched.
  while IFS= read -r -d '' image_path
  do
    origin_assets+=( "${image_path}" )
  done < <( find "${working_dir}/" -maxdepth 1 -type f -iname "hpr${ep_num}_image_*.*" -print0 )

  local origin_reply
  for this_asset in "${origin_assets[@]}"
  do
    this_asset="$( basename "${this_asset}" )"
    this_file="${working_dir}/${this_asset}"
    echo_debug "Copying \"${this_file}\" to the origin server."
    if [[ ! -s "${this_file}" ]]
    then
      # echo_error exits the script, so the listing has to be printed first.
      ls -al "${this_file}"
      echo_error "Failed to transfer missing file \"${this_file}\"."
    fi

    rsync --archive --quiet --partial --progress "${this_file}" "rsync.net:hpr/eps/hpr${ep_num}/${this_asset}" \
      || echo_error "Failed to transfer \"${this_file}\" to the origin server."

    # The checksum is taken as the last word of whatever the remote sha1 command prints
    origin_reply="$( ssh rsync.net "sha1 hpr/eps/hpr${ep_num}/${this_asset}" 2> /dev/null )"
    origin_sha1sum="$( awk '{print $NF}' <<< "${origin_reply//$'\n'/ }" )"
    this_asset_sha1sum="$( sha1sum "${this_file}" | awk '{print $1}' )"

    if [[ -z "${origin_sha1sum}" || -z "${this_asset_sha1sum}" ]]
    then
      echo_error "Could not determine the local/origin sha1sum for file \"${this_file}\"."
    fi
    if [ "${origin_sha1sum}" != "${this_asset_sha1sum}" ]
    then
      echo_error "The local sha1sum \"${this_asset_sha1sum}\" and origin \"${origin_sha1sum}\" are mismatched for file \"${this_file}\"."
    fi
  done
}
#################################################
# Download the queue status list of the hub to "${processing_dir}/status.csv".
# Exits through echo_error when the hub does not answer with http code 200.
function _download_queue_status() {
  if [ "$( curl --silent --netrc-file "${HOME}/.netrc" --write-out '%{http_code}' https://hub.hackerpublicradio.org/cms/status.php --output "${processing_dir}/status.csv" )" != 200 ]
  then
    echo_error "Could not get a list of the queue status from \"https://hub.hackerpublicradio.org/cms/status.php\""
  fi
}
#################################################
# Send the derived files to the server borg to be sent to the Internet Archive
#
# Copies the audio files to "borg:/data/IA/uploads/", the transcript, the
# subtitles and the edited shownotes to "borg:/data/IA/uploads/hpr${ep_num}/",
# and then asks the hub to move the show to the status MEDIA_TRANSCODED and
# verifies that the change is visible in the queue status list.
#
# Globals read:    working_dir, ep_num, shownotes_html, processing_dir, HOME
# Globals written: rsync_error, response
# Exits through echo_error when a file is missing, a transfer fails, or the
# status could not be changed. A show that is not in the status SHOW_POSTED
# beforehand is only reported through echo_debug.
function copy_derived_files_to_borg_for_the_internet_archive() {
  echo_debug "Sending the derived files to the server borg to be sent to the Internet Archive"

  local ext
  for ext in flac mp3 ogg opus wav
  do
    if [[ ! -s "${working_dir}/hpr${ep_num}.${ext}" ]]
    then
      # echo_error exits the script, so the listing has to be printed first.
      ls -al "${working_dir}/hpr${ep_num}.${ext}"
      echo_error "The derived media file \"hpr${ep_num}.${ext}\" is missing."
    fi
  done

  echo_debug "rsync -ave ssh --partial --progress \"${working_dir}/hpr${ep_num}.{flac,mp3,ogg,opus,wav}\" borg:/data/IA/uploads/"
  rsync -ave ssh --partial --progress "${working_dir}/hpr${ep_num}".{flac,mp3,ogg,opus,wav} borg:/data/IA/uploads/
  rsync_error="${?}"
  if [ "${rsync_error}" -ne "0" ]
  then
    echo_error "rsync to \"borg:/data/IA/uploads/\" failed with error ${rsync_error}"
  fi

  rsync -ave ssh --partial --progress "${working_dir}/hpr${ep_num}".{txt,srt} "borg:/data/IA/uploads/hpr${ep_num}/"
  rsync_error="${?}"
  if [ "${rsync_error}" -ne "0" ]
  then
    echo_error "rsync to \"borg:/data/IA/uploads/hpr${ep_num}/\" failed with error ${rsync_error}"
  fi

  rsync -ave ssh --partial --progress "${shownotes_html%.*}_edited.html" "borg:/data/IA/uploads/hpr${ep_num}/shownotes.html"
  rsync_error="${?}"
  if [ "${rsync_error}" -ne "0" ]
  then
    echo_error "rsync to \"borg:/data/IA/uploads/hpr${ep_num}/shownotes.html\" failed with error ${rsync_error}"
  fi

  # Get the current status
  _download_queue_status

  # Check the current status is correct SHOW_POSTED
  response="$( grep -P ",${ep_num},.*SHOW_POSTED," "${processing_dir}/status.csv" | head -1 | sed 's/,/ /g' )"
  if [ -z "${response}" ]
  then
    grep -P ",${ep_num},.*SHOW_POSTED," "${processing_dir}/status.csv"
    echo_debug "The show \"${ep_num}\" hasn't the correct status of \"SHOW_POSTED\" in the database."
  fi

  # Try and change the online db status to MEDIA_TRANSCODED.
  # The body of the reply is discarded: without --output it would be captured
  # together with the http code and the comparison with 200 would fail.
  if [ "$( curl --silent --netrc-file "${HOME}/.netrc" --write-out '%{http_code}' --output /dev/null "https://hub.hackerpublicradio.org/cms/status.php?ep_num=${ep_num}&status=MEDIA_TRANSCODED" )" != 200 ]
  then
    echo_error "Could not change the status of \"${ep_num}\" to \"MEDIA_TRANSCODED\""
  fi

  # Get the current status
  _download_queue_status

  # Check the current status is correct MEDIA_TRANSCODED
  response="$( grep -P ",${ep_num},.*MEDIA_TRANSCODED," "${processing_dir}/status.csv" | head -1 | sed 's/,/ /g' )"
  if [ -z "${response}" ]
  then
    grep -P ",${ep_num},.*MEDIA_TRANSCODED," "${processing_dir}/status.csv"
    echo_error "The show \"${ep_num}\" hasn't the correct status of \"MEDIA_TRANSCODED\" in the database."
  fi
  echo_debug "The show \"${ep_num}\" has the correct status of \"MEDIA_TRANSCODED\" in the database."

  #TODO images
  # <img alt="Picture 1 shows the broken dog walking accessory." border="0" height="300" src="https://archive.org/download/hpr4283/hpr4283/hpr4283_1_tn.jpeg" width="400" />
}
#################################################
# Main execution starts here
#
# This tool will process the HPR shows allowing the janitor to review the media and fix shownotes.
# The steps run strictly in the order below; each step exits the script when it fails.
#
# TODO Add support for reserve queue - process validate and move to reserve dir
# TODO Add support to reprocess processed shows - when given only new media reprocess it, update duration on hub, generate and download shownotes.{html,json} from db
# TODO Add support for community news - reusing ^^^
# TODO Add support for stereo for some episodes that request it
# TODO Include links in extract_images_brute_force
# TODO take screenshots of the rendered episode on the hpr website
# TODO audio_channels default to mono - stereo as an option
# TODO Add chapter support
# TODO incorporate direct upload to the IA
# TODO copy the files to the backup disk

program_checks                # We know that all the programs and variables are set

# "$@" is quoted so that an argument containing spaces is passed on as one word
get_working_dir "$@"          # We have a working directory and a valid json file

get_episode_metadata "$@"     # We have all the metadata we need to process the show.

extract_images_brute_force    # We have extracted the images by brute force

media_checks                  #

generate_initial_report       # Generate Initial Report for review by the Janitors

manual_shownotes_review       # Janitors review audio and shownote. Skips if done.

post_show_to_hpr_db           # Posts the episode to HPR. Skips if it is already posted.

create_tts_summary            # Generate text to speech summary

generate_intro                # Generate Intro from the intro theme with overlay of a lead in silence then the tts summary

generate_parent_audio         # Combines the intro, the episode, and the outro to a final cut.

generate_derived_media        # Generate the flac wav mp3 ogg opus files

generate_show_transcript

generate_final_report

manual_final_review

register_assets

copy_files_to_origin_server

copy_derived_files_to_borg_for_the_internet_archive

echo_debug "The End"
exit 0