#!/bin/bash
# hpr-tools/workflow/hpr-get-and-transcribe.bash
#
# Downloads the HPR queue status, and for every submitted show that has no
# transcript (*.srt) locally yet: copies the show from the "hpr" ssh host,
# runs whisper over its media files and syncs the result back.

# Local working directory. status.csv is stored here and every show is
# copied into its own sub-directory below it.
destination_dir="$HOME/tmp/hpr/processing/"

# Path on the "hpr" ssh host below which the upload/ and reserve/
# directories are searched for shows.
server_dir="hub.hackerpublicradio.org"
#################################################
# Display Error message, display help and exit
#
# Arguments: $@ - words of the error message. They are joined into one
#                 string (with the first character of IFS, a space by
#                 default); backslash escapes are interpreted (echo -e).
# Outputs:   "ERROR: <message>" on stdout (the stderr redirect is left
#            disabled, as before).
# Exits:     always terminates the current (sub)shell with status 1.
function echo_error() {
  # "$*" instead of "$@": inside a larger string "$@" splits into several
  # words and only the first one carries the "ERROR: " prefix.
  echo -e "ERROR: $*" #1>&2
  display_help_and_exit
  # Safety net in case display_help_and_exit ever returns.
  exit 1
}
#################################################
# Display Information message
#
# Globals:   debug (read) - the message is suppressed only when this is
#                           exactly "0"; unset, empty or any other value
#                           lets the message through.
# Arguments: $@ - words of the message. They are joined into one string;
#                 backslash escapes are interpreted (echo -e).
# Outputs:   "INFO: <message>" on stdout (the stderr redirect is left
#            disabled, as before).
function echo_debug() {
  if [ "${debug}" != "0" ]; then
    # "$*" instead of "$@" so the message is a single word for echo.
    echo -e "INFO: $*" #1>&2
  fi
}
#################################################
# Display Help
#
# Hands a pointer to the project documentation to echo_debug (so whether it
# is shown is decided there) and then terminates the current (sub)shell
# with status 1.
display_help_and_exit() {
  local help_text="For more information see https://repo.anhonesthost.net/HPR/hpr_documentation"
  echo_debug "${help_text}"
  exit 1
}
#################################################
# main
#
# 1. Download the queue status CSV into ${destination_dir}/status.csv.
# 2. Keep the rows whose status is SHOW_SUBMITTED or RESERVE_SHOW_SUBMITTED,
#    sorted on the CSV fields from the second one onward.
# 3. For each row that has no matching *.srt below destination_dir yet:
#    find the show directory on the "hpr" ssh host, rsync it down, run
#    whisper on every file that looks like media, and rsync the directory
#    back to the server.

status_url="https://hub.hackerpublicradio.org/cms/status.php"
status_file="${destination_dir}/status.csv"

# curl prints only the HTTP status code on stdout; the body goes to the file.
http_code="$( curl --location --silent --netrc-file "${HOME}/.netrc" --write-out '%{http_code}' "${status_url}" --output "${status_file}" )"
if [ "${http_code}" != 200 ]; then
  echo_error "Could not get a list of the queue status from \"${status_url}\""
fi

if [ ! -s "${status_file}" ]; then
  echo_error "Failed to retrieve \"${status_file}\" from server."
fi

response="$( grep --extended-regexp ',SHOW_SUBMITTED,|,RESERVE_SHOW_SUBMITTED,' "${status_file}" | sort -t ',' -k 2 )"
if [ -z "${response}" ]; then
  echo_debug "Getting a list of all the reservations."
  curl --location --silent --netrc-file "${HOME}/.netrc" "${status_url}" | sort -n
  echo_error "There appear to be no more shows with the status \"SHOW_SUBMITTED\"."
fi

# Read the rows from a here-string: every row stays intact (no word
# splitting or globbing of the CSV text) and the loop runs in the main
# shell, so echo_error inside it ends the whole script.
while IFS= read -r this_line; do
  IFS=, read -r timestamp_epoc ep_num ep_date key status email <<< "${this_line}"

  # An empty key would turn both find patterns below into "match anything".
  if [ -z "${key}" ]; then
    echo_debug "Skipping status line without a key: \"${this_line}\""
    continue
  fi

  # Rows with episode number 9999 are looked up under reserve/, all others
  # under upload/.
  if [ "${ep_num}" == "9999" ]; then
    source_dir="${server_dir}/reserve/"
  else
    source_dir="${server_dir}/upload/"
  fi

  # A transcript for this key anywhere one level below destination_dir
  # means the show was already handled.
  srt_count="$( find "${destination_dir}" -maxdepth 2 -mindepth 2 -type f -iname "*${key}*.srt" | wc --lines )"
  if [ "${srt_count}" -ne "0" ]; then
    echo_debug "The episode \"${timestamp_epoc} ${ep_num} ${ep_date} ${key} ${status} ${email}\" has been processed."
    continue
  fi

  # Ask the server for the directory whose name contains the key and strip
  # the source_dir prefix so only the directory name remains. ssh -n keeps
  # ssh from consuming the loop's stdin.
  show_path="$( ssh -n hpr -t "find \"${source_dir}\" -maxdepth 1 -mindepth 1 -type d -iname \"*${key}*\"" 2>/dev/null | sed "s#${source_dir}##g" )"
  if [ -z "${show_path}" ]; then
    echo_error "The show_path could not be found."
  fi

  show_dir="${destination_dir}/${show_path}"
  echo_debug "Copying \"hpr:${source_dir}/${show_path}/\" to \"${show_dir}/\""
  rsync --archive --verbose --partial --progress "hpr:${source_dir}/${show_path}/" "${show_dir}/"

  cd "${show_dir}" || echo_error "Could not change directory to \"${show_dir}\"."
  pwd

  # file(1) prints "<path>: <description>"; keep the lines that mention an
  # audio/video type and cut the path back out for whisper.
  find "${show_dir}/" -type f -exec file {} \; \
    | grep -Ei 'audio|mpeg|video|MP4' \
    | awk -F ': ' '{print $1}' \
    | while IFS= read -r this_media; do
        whisper --output_format srt --model tiny --language en --output_dir "${show_dir}" "${this_media}"
      done

  # Push the directory, now including the generated transcripts, back.
  rsync --archive --verbose --partial --progress "${show_dir}/" "hpr:${source_dir}/${show_path}/"
done <<< "${response}"