Checks db for the correct files and uploads to IA

This commit is contained in:
Ken Fallon 2025-05-10 21:33:05 +02:00
parent d7bee0be56
commit 946fb47508

View File

@ -114,6 +114,8 @@ function check_variable_is_correct() {
echo_error "The \"ep_num\" variable is not a valid number between 1 and 9999."
fi
;;
shownotes_edited)
if [[ ! -s "${shownotes_edited}" || -z "${shownotes_edited}" ]]
then
@ -133,6 +135,11 @@ function check_variable_is_correct() {
then
echo_error "The \"episode_summary_json\" variable has not a valid \"application/json\" mime type."
fi
jq '.' "${episode_summary_json}" >/dev/null 2>&1
if [ $? -ne 0 ]
then
echo_error "The file \"${episode_summary_json}\" is not valid json."
fi
;;
assets_csv)
if [[ ! -s "${assets_csv}" || -z "${assets_csv}" ]]
@ -153,7 +160,43 @@ function check_variable_is_correct() {
then
echo_error "The \"assets_json\" variable has not a valid \"application/json\" mime type."
fi
jq '.' "${assets_json}" >/dev/null 2>&1
if [ $? -ne 0 ]
then
echo_error "The file \"${assets_json}\" is not valid json."
fi
;;
files_json)
if [[ ! -s "${files_json}" || -z "${files_json}" ]]
then
echo_error "The \"files_json\" variable/file is missing."
fi
if [ "$( file --brief --mime-type "${files_json}" | grep --count 'application/json' )" -ne "1" ]
then
echo_error "The \"files_json\" variable has not a valid \"application/json\" mime type."
fi
jq '.' "${files_json}" >/dev/null 2>&1
if [ $? -ne 0 ]
then
echo_error "The file \"${files_json}\" is not valid json."
fi
;;
files_xml)
if [[ ! -s "${files_xml}" || -z "${files_xml}" ]]
then
echo_error "The \"files_xml\" variable/file is missing."
fi
if [ "$( file --brief --mime-type "${files_xml}" | grep --count 'text/xml' )" -ne "1" ]
then
echo_error "The \"files_xml\" variable has not a valid \"text/xml\" mime type."
fi
xmllint --format "${files_xml}" >/dev/null 2>&1
if [ $? -ne 0 ]
then
echo_error "The file \"${files_xml}\" is not valid xml."
fi
;;
album)
if [[ -z "${album}" || "${album}" == "null" ]]
@ -1686,23 +1729,90 @@ function upload_files_to_the_internet_archive() {
check_variable_is_correct working_dir ep_num
if [ -z "${assets_json}" ]
# Get the file list from the IA
if [ -z "${files_xml}" ]
then
assets_json="${working_dir}/episode_summary.json"
files_xml="${working_dir}/hpr${ep_num}_files.xml"
fi
if [ ! -s "${assets_json}" ]
if [ ! -s "${files_xml}" ]
then
echo_debug "The \"assets_json\" variable/file is missing."
if [ "$( curl --silent --netrc --write-out '%{http_code}' https://hub.hackerpublicradio.org/cms/say.php?id=${ep_num} --output "${assets_json}" )" != 200 ]
echo_debug "Getting the \"files_xml\" variable/file from the HPR site."
if [ "$( curl --silent --netrc --location --write-out '%{http_code}' https://archive.org/download/hpr${ep_num}/hpr${ep_num}_files.xml --output "${files_xml}" )" != 200 ]
then
echo_error "Could not find file list \"https://archive.org/download/hpr${ep_num}/hpr${ep_num}_files.xml\""
fi
fi
check_variable_is_correct files_xml
# Get the file list from the HPR db
if [ -z "${files_json}" ]
then
files_json="${working_dir}/hpr${ep_num}_files.json"
fi
if [ ! -s "${files_json}" ]
then
echo_debug "Getting the \"files_json\" variable/file from the HPR site."
if [ "$( curl --silent --netrc --write-out '%{http_code}' https://hub.hackerpublicradio.org/cms/assets.php?id=${ep_num} --output "${files_json}" )" != 200 ]
then
echo_error "The Episode hpr${ep_num} has not been posted"
fi
fi
check_variable_is_correct assets_json
check_variable_is_correct files_json
# Compare every file the HPR database lists for this episode with the file
# list published by the Internet Archive, and upload the ones that are
# missing from the IA or differ in name, size or sha1.
#
# Reads:  ep_num, working_dir, files_json (HPR db listing), files_xml (IA listing)
# Calls:  jq, xmlstarlet, sha1sum, ia, echo_debug, echo_error
#
# The names are read one per line into an array so that a name containing
# spaces or glob characters stays a single, literal loop item.
mapfile -t hpr_file_names < <( jq --raw-output --arg episode "hpr${ep_num}" '.[$episode] | keys[]' "${files_json}" )
for this_file in "${hpr_file_names[@]}"
do
  # name<TAB>size<TAB>sha1 as recorded in the HPR database. The episode key
  # and file name are passed with --arg so they are compared as data and
  # never parsed as part of the jq program.
  db_file_info="$( jq --raw-output --arg episode "hpr${ep_num}" --arg name "${this_file}" '.[$episode][] | select(.filename == $name) | [ .filename, .size, .sha1sum ] | @tsv' "${files_json}" )"
  # The same three fields as published by the IA. The arrow is a temporary
  # separator that sed turns into a tab so both strings compare directly.
  # NOTE(review): the name is interpolated into the XPath expression, so this
  # assumes file names never contain a double quote - confirm.
  ia_file_info="$( xmlstarlet sel -T -t -m "/files/file[@name=\"${this_file}\"]" -v "concat(@name, '→', size, '→', sha1)" -n "${files_xml}" | sed 's/→/\t/g' )"
  if [ -z "${db_file_info}" ]
  then
    echo_error "The information for \"${this_file}\" could not be found in \"${files_json}\""
  fi
  if [ -n "${ia_file_info}" ]
  then
    if [[ "${db_file_info}" == "${ia_file_info}" ]]
    then
      echo_debug "The file \"${this_file}\" on HPR and on the IA matches."
      continue
    else
      echo_debug "The file \"${this_file}\" on HPR and on the IA do not match."
    fi
  fi
  echo_debug "Transferring \"${this_file}\" to the IA"
  this_file_path="${working_dir}/${this_file}"
  if [ ! -s "${this_file_path}" ]
  then
    echo_error "The file \"${this_file}\" could not be found in the working directory \"${working_dir}\"."
  fi
  # Only upload a local copy whose checksum agrees with the database.
  this_file_path_sha1="$( sha1sum -- "${this_file_path}" | awk '{print $1}' )"
  # The sha1 is the last tab separated field; strip everything up to and
  # including the final tab. An empty sha1 column stays empty instead of
  # silently becoming the size field.
  db_file_info_sha1="${db_file_info##*$'\t'}"
  if [ "${this_file_path_sha1}" != "${db_file_info_sha1}" ]
  then
    echo_error "The sha1 sum of the local file \"${this_file}\" does not match the HPR database."
    # Skip the upload should echo_error return instead of exiting.
    continue
  else
    ia upload "hpr${ep_num}" "${this_file_path}" --header "x-archive-keep-old-version:0" --retries=5 --no-derive --no-backup
  fi
done
}
function hide_me() {
echo "hidden"
#
# Compare assets_json to files_xml
# locate the different files and upload them
# if [ "$( find ${working_dir} -mindepth 1 -maxdepth 1 -type f \( -iname "hpr${ep_num}.flac" -or -iname "hpr${ep_num}.mp3" -or -iname "hpr${ep_num}.ogg" -or -iname "hpr${ep_num}.opus" -or -iname "hpr${ep_num}_source.*" -or -iname "hpr${ep_num}.srt" -or -iname "hpr${ep_num}.txt" -or -iname "hpr${ep_num}.wav" \) | wc --lines )" -ne "8" ]
# then
# echo_error "There are not 8 derived files for the Internet Archive."