diff --git a/workflow/process_episode.bash b/workflow/process_episode.bash index 25b0909..51ce5e6 100755 --- a/workflow/process_episode.bash +++ b/workflow/process_episode.bash @@ -114,6 +114,8 @@ function check_variable_is_correct() { echo_error "The \"ep_num\" variable is not a valid number between 1 and 9999." fi ;; + + shownotes_edited) if [[ ! -s "${shownotes_edited}" || -z "${shownotes_edited}" ]] then @@ -133,6 +135,11 @@ function check_variable_is_correct() { then echo_error "The \"episode_summary_json\" variable has not a valid \"application/json\" mime type." fi + jq '.' "${episode_summary_json}" >/dev/null 2>&1 + if [ $? -ne 0 ] + then + echo_error "The file \"${episode_summary_json}\" is not valid json." + fi ;; assets_csv) if [[ ! -s "${assets_csv}" || -z "${assets_csv}" ]] @@ -153,7 +160,43 @@ function check_variable_is_correct() { then echo_error "The \"assets_json\" variable has not a valid \"application/json\" mime type." fi + jq '.' "${assets_json}" >/dev/null 2>&1 + if [ $? -ne 0 ] + then + echo_error "The file \"${assets_json}\" is not valid json." + fi ;; + files_json) + if [[ ! -s "${files_json}" || -z "${files_json}" ]] + then + echo_error "The \"files_json\" variable/file is missing." + fi + if [ "$( file --brief --mime-type "${files_json}" | grep --count 'application/json' )" -ne "1" ] + then + echo_error "The \"files_json\" variable has not a valid \"application/json\" mime type." + fi + jq '.' "${files_json}" >/dev/null 2>&1 + if [ $? -ne 0 ] + then + echo_error "The file \"${files_json}\" is not valid json." + fi + ;; + files_xml) + if [[ ! -s "${files_xml}" || -z "${files_xml}" ]] + then + echo_error "The \"files_xml\" variable/file is missing." + fi + if [ "$( file --brief --mime-type "${files_xml}" | grep --count 'text/xml' )" -ne "1" ] + then + echo_error "The \"files_xml\" variable has not a valid \"text/xml\" mime type." + fi + xmllint --format "${files_xml}" >/dev/null 2>&1 + if [ $? -ne 0 ] + then + echo_error "The file \"${files_xml}\" is not valid xml." + fi + ;; + album) if [[ -z "${album}" || "${album}" == "null" ]] @@ -1686,23 +1729,90 @@ function upload_files_to_the_internet_archive() { check_variable_is_correct working_dir ep_num - if [ -z "${assets_json}" ] + # Get the file list from the IA + if [ -z "${files_xml}" ] then - assets_json="${working_dir}/episode_summary.json" + files_xml="${working_dir}/hpr${ep_num}_files.xml" fi - if [ ! -s "${assets_json}" ] + if [ ! -s "${files_xml}" ] then - echo_debug "The \"assets_json\" variable/file is missing." - if [ "$( curl --silent --netrc --write-out '%{http_code}' https://hub.hackerpublicradio.org/cms/say.php?id=${ep_num} --output "${assets_json}" )" != 200 ] + echo_debug "Getting the \"files_xml\" variable/file from the HPR site." + if [ "$( curl --silent --netrc --location --write-out '%{http_code}' https://archive.org/download/hpr${ep_num}/hpr${ep_num}_files.xml --output "${files_xml}" )" != 200 ] + then + echo_error "Could not find file list \"https://archive.org/download/hpr${ep_num}/hpr${ep_num}_files.xml\"" + fi + fi + check_variable_is_correct files_xml + + # Get the file list from the HPR db + if [ -z "${files_json}" ] + then + files_json="${working_dir}/hpr${ep_num}_files.json" + fi + + if [ ! -s "${files_json}" ] + then + echo_debug "Getting the \"files_json\" variable/file from the HPR site." + if [ "$( curl --silent --netrc --write-out '%{http_code}' https://hub.hackerpublicradio.org/cms/assets.php?id=${ep_num} --output "${files_json}" )" != 200 ] then echo_error "The Episode hpr${ep_num} has not been posted" fi fi - check_variable_is_correct assets_json - + check_variable_is_correct files_json + + for this_file in $( jq --raw-output ".hpr${ep_num} | keys | @tsv" "${files_json}" ) + do + db_file_info="$( jq --raw-output ".hpr${ep_num}.[] | select(.filename==\"${this_file}\") | [ .filename, .size, .sha1sum ] | @tsv" "${files_json}" )" + ia_file_info="$( xmlstarlet sel -T -t -m "/files/file[@name=\"${this_file}\"]" -v "concat(@name, '→', size, '→', sha1)" -n "${files_xml}" | sed 's/→/\t/g' )" + if [ -z "${db_file_info}" ] + then + echo_error "The information for \"${this_file}\" could not be found in \"${files_json}\"" + fi + if [ ! -z "${ia_file_info}" ] + then + if [ "${db_file_info}" == "${ia_file_info}" ] + then + echo_debug "The file \"${this_file}\" on HPR and on the IA matches." + continue + else + echo_debug "The file \"${this_file}\" on HPR and on the IA do not match." + fi + fi + + echo_debug "Transferring \"${this_file}\" to the IA" + + this_file_path="${working_dir}/${this_file}" + + if [ ! -s "${this_file_path}" ] + then + echo_error "The file \"${this_file}\" could not be found in the working directory \"${working_dir}\"." + fi + + this_file_path_sha1="$( sha1sum $this_file_path | awk '{print $1}' )" + db_file_info_sha1="$( echo ${db_file_info} | awk '{print $NF}' )" + + if [ "${this_file_path_sha1}" != "${db_file_info_sha1}" ] + then + echo_error "The sha1 sum of the local file \"${this_file}\" does not match the HPR database." + continue + else + ia upload hpr${ep_num} "${this_file_path}" --header "x-archive-keep-old-version:0" --retries=5 --no-derive --no-backup + fi + + done + +} + +function hide_me() { + +echo "hidden" +# + # Compare assets_json to files_xml + # locate the different files and upload them + # if [ "$( find ${working_dir} -mindepth 1 -maxdepth 1 -type f \( -iname "hpr${ep_num}.flac" -or -iname "hpr${ep_num}.mp3" -or -iname "hpr${ep_num}.ogg" -or -iname "hpr${ep_num}.opus" -or -iname "hpr${ep_num}_source.*" -or -iname "hpr${ep_num}.srt" -or -iname "hpr${ep_num}.txt" -or -iname "hpr${ep_num}.wav" \) | wc --lines )" -ne "8" ] # then # echo_error "There are not 8 derived files for the Internet Archive."