From 27bb3be4afc279a9bf759c7793b24653db9ed10f Mon Sep 17 00:00:00 2001 From: Ken Fallon Date: Mon, 8 Sep 2025 09:49:49 +0200 Subject: [PATCH 1/5] Move https://repo.anhonesthost.net/HPR/hpr_hub/issues/88 --- workflow/hpr-get-and-transcribe.bash | 35 ++++++++++++++++++++ workflow/hpr-publish.bash | 44 +++++++++++++++++++++++++ workflow/hpr_ccdn_stats.bash | 4 +++ workflow/hpr_db_backup.bash | 48 ++++++++++++++++++++++++++++ workflow/mdb.bash | 4 +++ workflow/process_episode.bash | 10 +++--- workflow/run-speedtest.bash | 6 ++++ workflow/update-stats.bash | 8 +++++ 8 files changed, 154 insertions(+), 5 deletions(-) create mode 100755 workflow/hpr-get-and-transcribe.bash create mode 100755 workflow/hpr-publish.bash create mode 100644 workflow/hpr_ccdn_stats.bash create mode 100755 workflow/hpr_db_backup.bash create mode 100755 workflow/mdb.bash create mode 100644 workflow/run-speedtest.bash create mode 100644 workflow/update-stats.bash diff --git a/workflow/hpr-get-and-transcribe.bash b/workflow/hpr-get-and-transcribe.bash new file mode 100755 index 0000000..9ae03ca --- /dev/null +++ b/workflow/hpr-get-and-transcribe.bash @@ -0,0 +1,35 @@ +#!/bin/bash + +rsync -av --partial --progress hpr:hub.hackerpublicradio.org/upload/ $HOME/tmp/hpr/processing/ + +find $HOME/tmp/hpr/processing/*_*_????-??-??_* -type d | sort -t _ -k 2 | while read show_dir +do + echo "${show_dir}" + if [ "$( find "${show_dir}" -type f -iname "*srt" | wc -l )" -eq "0" ] + then + cd "${show_dir}" + ls -haltr + find "${show_dir}/" -type f -exec file {} \; | grep -Ei 'audio|mpeg|video|MP4' | awk -F ': ' '{print $1}' | while read this_media + do + whisper --model tiny --language en --output_dir "${show_dir}" "${this_media}" + done + rsync -av --partial --progress "${show_dir}/" hpr:hub.hackerpublicradio.org/upload/$( basename "${show_dir}")/ + fi +done + +rsync -av --partial --progress hpr:hub.hackerpublicradio.org/reserve/ $HOME/tmp/hpr/reserve/ + +find $HOME/tmp/hpr/reserve/*_*_* -type d | sort -t _ -k 2 | while read show_dir +do + echo "${show_dir}" + if [ "$( find "${show_dir}" -type f -iname "*srt" | wc -l )" -eq "0" ] + then + cd "${show_dir}" + ls -haltr + find "${show_dir}/" -type f -exec file {} \; | grep -Ei 'audio|mpeg|video|MP4' | awk -F ': ' '{print $1}' | while read this_media + do + whisper --model tiny --language en --output_dir "${show_dir}" "${this_media}" + done + rsync -av --partial --progress "${show_dir}/" hpr:hub.hackerpublicradio.org/reserve/$( basename "${show_dir}")/ + fi +done diff --git a/workflow/hpr-publish.bash b/workflow/hpr-publish.bash new file mode 100755 index 0000000..b24bda0 --- /dev/null +++ b/workflow/hpr-publish.bash @@ -0,0 +1,44 @@ +#!/usr/bin/env bash +# Copyright Ken Fallon - Released into the public domain. http://creativecommons.org/publicdomain/ +#============================================================ + +git_dir="$HOME/tmp/hpr/hpr_generator/sourcecode" +if [ ! -d "${git_dir}/.git" ] +then + git clone gitea@repo.anhonesthost.net:HPR/hpr_generator.git "${git_dir}" +fi + +cd "${git_dir}" + +git pull + +# ssh hpr -t "ls -al /home/hpr/www/hpr.sql;md5sum /home/hpr/www/hpr.sql" +# ssh hpr -t "/home/hpr/bin/hpr_db_backup.bash" +# ssh hpr -t "ls -al /home/hpr/www/hpr.sql;md5sum /home/hpr/www/hpr.sql" + +./utils/update-hpr-db.sh +if [ $? -ne 0 ] +then + echo 'Terminating...' >&2 + exit 1 +fi + +./site-generator --all --verbose +if [ $? -ne 0 ] +then + echo 'Terminating...' >&2 + exit 1 +fi + +rsync -av --partial --progress "${git_dir}/public_html/" hpr:hackerpublicradio.org/public_html +#rsync -av --partial --progress "${git_dir}/public_html/" hobbypublicradio.org:hobbypublicradio.org/ + +cd $HOME/sourcecode/hpr/hpr_hub/ +git pull +cd $HOME/sourcecode/hpr/hpr_hub/sql +split --hex-suffixes --lines=1000 --additional-suffix=.sql hpr.sql hpr-db-part- +cd $HOME/sourcecode/hpr/hpr_hub/ +git add $HOME/sourcecode/hpr/hpr_hub/sql/hpr*sql +git commit -m "$(\date -u +%Y-%m-%d_%H-%M-%SZ_%A ) database changed" +git push +#xdg-open https://hackerpublicradio.org/ diff --git a/workflow/hpr_ccdn_stats.bash b/workflow/hpr_ccdn_stats.bash new file mode 100644 index 0000000..bbab7af --- /dev/null +++ b/workflow/hpr_ccdn_stats.bash @@ -0,0 +1,4 @@ +#!/bin/bash + +yesterday="$( \date -u +%Y-%m-%d -d yesterday)" +echo -e "${yesterday}\t$( grep -Ec "${yesterday}T.*Sending request to" /home/hpr/logs/naughty-ip.txt )" >> /home/hpr/hub/hpr_ccdn_stats.tsv diff --git a/workflow/hpr_db_backup.bash b/workflow/hpr_db_backup.bash new file mode 100755 index 0000000..2958f61 --- /dev/null +++ b/workflow/hpr_db_backup.bash @@ -0,0 +1,48 @@ +#!/bin/bash +sql_save_dir="$HOME/hpr/sql" +credential_file="$HOME/.my.cnf" +last_update_txt="${sql_save_dir}/last_update.txt" +sync_delay="30 mins" + +last_update_query="SELECT update_time FROM information_schema.tables tab WHERE update_time > (current_timestamp() - interval 30 day) AND table_type = 'BASE TABLE' AND table_name not in ('reservations') AND table_schema not in ('information_schema', 'sys', 'performance_schema','mysql') ORDER BY update_time ASC LIMIT 1;" + +if [ ! -s "${credential_file}" ] +then + echo "The file \"${credential_file}\" is missing" + exit +fi + +if [ -s "${last_update_txt}" ] +then + echo "Found the last update file \"${last_update_txt}\"" + local_db_last_update_iso8601="$( \date --utc --date="$( cat ${last_update_txt} )" +%Y-%m-%dT%H:%M:%SZ )" + local_db_last_update_epoch="$( \date --utc --date="$( cat ${last_update_txt} )" +%s )" + echo "Latest change saved locally is ${local_db_last_update_iso8601} (${local_db_last_update_epoch})" +fi + +mysql --disable-column-names --batch --execute="${last_update_query}" | grep -v update_time | head -1 > ${last_update_txt} + +if [ ! -s "${last_update_txt}" ] +then + echo "The file \"${last_update_txt}\" is missing" + exit +fi + +hpr_db_last_update_iso8601="$( \date --utc --date="$( cat ${last_update_txt} )" +%Y-%m-%dT%H:%M:%SZ )" +hpr_db_last_update_epoch="$( \date --utc --date="$( cat ${last_update_txt} )" +%s )" +echo "Latest change on the HPR website database is ${hpr_db_last_update_iso8601} (${hpr_db_last_update_epoch})" + +#TODO check that the db is greater + + +# # # if [ "${hpr_db_last_update_epoch}" -eq "${local_db_last_update_epoch}" ] +# # # then +# # # echo "The file \"${last_update_txt}\" is missing" +# # # exit +# # # fi + +mysqldump --tz-utc --add-drop-database --extended-insert --complete-insert --skip-extended-insert --default-character-set=utf8 --single-transaction --skip-set-charset --databases hpr_hpr > "${sql_save_dir}/hpr_hpr_full.sql" +mysqldump --tz-utc --add-drop-database --complete-insert --extended-insert --default-character-set=utf8 --single-transaction --skip-set-charset --databases hpr_hpr --ignore-table=hpr_hpr.reservations > "${sql_save_dir}/hpr.sql" +mysqldump --tz-utc --add-drop-database --databases hpr_hpr> "${sql_save_dir}/mysqldump.sql" + +rsync -av --partial --progress "${sql_save_dir}/hpr.sql" hpr:/docker/users/hpr/hackerpublicradio.org/public_html/hpr.sql diff --git a/workflow/mdb.bash b/workflow/mdb.bash new file mode 100755 index 0000000..0bdf3eb --- /dev/null +++ b/workflow/mdb.bash @@ -0,0 +1,4 @@ +#!/bin/bash +pw=$(grep -E '^\$databasePassword = ' /home/hpr/php/credentials.php | awk -F "'" '{print $2}' ) +pw=${pw##* } +mysql --host=localhost --user=hpr_hpr --password="$pw" hpr_hpr diff --git a/workflow/process_episode.bash b/workflow/process_episode.bash index 2d2caa2..a8000c1 100755 --- a/workflow/process_episode.bash +++ b/workflow/process_episode.bash @@ -518,10 +518,10 @@ function check_variable_is_correct() { then echo_error "The \"shownotes_json\" file is missing." fi - if [ "$( file --brief --mime-type "${shownotes_json}" | grep --count 'application/json' )" -ne "1" ] - then - echo_error "The \"shownotes_json\" variable has not a valid \"application/json\" mime type." - fi +# if [ "$( file --brief --mime-type "${shownotes_json}" | grep --count 'application/json' )" -ne "1" ] +# then +# echo_error "The \"shownotes_json\" variable has not a valid \"application/json\" mime type." +# fi jq '.' "${shownotes_json}" >/dev/null 2>&1 if [ $? -ne 0 ] then @@ -637,7 +637,7 @@ function get_next_show_from_hpr_hub() { email="$( echo ${response} | awk '{print $6}' )" email_unpadded="$( echo $email | sed 's/.nospam@nospam./@/g' )" - hpr_upload_dir="/home/hpr/upload/${timestamp_epoc}_${ep_num}_${ep_date}_${key}" + hpr_upload_dir="hub.hackerpublicradio.org/upload/${timestamp_epoc}_${ep_num}_${ep_date}_${key}" source_dir="hpr:${hpr_upload_dir}" dest_dir="${timestamp_epoc}_${ep_num}_${ep_date}_${key}" working_dir="${processing_dir}/${dest_dir}" diff --git a/workflow/run-speedtest.bash b/workflow/run-speedtest.bash new file mode 100644 index 0000000..f90885a --- /dev/null +++ b/workflow/run-speedtest.bash @@ -0,0 +1,6 @@ +#!/bin/bash + +# * * * * * /usr/local/bin/run-speedtest.bash >/dev/null 2>&1 + +speedtest-cli --json | jq '.' > /var/www/html/speedtest.json +chown apache:apache /var/www/html/speedtest.json diff --git a/workflow/update-stats.bash b/workflow/update-stats.bash new file mode 100644 index 0000000..752c13e --- /dev/null +++ b/workflow/update-stats.bash @@ -0,0 +1,8 @@ +#!/bin/bash + +hub_dir="/home/hpr/hub" + +for format in txt csv json xml +do + curl --silent --netrc-file /home/hpr/.netrc https://hub.hackerpublicradio.org/cms/stats.php?format=${format} --output ${hub_dir}/stats.${format} +done From 16afecfb8270f0641ff5c7d9c8fa910b147a8c65 Mon Sep 17 00:00:00 2001 From: Ken Fallon Date: Wed, 10 Sep 2025 21:23:37 +0200 Subject: [PATCH 2/5] Automating sql generation on db change --- workflow/hpr_db_backup.bash | 158 ++++++++++++++++++++++++++++------ workflow/process_episode.bash | 5 +- 2 files changed, 135 insertions(+), 28 deletions(-) diff --git a/workflow/hpr_db_backup.bash b/workflow/hpr_db_backup.bash index 2958f61..b420c40 100755 --- a/workflow/hpr_db_backup.bash +++ b/workflow/hpr_db_backup.bash @@ -1,10 +1,38 @@ #!/bin/bash -sql_save_dir="$HOME/hpr/sql" -credential_file="$HOME/.my.cnf" -last_update_txt="${sql_save_dir}/last_update.txt" -sync_delay="30 mins" -last_update_query="SELECT update_time FROM information_schema.tables tab WHERE update_time > (current_timestamp() - interval 30 day) AND table_type = 'BASE TABLE' AND table_name not in ('reservations') AND table_schema not in ('information_schema', 'sys', 'performance_schema','mysql') ORDER BY update_time ASC LIMIT 1;" +# 5,20,35,50 * * * * $HOME/userfiles/sql/hpr_db_backup.bash >> $HOME/userfiles/sql/cron.log 2>&1 & + +#TODO +#add a rss feed with the latest updates. Linking to the changes on gittea. +# run that every 5 minutes and then if there is a change sleep 5 and confirm there has been no change since. +# Then download the db and regenerate the site. +# While making sure to generate the site at least once a day +# check the skip-extended-insert export into of the sql into git, so that the small diffs show up. +# generate a rss feed with the latest changes. +# update all the other scripts reserve, stats etc. + + +sql_save_dir="$HOME/userfiles/sql" +credential_file="${sql_save_dir}/.my-hpr.cnf" +last_update_txt="${sql_save_dir}/last_update.txt" +hpr_full_sql="${sql_save_dir}/hpr_hpr_full.sql" +hpr_site_sql="${sql_save_dir}/hpr.sql" +full_mysqldump_sql="${sql_save_dir}/mysqldump.sql" + +sync_delay_seconds="300" # 5 minutes + +last_update_query="SELECT + update_time +FROM + information_schema.tables tab +WHERE + update_time > (current_timestamp() - interval 30 day) + AND table_type = 'BASE TABLE' + AND table_name not in ('reservations') + AND table_schema not in ('information_schema', 'sys', 'performance_schema', 'mysql') +ORDER BY + update_time DESC +LIMIT 1;" if [ ! -s "${credential_file}" ] then @@ -12,37 +40,117 @@ then exit fi +local_db_last_update_epoch="0" if [ -s "${last_update_txt}" ] then echo "Found the last update file \"${last_update_txt}\"" local_db_last_update_iso8601="$( \date --utc --date="$( cat ${last_update_txt} )" +%Y-%m-%dT%H:%M:%SZ )" local_db_last_update_epoch="$( \date --utc --date="$( cat ${last_update_txt} )" +%s )" - echo "Latest change saved locally is ${local_db_last_update_iso8601} (${local_db_last_update_epoch})" + echo -e "Local DB update time is\t${local_db_last_update_iso8601} (${local_db_last_update_epoch})" fi -mysql --disable-column-names --batch --execute="${last_update_query}" | grep -v update_time | head -1 > ${last_update_txt} - -if [ ! -s "${last_update_txt}" ] +live_db_last_update_iso8601="$( mysql --defaults-file="${credential_file}" --disable-column-names --batch --execute="${last_update_query}" | sed -e 's/ /T/g' -e 's/$/Z/g' )" +if [ -z "${live_db_last_update_iso8601}" ] then - echo "The file \"${last_update_txt}\" is missing" - exit + echo "The live db update time \"live_db_last_update_iso8601\" is missing" + exit 1 fi -hpr_db_last_update_iso8601="$( \date --utc --date="$( cat ${last_update_txt} )" +%Y-%m-%dT%H:%M:%SZ )" -hpr_db_last_update_epoch="$( \date --utc --date="$( cat ${last_update_txt} )" +%s )" -echo "Latest change on the HPR website database is ${hpr_db_last_update_iso8601} (${hpr_db_last_update_epoch})" +live_db_last_update_epoch="$( \date --utc --date="${live_db_last_update_iso8601}" +%s )" +if [ -z "${live_db_last_update_epoch}" ] +then + echo "The live db update time \"live_db_last_update_epoch\" is missing" + exit 2 +fi -#TODO check that the db is greater +echo -e "Live DB update time is\t${live_db_last_update_iso8601} (${live_db_last_update_epoch})" + +if [ "${local_db_last_update_epoch}" -eq "${live_db_last_update_epoch}" ] +then + echo "No changes detected. Skipping export." + exit 0 +fi + +echo "Starting export full with -complete-insert." +if [ -s "${hpr_full_sql}" ] +then + hpr_full_sql_write_time_iso8601="$( \date --utc --date="$( ls -al --full-time "${hpr_full_sql}" | awk '{print $6, $7, $8}' )" +%Y-%m-%dT%H:%M:%SZ )" + hpr_full_sql_write_time_epoch="$( \date --utc --date="${hpr_full_sql_write_time_iso8601}" +%s )" + if [ -z "${hpr_full_sql_write_time_epoch}" ] + then + echo "The live db update time \"hpr_full_sql_write_time_epoch\" is missing" + exit 3 + fi + echo -e "Full DB write time is\t${hpr_full_sql_write_time_iso8601} (${hpr_full_sql_write_time_epoch})" + + hpr_full_sql_write_time_with_delay_epoch="$(( ${hpr_full_sql_write_time_epoch} + ${sync_delay_seconds} ))" + time_now_epoch="$( \date --utc +%s )" + + if [ "${hpr_full_sql_write_time_with_delay_epoch}" -gt "${time_now_epoch}" ] + then + echo "Skipping export. The Database has been recently created \"${hpr_full_sql_write_time_iso8601}\". Try again after $( \date --utc --date="@${hpr_full_sql_write_time_with_delay_epoch}" +%Y-%m-%dT%H:%M:%SZ )." + exit 4 + fi +fi +mysqldump --defaults-file="${credential_file}" --tz-utc --add-drop-database --extended-insert --complete-insert --skip-extended-insert --default-character-set=utf8 --single-transaction --skip-set-charset --databases hpr_hpr > "${hpr_full_sql}" +tail "${hpr_full_sql}" | grep 'Dump completed on' + +echo "Starting export full for static site generation." +if [ -s "${hpr_site_sql}" ] +then + hpr_site_sql_write_time_iso8601="$( \date --utc --date="$( ls -al --full-time "${hpr_site_sql}" | awk '{print $6, $7, $8}' )" +%Y-%m-%dT%H:%M:%SZ )" + hpr_site_sql_write_time_epoch="$( \date --utc --date="${hpr_site_sql_write_time_iso8601}" +%s )" + if [ -z "${hpr_site_sql_write_time_epoch}" ] + then + echo "The live db update time \"hpr_site_sql_write_time_epoch\" is missing" + exit 5 + fi + echo -e "Full DB write time is\t${hpr_site_sql_write_time_iso8601} (${hpr_site_sql_write_time_epoch})" + + hpr_site_sql_write_time_with_delay_epoch="$(( ${hpr_site_sql_write_time_epoch} + ${sync_delay_seconds} ))" + time_now_epoch="$( \date --utc +%s )" + + if [ "${hpr_site_sql_write_time_with_delay_epoch}" -gt "${time_now_epoch}" ] + then + echo "Skipping export. The Database has been recently created \"${hpr_site_sql_write_time_iso8601}\". Try again after $( \date --utc --date="@${hpr_site_sql_write_time_with_delay_epoch}" +%Y-%m-%dT%H:%M:%SZ )." + exit 6 + fi +fi +mysqldump --defaults-file="${credential_file}" --tz-utc --add-drop-database --complete-insert --extended-insert --default-character-set=utf8 --single-transaction --skip-set-charset --databases hpr_hpr --ignore-table=hpr_hpr.reservations > "${hpr_site_sql}" +tail "${hpr_site_sql}" | grep 'Dump completed on' + +echo "Starting export full for data recovery." +if [ -s "${full_mysqldump_sql}" ] +then + full_mysqldump_sql_write_time_iso8601="$( \date --utc --date="$( ls -al --full-time "${full_mysqldump_sql}" | awk '{print $6, $7, $8}' )" +%Y-%m-%dT%H:%M:%SZ )" + full_mysqldump_sql_write_time_epoch="$( \date --utc --date="${full_mysqldump_sql_write_time_iso8601}" +%s )" + if [ -z "${full_mysqldump_sql_write_time_epoch}" ] + then + echo "The live db update time \"full_mysqldump_sql_write_time_epoch\" is missing" + exit 5 + fi + echo -e "Full DB write time is\t${full_mysqldump_sql_write_time_iso8601} (${full_mysqldump_sql_write_time_epoch})" + + full_mysqldump_sql_write_time_with_delay_epoch="$(( ${full_mysqldump_sql_write_time_epoch} + ${sync_delay_seconds} ))" + time_now_epoch="$( \date --utc +%s )" + + if [ "${full_mysqldump_sql_write_time_with_delay_epoch}" -gt "${time_now_epoch}" ] + then + echo "Skipping export. The Database has been recently created \"${full_mysqldump_sql_write_time_iso8601}\". Try again after $( \date --utc --date="@${full_mysqldump_sql_write_time_with_delay_epoch}" +%Y-%m-%dT%H:%M:%SZ )." + exit 6 + fi +fi +mysqldump --defaults-file="${credential_file}" --tz-utc --add-drop-database --databases hpr_hpr> "${full_mysqldump_sql}" +tail "${full_mysqldump_sql}" | grep 'Dump completed on' + +if [ $HOSTNAME = "whp01.cloud-hosting.io" ] +then + cp -v "${hpr_site_sql}" $HOME/hackerpublicradio.org/public_html/hpr.sql +else + rsync -av --partial --progress ${hpr_site_sql} hpr:hackerpublicradio.org/public_html/hpr.sql +fi + +echo "${live_db_last_update_iso8601}" > "${last_update_txt}" -# # # if [ "${hpr_db_last_update_epoch}" -eq "${local_db_last_update_epoch}" ] -# # # then -# # # echo "The file \"${last_update_txt}\" is missing" -# # # exit -# # # fi -mysqldump --tz-utc --add-drop-database --extended-insert --complete-insert --skip-extended-insert --default-character-set=utf8 --single-transaction --skip-set-charset --databases hpr_hpr > "${sql_save_dir}/hpr_hpr_full.sql" -mysqldump --tz-utc --add-drop-database --complete-insert --extended-insert --default-character-set=utf8 --single-transaction --skip-set-charset --databases hpr_hpr --ignore-table=hpr_hpr.reservations > "${sql_save_dir}/hpr.sql" -mysqldump --tz-utc --add-drop-database --databases hpr_hpr> "${sql_save_dir}/mysqldump.sql" - -rsync -av --partial --progress "${sql_save_dir}/hpr.sql" hpr:/docker/users/hpr/hackerpublicradio.org/public_html/hpr.sql diff --git a/workflow/process_episode.bash b/workflow/process_episode.bash index a8000c1..abfb884 100755 --- a/workflow/process_episode.bash +++ b/workflow/process_episode.bash @@ -91,7 +91,7 @@ function program_checks() { done } - is_installed audio2image.bash awk base64 cat csvtojson curl date detox eval extract_images ffprobe ffmpeg file find grep head jq kate magick mediainfo mv realpath remove-image.pl rsync seamonkey sed sed sort sponge ssh touch touch wget hpr-check-ccdn-links + is_installed audio2image.bash awk base64 cat csvtojson curl date eval extract_images ffprobe ffmpeg file find grep head jq kate magick mediainfo mv realpath remove-image.pl rsync seamonkey sed sed sort sponge ssh touch touch wget hpr-check-ccdn-links for arg in $* do @@ -647,7 +647,6 @@ function get_next_show_from_hpr_hub() { echo_debug "Downloading hpr${ep_num} from ${email_unpadded}" echo_debug "" echo_debug "rsync -ave ssh --partial --progress ${source_dir}/ ${working_dir}/" - ssh hpr -t "detox -v ${hpr_upload_dir}/" rsync -ave ssh --partial --progress ${source_dir}/ ${working_dir}/ } @@ -1032,7 +1031,7 @@ function media_checks() { shownotes_srt="${media%.*}.srt" if [[ -z "${shownotes_srt}" || ! -s "${shownotes_srt}" ]] then - echo_error "Could not find the subtitles for media \"${media}\"" + echo_error "Could not find the subtitles for media \"${media}\" in \"${shownotes_srt}\"" fi #TODO fix close duration From 2b4ef438b394dbbd39b4078ce655c7aa8d706b36 Mon Sep 17 00:00:00 2001 From: Ken Fallon Date: Thu, 11 Sep 2025 17:50:31 +0200 Subject: [PATCH 3/5] Added SQL rss feed, Fixed regex for image, and scp files to hpr server --- workflow/hpr_db_backup.bash | 46 ++++++++++++++++++++++++++++++--- workflow/process_episode.bash | 48 +++++++++++++++++++++++++++++++++-- 2 files changed, 88 insertions(+), 6 deletions(-) diff --git a/workflow/hpr_db_backup.bash b/workflow/hpr_db_backup.bash index b420c40..4bb3733 100755 --- a/workflow/hpr_db_backup.bash +++ b/workflow/hpr_db_backup.bash @@ -18,6 +18,7 @@ last_update_txt="${sql_save_dir}/last_update.txt" hpr_full_sql="${sql_save_dir}/hpr_hpr_full.sql" hpr_site_sql="${sql_save_dir}/hpr.sql" full_mysqldump_sql="${sql_save_dir}/mysqldump.sql" +hpr_database_file="hackerpublicradio.org/public_html/hpr.sql" sync_delay_seconds="300" # 5 minutes @@ -143,14 +144,51 @@ fi mysqldump --defaults-file="${credential_file}" --tz-utc --add-drop-database --databases hpr_hpr> "${full_mysqldump_sql}" tail "${full_mysqldump_sql}" | grep 'Dump completed on' +echo " + + + Hacker Public Radio ~ Database Feed + http://hackerpublicradio.org/about.html + This Feed provides information the latest version of the HPR database. + en-us + Creative Commons Attribution-ShareAlike 4.0 International (CC BY-SA 4.0) License + feedback.nospam@nospam.hackerpublicradio.org (HPR Feedback) + admin.nospam@nospam.hackerpublicradio.org (HPR Webmaster) + https://repo.anhonesthost.net/HPR/hpr-tools/src/branch/main/workflow/hpr_db_backup.bash + https://www.rssboard.org/rss-specification + 15 + + http://hackerpublicradio.org/images/hpr_feed_small.png + Hacker Public Radio ~ Database Feed + http://hackerpublicradio.org/about.html + The Hacker Public Radio Old Microphone Logo + 164 + 144 + + + $( date --utc --rfc-email ) + + Export of the Public mariadb SQL for ${live_db_last_update_iso8601} + admin.nospam@nospam.hackerpublicradio.org (Janitors) + http://hackerpublicradio.org/hpr.sql#${live_db_last_update_iso8601} + + $( date --utc --rfc-email ) + + sha1sum:$( sha1sum "${hpr_site_sql}" | awk '{print $1}' ),md5sum:$( md5sum "${hpr_site_sql}" | awk '{print $1}' ) + + +" > "${hpr_site_sql}.rss" + if [ $HOSTNAME = "whp01.cloud-hosting.io" ] then - cp -v "${hpr_site_sql}" $HOME/hackerpublicradio.org/public_html/hpr.sql + cp -v "${hpr_site_sql}" "$HOME/${hpr_database_file}" + cp -v "${hpr_site_sql}.rss" "$HOME/${hpr_database_file}.rss" else - rsync -av --partial --progress ${hpr_site_sql} hpr:hackerpublicradio.org/public_html/hpr.sql + rsync -av --partial --progress ${hpr_site_sql} hpr:${hpr_database_file} + rsync -av --partial --progress ${hpr_site_sql}.rss hpr:${hpr_database_file}.rss fi - + echo "${live_db_last_update_iso8601}" > "${last_update_txt}" - +echo "Finished export of \"${live_db_last_update_iso8601}\"" diff --git a/workflow/process_episode.bash b/workflow/process_episode.bash index abfb884..c1612e3 100755 --- a/workflow/process_episode.bash +++ b/workflow/process_episode.bash @@ -869,7 +869,7 @@ function extract_images_brute_force() { # Download referenced images image_count_external="1" - for image in $( grep --color=never --perl-regexp --only-matching '' "${shownotes_html}" | awk -F 'src=' '{print $2}' | awk -F '"' '{print $2}' ) + for image in $( grep --color=never --perl-regexp --only-matching '' "${shownotes_html}" | awk -F 'src=' '{print $2}' | awk -F '"' '{print $2}' ) do this_image="${working_dir}/hpr${ep_num}_image_ext_${image_count_external}" wget "${image}" --output-document=${this_image} @@ -1967,7 +1967,7 @@ function register_assets() { } ################################################# -# Register the assets with the hpr database +# Copy the files to the origin server function copy_files_to_origin_server() { @@ -2013,6 +2013,48 @@ function copy_files_to_origin_server() { done } +################################################# +# Copy the images to the hpr server + +function copy_files_to_hpr_server() { + + echo_debug "Copying the files to the hpr server. copy_files_to_hpr_server()" + + check_variable_is_correct working_dir ep_num + + # Get the file list from the HPR db + if [ -z "${files_json}" ] + then + files_json="${working_dir}/hpr${ep_num}_files.json" + fi + + if [ -s "${files_json}" ] + then + echo_debug "Removing stale \"files_json\" file \"${files_json}\"." + rm -v "${files_json}" + fi + + echo_debug "Getting the \"files_json\" file from the HPR site." + if [ "$( curl --silent --netrc --write-out '%{http_code}' https://hub.hackerpublicradio.org/cms/assets.php?id=${ep_num} --output "${files_json}" )" != 200 ] + then + echo_error "The Episode hpr${ep_num} has not been posted" + fi + check_variable_is_correct files_json + + ssh hpr -t "mkdir -v hackerpublicradio.org/public_html/eps/hpr${ep_num}" + + for this_file in $( jq --raw-output ".hpr${ep_num} | keys | @tsv" "${files_json}" ) + do + skip_file="$( echo "$this_file" | grep --count --perl-regexp "hpr${ep_num}.flac|hpr${ep_num}.mp3|hpr${ep_num}.ogg|hpr${ep_num}.opus|hpr${ep_num}.spx|hpr${ep_num}.srt|hpr${ep_num}.txt|hpr${ep_num}.wav" )" + if [ "${skip_file}" -eq "1" ] + then + continue + fi + scp "$this_file" hpr:hackerpublicradio.org/public_html/eps/hpr${ep_num}/ + done + +} + ################################################# # Send the derived files to the server borg to be sent to borg @@ -2334,6 +2376,8 @@ copy_files_to_origin_server check_variable_is_correct working_dir ep_num shownotes_edited +copy_files_to_hpr_server + copy_derived_files_to_borg create_item_on_the_internet_archive From 8efc6459c39604a26e0e291f73cd57a9d5922632 Mon Sep 17 00:00:00 2001 From: Ken Fallon Date: Mon, 15 Sep 2025 20:15:59 +0200 Subject: [PATCH 4/5] Correct location for the new server --- workflow/process_episode.bash | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflow/process_episode.bash b/workflow/process_episode.bash index c1612e3..424b606 100755 --- a/workflow/process_episode.bash +++ b/workflow/process_episode.bash @@ -2050,7 +2050,7 @@ function copy_files_to_hpr_server() { then continue fi - scp "$this_file" hpr:hackerpublicradio.org/public_html/eps/hpr${ep_num}/ + scp "${working_dir}/$this_file" hpr:hackerpublicradio.org/public_html/eps/hpr${ep_num}/ done } From d19fe2e4bf75d5d72a78257ca6f06e3d85ecb151 Mon Sep 17 00:00:00 2001 From: Ken Fallon Date: Sun, 21 Sep 2025 19:19:20 +0200 Subject: [PATCH 5/5] Cron automations --- workflow/check_feeds.bash | 26 ++++++++ workflow/export-jsons-stats.bash | 8 +++ workflow/hpr-publish.bash | 111 ++++++++++++++++++++++++++----- workflow/hpr_db_backup.bash | 2 + workflow/process_episode.bash | 1 - 5 files changed, 131 insertions(+), 17 deletions(-) create mode 100755 workflow/check_feeds.bash create mode 100755 workflow/export-jsons-stats.bash diff --git a/workflow/check_feeds.bash b/workflow/check_feeds.bash new file mode 100755 index 0000000..44211be --- /dev/null +++ b/workflow/check_feeds.bash @@ -0,0 +1,26 @@ +#!/bin/bash + +feed_dir="$HOME/tmp/hpr/rss" + +wget https://hackerpublicradio.org/hpr_opus_rss.php -O "${feed_dir}/2_opus.xml" +wget https://hackerpublicradio.org/hpr_ogg_rss.php -O "${feed_dir}/2_ogg.xml" +wget https://hackerpublicradio.org/hpr_rss.php -O "${feed_dir}/2_mp3.xml" +wget https://hackerpublicradio.org/hpr_total_opus_rss.php -O "${feed_dir}/f_opus.xml" +wget https://hackerpublicradio.org/hpr_total_ogg_rss.php -O "${feed_dir}/f_ogg.xml" +wget https://hackerpublicradio.org/hpr_total_rss.php -O "${feed_dir}/f_mp3.xml" +wget https://hackerpublicradio.org/rss-future.php -O "${feed_dir}/ff.xml" +wget https://hackerpublicradio.org/comments.rss -O "${feed_dir}/comments.xml" + +cd "${feed_dir}" + +for feed in *.xml +do + echo "Checking ${feed}" + xmllint --format "${feed}" >/dev/null 2>&1 + if [ "${?}" -ne "0" ] + then + echo "Error: The rss feed \"${feed}\" is not correct" + xmllint --format "${feed}" + exit 1 + fi +done diff --git a/workflow/export-jsons-stats.bash b/workflow/export-jsons-stats.bash new file mode 100755 index 0000000..a74d7bd --- /dev/null +++ b/workflow/export-jsons-stats.bash @@ -0,0 +1,8 @@ +#!/bin/bash + +# 6,21,36,51 * * * * /docker/users/hpr/userfiles/bin/export-jsons-stats.bash > /dev/null 2>&1 & + +net_rc="$HOME/userfiles/.netrc" +output_file="$HOME/hub.hackerpublicradio.org/public_html/stats.json" + +curl --silent --netrc-file ${net_rc} https://hub.hackerpublicradio.org/cms/stats.php --output "${output_file}" diff --git a/workflow/hpr-publish.bash b/workflow/hpr-publish.bash index b24bda0..4bd8c0a 100755 --- a/workflow/hpr-publish.bash +++ b/workflow/hpr-publish.bash @@ -2,16 +2,80 @@ # Copyright Ken Fallon - Released into the public domain. http://creativecommons.org/publicdomain/ #============================================================ -git_dir="$HOME/tmp/hpr/hpr_generator/sourcecode" -if [ ! -d "${git_dir}/.git" ] +working_dir="$HOME/tmp/hpr/hpr_generator/sourcecode" +hpr_sql_rss="${working_dir}/hpr_sql_rss.xml" +previous_update_txt="${working_dir}/last_update.txt" + +if [ ! -d "${working_dir}/.git" ] then - git clone gitea@repo.anhonesthost.net:HPR/hpr_generator.git "${git_dir}" + mkdir --parents --verbose "${working_dir}" + git clone gitea@repo.anhonesthost.net:HPR/hpr_generator.git "${working_dir}" fi -cd "${git_dir}" - +cd "${working_dir}" git pull +function get_latest_update_time() { + if [ "$( curl --silent --netrc-file ${HOME}/.netrc --write-out '%{http_code}' https://hackerpublicradio.org/hpr.sql.rss --output "${hpr_sql_rss}" )" != 200 ] + then + echo "Could not get a list of the queue status from \"https://hackerpublicradio.org/hpr.sql.rss\"" + fi + + if [[ ! -s "${hpr_sql_rss}" || -z "${hpr_sql_rss}" ]] + then + echo "The \"hpr_sql_rss\" variable/file is missing." + exit + fi + if [ "$( file --brief --mime-type "${hpr_sql_rss}" | grep --count 'text/xml' )" -ne "1" ] + then + echo "The \"hpr_sql_rss\" variable has not a valid \"text/xml\" mime type." + exit + fi + xmllint --format "${hpr_sql_rss}" >/dev/null 2>&1 + if [ $? -ne 0 ] + then + echo "The file \"${hpr_sql_rss}\" is not valid xml." + exit + fi + + current_update_iso8601="$( \date --utc --date="$( xmlstarlet sel --text --template --match '/rss/channel/pubDate' --value-of '.' --nl "${hpr_sql_rss}" )" +%Y-%m-%dT%H:%M:%SZ )" + if [ -z "${current_update_iso8601}" ] + then + echo "The \"current_update_iso8601\" variable is missing." + exit + fi + + current_update_epoch="$( \date --utc --date="${current_update_iso8601}" +%s )" + if [ -z "${current_update_epoch}" ] + then + echo "The \"current_update_epoch\" variable/file is missing." + exit + fi + echo -e "Current update time is\t${current_update_iso8601} (${current_update_epoch})" + + if [ -s "${previous_update_txt}" ] + then + #echo "Found the last update file \"${previous_update_txt}\"" + previous_update_iso8601="$( \date --utc --date="$( cat "${previous_update_txt}" )" +%Y-%m-%dT%H:%M:%SZ )" + previous_update_epoch="$( \date --utc --date="$( cat "${previous_update_txt}" )" +%s )" + else + #echo "Did not find the last update file \"${previous_update_txt}\"" + previous_update_iso8601="" + previous_update_epoch="0" + fi + + echo -e "Previous update time is\t${previous_update_iso8601} (${previous_update_epoch})" + + echo "${current_update_iso8601}" > "${previous_update_txt}" + +} + +get_latest_update_time + + +# if [ "${current_update_epoch}" -eq "previous_update_epoch" +# exit + # ssh hpr -t "ls -al /home/hpr/www/hpr.sql;md5sum /home/hpr/www/hpr.sql" # ssh hpr -t "/home/hpr/bin/hpr_db_backup.bash" # ssh hpr -t "ls -al /home/hpr/www/hpr.sql;md5sum /home/hpr/www/hpr.sql" @@ -30,15 +94,30 @@ then exit 1 fi -rsync -av --partial --progress "${git_dir}/public_html/" hpr:hackerpublicradio.org/public_html -#rsync -av --partial --progress "${git_dir}/public_html/" hobbypublicradio.org:hobbypublicradio.org/ +cd "${working_dir}/public_html" -cd $HOME/sourcecode/hpr/hpr_hub/ -git pull -cd $HOME/sourcecode/hpr/hpr_hub/sql -split --hex-suffixes --lines=1000 --additional-suffix=.sql hpr.sql hpr-db-part- -cd $HOME/sourcecode/hpr/hpr_hub/ -git add $HOME/sourcecode/hpr/hpr_hub/sql/hpr*sql -git commit -m "$(\date -u +%Y-%m-%d_%H-%M-%SZ_%A ) database changed" -git push -#xdg-open https://hackerpublicradio.org/ +for feed in comments.rss +do + xmllint --format "${feed}" >/dev/null 2>&1 + if [ "${?}" -ne "0" ] + then + echo "Error: The rss feed \"${feed}\" is not correct" + xmllint --format "${feed}" + exit 1 + fi +done + +cd "${working_dir}" + +rsync -av --partial --progress "${working_dir}/public_html/" hpr:hackerpublicradio.org/public_html +#rsync -av --partial --progress "${working_dir}/public_html/" hobbypublicradio.org:hobbypublicradio.org/ + +# # # cd $HOME/sourcecode/hpr/hpr_hub/ +# # # git pull +# # # cd $HOME/sourcecode/hpr/hpr_hub/sql +# # # split --hex-suffixes --lines=1000 --additional-suffix=.sql hpr.sql hpr-db-part- +# # # cd $HOME/sourcecode/hpr/hpr_hub/ +# # # git add $HOME/sourcecode/hpr/hpr_hub/sql/hpr*sql +# # # git commit -m "$(\date -u +%Y-%m-%d_%H-%M-%SZ_%A ) database changed" +# # # git push +# # # #xdg-open https://hackerpublicradio.org/ diff --git a/workflow/hpr_db_backup.bash b/workflow/hpr_db_backup.bash index 4bb3733..8757109 100755 --- a/workflow/hpr_db_backup.bash +++ b/workflow/hpr_db_backup.bash @@ -2,6 +2,8 @@ # 5,20,35,50 * * * * $HOME/userfiles/sql/hpr_db_backup.bash >> $HOME/userfiles/sql/cron.log 2>&1 & +# https://hackerpublicradio.org/hpr.sql.rss + #TODO #add a rss feed with the latest updates. Linking to the changes on gittea. # run that every 5 minutes and then if there is a change sleep 5 and confirm there has been no change since. diff --git a/workflow/process_episode.bash b/workflow/process_episode.bash index 424b606..1763420 100755 --- a/workflow/process_episode.bash +++ b/workflow/process_episode.bash @@ -719,7 +719,6 @@ function get_working_dir() { } - ################################################# # Once the working_dir is known, set the other variables