forked from HPR/hpr-tools
		
	The show processing needs to be refactored #5
This commit is contained in:
		| @@ -73,7 +73,7 @@ function program_checks() { | ||||
|     done | ||||
|   } | ||||
|  | ||||
|   is_installed audio2image.bash awk base64 cat csvtojson curl date detox eval extract_images ffprobe file find grep grep head jq kate magick mediainfo mv realpath rsync seamonkey sed sed sort sponge ssh touch touch wget | ||||
|   is_installed audio2image.bash awk base64 cat csvtojson curl date detox eval extract_images ffprobe file find grep head jq kate magick mediainfo mv realpath remove-image.pl rsync seamonkey sed sed sort sponge ssh touch touch wget | ||||
|  | ||||
|   for arg in $* | ||||
|   do | ||||
| @@ -335,6 +335,12 @@ function extract_images_brute_force() { | ||||
|    | ||||
|   echo_debug "Extracting images with grep." | ||||
|    | ||||
|   if [ -s "${shownotes_html%.*}_edited.html" ] | ||||
|   then | ||||
|     echo_debug "There is already an edited version of the shownotes at \"${shownotes_html%.*}_edited.html\", slipping image extraction." | ||||
|     return | ||||
|   fi | ||||
|    | ||||
|   if [[ -z "${shownotes_html}" || ! -s "${shownotes_html}" ]] | ||||
|   then | ||||
|     echo_error "The shownotes_html file \"${shownotes_html}\" could not be found." | ||||
| @@ -344,49 +350,120 @@ function extract_images_brute_force() { | ||||
|  | ||||
|   sed "s#>#>\n#g" "${shownotes_html}" | sponge "${shownotes_html}" | ||||
|  | ||||
|   image_count="1" | ||||
|    | ||||
|   # Extract embedded images | ||||
|   image_count_embedded="1" | ||||
|  | ||||
|   for image in $( grep --color=never --perl-regexp --only-matching 'data:image/[^;]*;base64,\K[a-zA-Z0-9+/=]*' "${shownotes_html}" ) | ||||
|   do | ||||
|     this_image="${working_dir}/hpr${ep_num}_image_${image_count}" | ||||
|     this_image="${working_dir}/hpr${ep_num}_image_${image_count_embedded}" | ||||
|     echo -n "$image" | base64 -di > ${this_image} | ||||
|     this_ext="$( file --mime-type ${this_image} | awk -F '/' '{print $NF}' )" | ||||
|     mv -v "${this_image}" "${this_image}.${this_ext}" | ||||
|     this_width="$( mediainfo "${this_image}.${this_ext}" | grep Width | awk -F ': | pixels' '{print $2}' | sed 's/ //g' )" | ||||
|     if [ "${this_width}" -gt "400" ] | ||||
|     then | ||||
|       echo_debug "Generating thumbnail for embedded image \"${this_image}.${this_ext}\"." | ||||
|       magick "${this_image}.${this_ext}" -resize 400x "${this_image}_tn.${this_ext}" | ||||
|     fi | ||||
|     ((image_count=image_count+1)) | ||||
|     ((image_count_embedded=image_count_embedded+1)) | ||||
|   done | ||||
|    | ||||
|   # Download referenced images | ||||
|   image_count_external="1" | ||||
|  | ||||
|   for image in $( grep --color=never --perl-regexp --only-matching '<img.*src.*http.*>' "${shownotes_html}" | awk -F 'src=' '{print $2}' | awk -F '"' '{print $2}' ) | ||||
|   do | ||||
|     this_image="${working_dir}/hpr${ep_num}_image_${image_count}" | ||||
|     this_image="${working_dir}/hpr${ep_num}_image_ext_${image_count_external}" | ||||
|     wget "${image}" --output-document=${this_image} | ||||
|     this_ext="$( file --mime-type ${this_image} | awk -F '/' '{print $NF}' )" | ||||
|     if [ ! -e "${this_image}.${this_ext}" ] | ||||
|     then | ||||
|       mv -v "${this_image%.*}" "${this_image}.${this_ext}" | ||||
|     fi | ||||
|     mv -v "${this_image%.*}" "${this_image}.${this_ext}" | ||||
|     this_width="$( mediainfo "${this_image}.${this_ext}" | grep Width | awk -F ': | pixels' '{print $2}' | sed 's/ //g' )" | ||||
|     if [ "${this_width}" -gt "400" ] | ||||
|     then | ||||
|       echo_debug "Generating thumbnail for external image \"${this_image}.${this_ext}\"." | ||||
|       magick "${this_image}.${this_ext}" -resize 400x "${this_image}_tn.${this_ext}" | ||||
|     fi | ||||
|     ((image_count=image_count+1)) | ||||
|     ((image_count_external=image_count_external+1)) | ||||
|   done | ||||
|    | ||||
|   # TODO Link up image_count > TODO Link up image_count by looping `<img` tags with images found on the disk. | ||||
|   cat "${shownotes_html}" | remove-image.pl | sponge "${shownotes_html}" | ||||
|    | ||||
|   #grep --color=never --perl-regexp --invert-match 'data:image/[^;]*;base64,\K[a-zA-Z0-9+/=]*' "${shownotes_html}" | ||||
|   if [ "${image_count_embedded}" -gt "1" ] | ||||
|   then | ||||
|     image_count_embedded="1" | ||||
|      | ||||
|     touch "${shownotes_html}.embedded_images" | ||||
|      | ||||
|     cat "${shownotes_html}" | while read this_line | ||||
|     do | ||||
|       if [ "$( echo "${this_line}" | grep --count "LOCAL_IMAGE_REMOVED" )" -eq "0" ] | ||||
|       then | ||||
|         echo "${this_line}" >> "${shownotes_html}.embedded_images" | ||||
|       else | ||||
|         this_image="$( find "${working_dir}/" -type f -iname "hpr${ep_num}_image_${image_count_embedded}.*" )" | ||||
|         if [[ -z "${this_image}" || ! -s "${this_image}" ]] | ||||
|         then | ||||
|           echo_error "Unable to find an image for \"${image_count_embedded}\", \"${this_image}\"." | ||||
|         fi | ||||
|         this_image="$( basename "${this_image}" )" | ||||
|         this_image_tn="$( find "${working_dir}/" -type f -iname "${this_image%.*}_tn.*" )" | ||||
|         if [[ -z "${this_image_tn}" || ! -s "${this_image_tn}" ]] | ||||
|         then | ||||
|           echo "${this_line}" | sed "s@LOCAL_IMAGE_REMOVED@${this_image}@g" >> "${shownotes_html}.embedded_images" | ||||
|         else | ||||
|           this_image_tn="$( basename "${this_image_tn}" )" | ||||
|           echo "<a href=\"${this_image}\">" >> "${shownotes_html}.embedded_images" | ||||
|           echo "${this_line}" | sed "s@LOCAL_IMAGE_REMOVED@${this_image_tn}@g" >> "${shownotes_html}.embedded_images" | ||||
|           echo "</a>" >> "${shownotes_html}.embedded_images" | ||||
|         fi | ||||
|         ((image_count_embedded=image_count_embedded+1)) | ||||
|       fi | ||||
|     done | ||||
|      | ||||
|     mv -v "${shownotes_html}.embedded_images" "${shownotes_html}" | ||||
|      | ||||
|   else | ||||
|     echo_debug "No embedded images found. ${image_count_embedded}" | ||||
|   fi | ||||
|    | ||||
|    | ||||
|   # <img alt="Picture 1 shows the broken dog    walking accessory." border="0" height="300" src="hpr4283_image_1_tn.jpeg" width="400" /> | ||||
|   if [ "${image_count_external}" -gt "1" ] | ||||
|   then | ||||
|     image_count_external="1" | ||||
|      | ||||
|     touch "${shownotes_html}.external_images" | ||||
|      | ||||
|     cat "${shownotes_html}" | remove-image.pl | while read this_line | ||||
|     do | ||||
|       if [ "$( echo "${this_line}" | grep --count "REMOTE_IMAGE_REMOVED" )" -eq "0" ] | ||||
|       then | ||||
|         echo "${this_line}" >> "${shownotes_html}.external_images" | ||||
|       else | ||||
|         this_image="$( find "${working_dir}/" -type f -iname "hpr${ep_num}_image_ext_${image_count_external}.*" )" | ||||
|         if [[ -z "${this_image}" || ! -s "${this_image}" ]] | ||||
|         then | ||||
|           echo_error "Unable to find an image for \"${image_count_external}\", \"${this_image}\"." | ||||
|         fi | ||||
|         this_image="$( basename "${this_image}" )" | ||||
|         this_image_tn="$( find "${working_dir}/" -type f -iname "${this_image%.*}_tn.*" )" | ||||
|         if [[ -z "${this_image_tn}" || ! -s "${this_image_tn}" ]] | ||||
|         then | ||||
|           echo "${this_line}" | sed "s@REMOTE_IMAGE_REMOVED@${this_image}@g" >> "${shownotes_html}.external_images" | ||||
|         else | ||||
|           this_image_tn="$( basename "${this_image_tn}" )" | ||||
|           echo "<a href=\"${this_image}\">" >> "${shownotes_html}.external_images" | ||||
|           echo "${this_line}" | sed "s@REMOTE_IMAGE_REMOVED@${this_image_tn}@g" >> "${shownotes_html}.external_images" | ||||
|           echo "</a>" >> "${shownotes_html}.external_images" | ||||
|         fi | ||||
|         ((image_count_external=image_count_external+1)) | ||||
|       fi | ||||
|     done | ||||
|      | ||||
|     mv -v "${shownotes_html}.external_images" "${shownotes_html}"  | ||||
|      | ||||
|   else | ||||
|     echo_debug "No external images found." | ||||
|   fi | ||||
|  | ||||
| ## TODO End Temp fix  | ||||
|  | ||||
| @@ -1095,7 +1172,7 @@ $(cat "${working_dir}/hpr${ep_num}.srt" ) | ||||
| </pre> | ||||
| <hr /> | ||||
|  | ||||
| <h3>Raw shownotes.html</h3> | ||||
| <h3>Transcript File</h3> | ||||
|  | ||||
| <hr /> | ||||
| <pre> | ||||
| @@ -1151,13 +1228,16 @@ function register_assets() { | ||||
|    | ||||
|   echo '"episode_id","filename","extension","size", "sha1sum", "mime_type", "file_type"' | tee "${working_dir}/hpr${ep_num}_assets.csv" | ||||
|    | ||||
|   for this_asset in hpr${ep_num}.flac hpr${ep_num}.wav hpr${ep_num}.mp3 hpr${ep_num}.ogg hpr${ep_num}.opus hpr${ep_num}.srt hpr${ep_num}.txt hpr${ep_num}_image_*.* | ||||
|   for this_asset_filename in hpr${ep_num}.flac hpr${ep_num}.wav hpr${ep_num}.mp3 hpr${ep_num}.ogg hpr${ep_num}.opus hpr${ep_num}.srt hpr${ep_num}.txt $( find "${working_dir}/" -maxdepth 1 -type f -iname "hpr${ep_num}_image_*.*" ) | ||||
|   do | ||||
|     echo_debug "Registering \"${this_asset}\"." | ||||
|     if [[ ! -s "${working_dir}/${this_asset}" ]] | ||||
|     this_asset_filename="$( basename "${this_asset_filename}" )" | ||||
|     echo_debug "Registering \"${this_asset_filename}\"." | ||||
|     this_asset="${working_dir}/${this_asset_filename}" | ||||
|      | ||||
|     if [[ ! -s "${this_asset}" ]] | ||||
|     then | ||||
|       echo_error "Failed to register missing file \"${working_dir}/${this_asset}\"." | ||||
|       ls -al "${working_dir}/${this_asset}" | ||||
|       echo_error "Failed to register missing file \"${this_asset}\"." | ||||
|       ls -al "${this_asset}" | ||||
|     fi | ||||
|     this_asset_basename=$( basename "${this_asset}" ) | ||||
|     this_asset_extension="${this_asset_basename##*.}" | ||||
| @@ -1166,6 +1246,11 @@ function register_assets() { | ||||
|     this_asset_mime_type=$( file --dereference --brief --mime "${this_asset}" ) | ||||
|     this_asset_file_type=$( file --dereference --brief "${this_asset}" ) | ||||
|      | ||||
|     if [ "$( echo ${this_asset_file_type} | wc --chars )" -gt "130" ] | ||||
|     then | ||||
|       this_asset_file_type="${this_asset_mime_type}" | ||||
|     fi | ||||
|      | ||||
|     variables=( ep_num this_asset_basename this_asset_extension this_asset_size this_asset_sha1sum this_asset_mime_type this_asset_file_type working_dir ep_num ) | ||||
|  | ||||
|     for variable in "${variables[@]}" | ||||
| @@ -1191,13 +1276,52 @@ function register_assets() { | ||||
|   then | ||||
|     echo_error "The asset json file \"${working_dir}/hpr${ep_num}_assets.json\" is missing."; | ||||
|   fi | ||||
|      | ||||
|   if [ "$( curl --silent --netrc-file $HOME/.netrc --write-out '%{http_code}' --request POST https://hub.hackerpublicradio.org/cms/assets.php --data-ascii @"${working_dir}/hpr${ep_num}_assets.json" --header "Content-Type: application/json" )" != 200 ] | ||||
|    | ||||
|   response="$( curl --silent --netrc-file $HOME/.netrc --write-out '%{http_code}' --output /dev/null --request POST https://hub.hackerpublicradio.org/cms/assets.php --data-ascii @"${working_dir}/hpr${ep_num}_assets.json" --header "Content-Type: application/json" )" | ||||
|   if [[ -z "${response}" || "${response}" != "200" ]] | ||||
|   then | ||||
|     echo_error "The assets for episode hpr${ep_num} has not been registered." | ||||
|     echo_error "The assets for episode hpr${ep_num} has not been registered. The response was \"${response}\"" | ||||
|   fi | ||||
| } | ||||
|  | ||||
| ################################################# | ||||
| # Copy the files to the origin server and verify their checksums | ||||
|  | ||||
| function copy_files_to_origin_server() { | ||||
|   # Copy the episode's media, transcript and image files to the origin | ||||
|   # server and verify each transfer by comparing sha1 checksums. | ||||
|   # | ||||
|   # Reads the globals: | ||||
|   #   ep_num       episode number used to build the hpr${ep_num}* file names | ||||
|   #   working_dir  directory that holds the local copies of the files | ||||
|   # | ||||
|   # Problems (missing local file, undeterminable checksum, checksum | ||||
|   # mismatch) are reported through echo_error. | ||||
|   echo_debug "Copying the files to the origin server" | ||||
|  | ||||
|   # TODO get an origin server capable of storing all the files | ||||
|   for this_asset in hpr${ep_num}.mp3 hpr${ep_num}.ogg hpr${ep_num}.opus hpr${ep_num}.srt hpr${ep_num}.txt $( find "${working_dir}/" -type f -iname "hpr${ep_num}_image_*.*" ) | ||||
|   do | ||||
|     # find returns full paths while the fixed names are bare, so reduce | ||||
|     # everything to a bare file name and rebuild the local path from it. | ||||
|     this_asset="$( basename "${this_asset}" )" | ||||
|     this_file="${working_dir}/${this_asset}" | ||||
|  | ||||
|     echo_debug "Copying \"${this_file}\" to the origin server." | ||||
|  | ||||
|     if [[ ! -s "${this_file}" ]] | ||||
|     then | ||||
|       echo_error "Failed to transfer missing file \"${this_file}\"." | ||||
|       ls -al "${this_file}" | ||||
|     fi | ||||
|  | ||||
|     rsync --archive --quiet --partial --progress "${this_file}" "rsync.net:hpr/eps/hpr${ep_num}/${this_asset}" | ||||
|  | ||||
|     # The remote "sha1" command output is flattened by echo and its last | ||||
|     # field is taken as the checksum; the local sha1sum prints it first. | ||||
|     origin_sha1sum="$( echo $( ssh rsync.net "sha1 hpr/eps/hpr${ep_num}/${this_asset}" 2> /dev/null ) | awk '{print $NF}' )" | ||||
|     this_asset_sha1sum="$( sha1sum "${this_file}" | awk '{print $1}' )" | ||||
|  | ||||
|     if [[ -z "${origin_sha1sum}" || -z "${this_asset_sha1sum}" ]] | ||||
|     then | ||||
|       echo_error "Could not determine the local/origin sha1sum for file \"${this_file}\"." | ||||
|     fi | ||||
|  | ||||
|     if [ "${origin_sha1sum}" != "${this_asset_sha1sum}" ] | ||||
|     then | ||||
|       # Report each checksum under its correct label (local first, then origin). | ||||
|       echo_error "The local sha1sum \"${this_asset_sha1sum}\" and origin \"${origin_sha1sum}\" are mismatched for file \"${this_file}\"." | ||||
|     fi | ||||
|  | ||||
|   done | ||||
| } | ||||
|  | ||||
| ################################################# | ||||
| # Send the derived files to the server borg to be sent to the Internet Archive | ||||
|  | ||||
| @@ -1288,27 +1412,19 @@ function copy_derived_files_to_borg_for_the_internet_archive() { | ||||
| # TODO Add support for community news - reusing ^^^ | ||||
| # TODO Add support for stereo for some episodes that request it | ||||
| # TODO Include links in extract_images_brute_force | ||||
| # TODO run hpr_generator to generate only the new episode | ||||
| # TODO take screenshots of the rendered episode on the hpr website | ||||
| # TODO audio_channels default to mono - stereo as an option | ||||
| # TODO check the channels on the source audio and add a warning in the report to check it's ok to reduce to mono | ||||
| # TODO Add chapter support  | ||||
| # TODO incorporate direct upload to the IA | ||||
| # TODO change MEDIA_TRANSCODED | ||||
| # TODO incorporate assets  | ||||
| # TODO copy the files to the backup disk | ||||
| # TODO copy the derived files to the ccdn origin server  | ||||
| # TODO fix permissions on vger(two) | ||||
|  | ||||
| # TODO  | ||||
|  | ||||
| program_checks              # We know that all the programs and variables are set | ||||
|  | ||||
| get_working_dir $@          # We have a working directory and a valid json file | ||||
|  | ||||
| get_episode_metadata $@     # We have all the metadata we need to process the show. | ||||
|  | ||||
| extract_images_brute_force  # Extract_images by brute force | ||||
|   | ||||
| extract_images_brute_force  # We have extracted the images by brute force | ||||
|  | ||||
| media_checks                #  | ||||
|  | ||||
| @@ -1334,6 +1450,8 @@ manual_final_review | ||||
|  | ||||
| register_assets | ||||
|  | ||||
| copy_files_to_origin_server | ||||
|  | ||||
| copy_derived_files_to_borg_for_the_internet_archive | ||||
|  | ||||
| echo_debug "The End"  | ||||
|   | ||||
		Reference in New Issue
	
	Block a user