b7cae1cb90
Show_Submission/copy_shownotes: Changed the location of the function library Show_Submission/do_brave: Updates to the way local stand-alone HTML is generated for review purposes. Show_Submission/do_index: Changed the location of the function library Show_Submission/do_pandoc: Changed the location of the function library; now uses 'author_title.pl' to generate YAML for Pandoc Show_Submission/do_parse: Trivial change Show_Submission/do_pictures: Changed the location of the function library; better handling of the show specification Show_Submission/do_report: Changed the location of the function library Show_Submission/do_update_reservations: Changed the location of the function library Show_Submission/fix_relative_links: Added features 'say' and 'state' Show_Submission/parse_JSON: New checks: notes too short, trailing spaces on title, summary and tags (needing JSON changes). Check for Markdown in the assets (see 'do_pandoc_assets'). New 'trim' function.
516 lines
16 KiB
Bash
Executable File
516 lines
16 KiB
Bash
Executable File
#!/bin/bash -
|
|
#===============================================================================
|
|
#
|
|
# FILE: do_pandoc
|
|
#
|
|
# USAGE: ./do_pandoc [option] <epno>
|
|
#
|
|
# DESCRIPTION: Performs an "intelligent" Pandoc run on various types of show
|
|
# notes. Converts various markup formats into HTML. Treat plain
|
|
# text as Markdown, though this depends on the prior editing
|
|
# step doing the right thing. Handles pictures and other assets
|
|
# in plain text shows that have them - this is done by the edit
|
|
# phase adding TT² macros and this script processing them with
|
|
# 'tpage'.
|
|
# Version 0:2:5 (released 2022-12-04) has not yet been fully
|
|
# tested. Seems reliable 2023-03-03.
|
|
#
|
|
# OPTIONS: ---
|
|
# REQUIREMENTS: ---
|
|
# BUGS: ---
|
|
# NOTES: 2021-04-03: removed the TOC option
|
|
# 2021-11-07: Added --strip-comments to the HTML snippet
|
|
# generation stage
|
|
# 2022-11-01: Big rewrite over the month. Refer to
|
|
# do_pandoc_0.1.6 for the previous version since there have been
|
|
# some big changes.
|
|
# 2022-12-17: Ending reliance on shownotes.txt, and using
|
|
# shownotes.json instead into the future. Some massive tidying
|
|
# and rationalisation are still required.
|
|
# 2023-03-03: If the title contained a quote then the previous
|
|
# algorithm made bad YAML which caused the 'full' html to fail.
|
|
# Fixed.
|
|
# 2023-11-15: The 'prefix' setting is wrong. It needs another
|
|
# 'hpr1234' directory level. This is needed because the
|
|
# top-level stuff for the show is already in such a directory,
|
|
# and while we have assets in a sub-directory we need to be
|
|
# careful about collisions.
|
|
#
|
|
# AUTHOR: Dave Morriss (djm), Dave.Morriss@gmail.com
|
|
# VERSION: 0.2.11
|
|
# CREATED: 2016-08-16 15:34:30
|
|
# REVISION: 2024-10-18 23:03:25
|
|
#
|
|
#===============================================================================
|
|
|
|
# Abort on any reference to an unset variable
set -o nounset

# Name of this script without its leading path, and its version
SCRIPT="${0##*/}"
#DIR=${0%/*}
VERSION='0.2.11'

# Where the usage message is written (file descriptor 2)
STDOUT="/dev/fd/2"

#
# Pull in the shared function library; nothing below works without it
#
LIB="${HOME}/HPR/function_lib.sh"
if [[ ! -e $LIB ]]; then
    echo "$SCRIPT: Unable to source functions"
    exit 1
fi
# shellcheck source=/home/cendjm/HPR/function_lib.sh
. "$LIB"

#
# Colour codes ('define_colours' is not defined in this file; presumably it
# comes from the library just sourced)
#
define_colours
|
|
|
|
# {{{ Functions: -- _usage --
#===  FUNCTION  ================================================================
#         NAME: _usage
#  DESCRIPTION: Writes the usage message to the file named by $STDOUT (or to
#               /dev/fd/2 if that variable is unset or empty), then exits the
#               script.
#   PARAMETERS: $1 - optional exit status for the script (default 0)
#      RETURNS: Does not return; the script exits with the requested status
#===============================================================================
_usage () {
    local status="${1:-0}"

    # The redirection target is quoted and given a fallback so that a missing
    # STDOUT cannot turn the help request into an "ambiguous redirect" error.
    # The here-document text must stay at column 0 since it is emitted as is.
    cat > "${STDOUT:-/dev/fd/2}" <<endusage
Usage: ./${SCRIPT} [-h] [-d] [-D] shownumber

Version: $VERSION

Runs Pandoc against a particular show, choosing a format as
defined by the declared format (in the file '.format').

(In this version there is no method to force an explicit input format)

Options:
-h Print this help
-d Select dry run mode
-D Turn on debug mode with lots of extra output

The default behaviour is now to access the '.format' file in the show
directory.

Arguments:
shownumber

Examples
./${SCRIPT} -h
./${SCRIPT} -d 2240
./${SCRIPT} -D 2250

endusage
    exit "$status"
}
# }}}
|
|
|
|
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
#
|
|
# Formats offered by the web form on the HPR site:
|
|
#
|
|
# Keyword Explanation
|
|
# ------- -----------
|
|
# plain_text Plain text
|
|
# html5 HTML5 (preferred)
|
|
# markdown_standard Markdown (standard)
|
|
# Markdown_GitHub Markdown (GitHub flavoured)
|
|
# Markdown_Pandoc Markdown (Pandoc flavoured)
|
|
# restructured_text RestructuredText
|
|
# txt2tags txt2tags
|
|
#
|
|
|
|
#
|
|
# Hash to perform translation from declared format to Pandoc "-from" value
|
|
#
|
|
# {{{ -- 'lookup' hash --
# Keyed by the format keyword declared for the show; each value is the string
# handed to Pandoc's '-f' option further down.
declare -A lookup=(
    [plain_text]='markdown-implicit_figures'
    [html5]='html'
    [markdown_standard]='markdown_strict'
    [Markdown_GitHub]='gfm'     # Extensions are limited
    [Markdown_Pandoc]='markdown-implicit_figures'
    [restructured_text]='rst'
    [txt2tags]='t2t'
)
# }}}
|
|
|
|
#
|
|
# Hash for options when generating standalone readable HTML
|
|
#
|
|
# (The 'smart' extension is only applicable to markdown, commonmark, latex,
|
|
# mediawiki, org, rst, twiki; we want to turn it off to remove smart quotes)
|
|
#
|
|
# {{{ -- 'options' hash --
# Keyed by the declared format keyword; each value is a string of Pandoc
# extension switches appended to the '-f' value when the stand-alone HTML is
# generated. An empty value means nothing is appended.
declare -A options=(
    [plain_text]='-smart'
    [html5]=''
    [markdown_standard]='+yaml_metadata_block'
    [Markdown_GitHub]=''
    [Markdown_Pandoc]='-smart+yaml_metadata_block'
    [restructured_text]='-smart'
    [txt2tags]=''
)
# }}}
|
|
|
|
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
|
|
#
|
|
# Sanity checks
|
|
#
|
|
# JQ=$(command -v jq)
|
|
# [ -n "$JQ" ] || { echo "Program 'jq' was not found"; exit 1; }
|
|
# YQ=$(command -v yq)
|
|
# [ -n "$YQ" ] || { echo "Program 'yq' was not found"; exit 1; }
|
|
|
|
#
|
|
# Process options first
|
|
#
|
|
# Walk through the switches. The leading ':' in the option string stops
# getopts printing its own diagnostics, so the message below is the only one
# the user sees for a bad switch.
while getopts :dDh opt; do
    case "${opt}" in
        d)
            DRYRUN=1
            ;;
        D)
            DEBUG=1
            ;;
        h)
            _usage
            ;;
        *)
            # Anything other than the three known switches
            echo "$SCRIPT: Invalid option; aborting"
            exit 1
            ;;
    esac
done

# Discard the switches, leaving only the positional arguments
shift "$((OPTIND - 1))"
|
|
|
|
#
|
|
# Default options if not provided
|
|
#
|
|
# Switches that were not given on the command line default to "off"
DEBUG=${DEBUG:-0}
DRYRUN=${DRYRUN:-0}

#
# Exactly one argument (the show number) must remain after the switches have
# been removed. If not, show the usage text and stop with a failure status.
# '_usage' ends the shell it runs in, so it is called in a subshell to let
# this script choose the exit status itself.
#
if [[ $# -ne 1 ]]; then
    ( _usage )
    exit 1
fi
|
|
|
|
#
|
|
# Declare variables for later
|
|
# TODO: Is this necessary?
|
|
#
|
|
declare SHOWID FROM POPTIONS

#
# Make the explicit show id in the form 'hprNNNN'.
#
# The argument has to consist of digits only. It is then forced to base 10
# with the '10#' prefix, because printf's %d treats a number with a leading
# zero as octal: '0123' would silently become 83 and '0099' would be rejected
# as an invalid octal number.
#
if [[ ! $1 =~ ^[0-9]+$ ]]; then
    echo "$SCRIPT: ${red}Invalid show number '$1'${reset}"
    exit 1
fi
printf -v SHOWID 'hpr%04d' "$((10#$1))"
|
|
|
|
#
|
|
# Make temporary files and set traps to delete them
|
|
#
|
|
# Create the three scratch files TMP1, TMP2 and TMP3 in turn, giving up as
# soon as one of them cannot be made
for tmpvar in TMP1 TMP2 TMP3; do
    tmpfile=$(mktemp)
    if [[ $? -ne 0 ]]; then
        # Mirror the direct assignment: the variable holds mktemp's output
        printf -v "$tmpvar" '%s' "$tmpfile"
        echo "$SCRIPT: ${red}creation of temporary file failed!${reset}"
        exit 1
    fi
    printf -v "$tmpvar" '%s' "$tmpfile"
done
unset tmpvar tmpfile

# Pass the scratch files to 'cleanup_temp' when the script exits or receives
# one of the listed signals
trap 'cleanup_temp $TMP1 $TMP2 $TMP3' SIGHUP SIGINT SIGPIPE SIGTERM EXIT
|
|
|
|
#
|
|
# Paths to files
|
|
#
|
|
# ------------------------------------------------------------------------------
|
|
# Main directory
|
|
BASENAME="$HOME/HPR/Show_Submission"

# JSON to YAML Perl script - sanity check. The script is run directly by path
# later on, so it must be a regular file that is executable, not merely
# something that exists.
J2Y="$BASENAME/author_title.pl"
if [[ ! -f $J2Y || ! -x $J2Y ]]; then
    echo "Program '$J2Y' was not found or is not executable"
    exit 1
fi
|
|
|
|
# Directory holding the notes for every show, and the one for this show
SHOWNOTES="${BASENAME}/shownotes"
SHOWDIR="${SHOWNOTES}/${SHOWID}"

# Files in the show directory, either already present or written by this
# script
#RAWFILE="$SHOWDIR/shownotes.txt"
JSONFILE="${SHOWDIR}/shownotes.json"
FMTFILE="${SHOWDIR}/.format"
PICLIST="${SHOWDIR}/.pictures"
STATUS="${SHOWDIR}/.status"
EXTRACT="${SHOWDIR}/${SHOWID}.out"
HTML="${SHOWDIR}/${SHOWID}.html"
FULLHTML="${SHOWDIR}/${SHOWID}_full.html"

# TT² macro definitions (shared) and the picture manifest for this show
PICTPL="${BASENAME}/pic_definitions.tpl"
MANIFEST="${SHOWDIR}/.pictures.mf"      # From do_pictures
# ------------------------------------------------------------------------------

#
# Leading part of the URL used for pictures on the HPR site
#
BASEURL='https://hackerpublicradio.org/eps/'
|
|
|
|
#{{{ --- Obsolete 2022-12-17 ---
|
|
#
|
|
# Make a metadata file by parsing the raw data file fields with awk. Save it
|
|
# in a temporary file.
|
|
#
|
|
# See the original do_pandoc_0.1.6 for the extended comments that led to this
|
|
# design. We were trying to make acceptable YAML, but ended up extracting
|
|
# metadata from the result.
|
|
# ----
|
|
# TODO: 2022-12-01 Rationalise all of this; it's full of debris from previous
|
|
# attempts to solve the problem of passing metadata to Pandoc.
|
|
# ----
|
|
#
|
|
# awk -f - "$RAWFILE" > "$TMP1" <<'ENDAWK'
|
|
# BEGIN {print "---"}
|
|
# /^Title:/ && got_title == 0 {
|
|
# sub(/^Title:\s+/,"")
|
|
# printf "#title: %s\n",$0
|
|
# gsub(/'/,"''")
|
|
# printf "title: '%s'\n",$0
|
|
# got_title = 1
|
|
# }
|
|
# /^Host_Name:/ && got_author == 0 {
|
|
# sub(/^Host_Name:\s+/,"")
|
|
# printf "#author: %s\n",$0
|
|
# gsub(/'/,"''")
|
|
# printf "author: '%s'\n",$0
|
|
# got_author = 1
|
|
# }
|
|
# END {print "---"}
|
|
# ENDAWK
|
|
#}}}
|
|
|
|
#
|
|
# Parse the JSON with 'author_title.pl' to make the metadata (as YAML)
|
|
# needed for Pandoc
|
|
#
|
|
# Non-YAML alternative - not chosen
|
|
# jqprog="@text \"author: \(.host.Host_Name)\ntitle: \(.episode.Title)\""
|
|
#
|
|
# Testing another formatter (Journal 2023-03-03)
|
|
# jqprog="@sh \"---\nauthor: \(.host.Host_Name)\ntitle: \(.episode.Title)\n---\""
|
|
# Added quotes around the generated strings (2023-03-31)
|
|
# jqprog="@text \"---\nauthor: \(.host.Host_Name)\ntitle: \(.episode.Title)\n---\""
|
|
#
|
|
# Moved to 'yq' 2023-04-01
|
|
# jqprog="@text \"---\nauthor: '\(.host.Host_Name)'\ntitle: '\(.episode.Title)'\n---\""
|
|
# jq -r "$jqprog" "$JSONFILE" > "$TMP1"
|
|
#
|
|
# On 2023-10-01 wrote a Perl JSON to YAML generator just for these two
|
|
# elements. It's called 'author_title.pl'
|
|
#
|
|
# yqprog='{author:.host.Host_Name,title:.episode.Title}'
|
|
# ( echo "---"; $YQ -y "$yqprog" "$JSONFILE"; echo "---"; ) > "$TMP1"
|
|
#
|
|
# Run the JSON to YAML script on the show's JSON file, with $TMP1 as its
# second argument; $TMP1 is later given to Pandoc as the metadata file. The
# path is quoted because it is built from $HOME. A failure here is fatal
# because the stand-alone HTML cannot be built without the metadata.
# NOTE(review): assumes the script returns a non-zero status on failure.
if ! "$J2Y" "$JSONFILE" "$TMP1"; then
    echo "$SCRIPT: ${red}Failed to generate YAML metadata from $JSONFILE${reset}"
    exit 1
fi
_DEBUG "YAML:" "$(<"$TMP1")"
|
|
|
|
#
|
|
# Check the main output file from do_parse exists
|
|
#
|
|
# The main output file from do_parse must already exist
if [[ ! -e $EXTRACT ]]; then
    echo "$SCRIPT: ${red}File not found: $EXTRACT${reset}"
    exit 1
fi

#
# Get the declared format from the '.format' file or fail with an error
#
if [[ -e $FMTFILE ]]; then
    FORMAT=$(<"$FMTFILE")
else
    # TODO: Should we default to something rather than abort?
    echo "$SCRIPT: ${red}Could not find declared format (.format file)${reset}"
    echo "${yellow}Has do_parse been run? If so try and fix the .format file.${reset}"
    # An explicit failure status is needed here; a bare 'exit' would pass on
    # the status of the preceding echo and report success
    exit 1
fi
|
|
|
|
#
|
|
# Need to match plain text and Markdown variants when deciding to use the
|
|
# manifest file and 'tpage'
|
|
#
|
|
# Regular expression matching the plain text format and every Markdown
# variant (the keyword may start with 'M' or 'm')
FMTRE='^(plain_text|[Mm]arkdown_)'

#
# Note whether a picture list file exists for this show (1 = yes, 0 = no)
#
hasPictures=0
if [[ -e $PICLIST ]]; then
    hasPictures=1
fi
|
|
|
|
#
|
|
# Here we use the declared format to determine what to do.
|
|
#
|
|
# This code now blocks HTML->HTML processing since it just confuses matters.
|
|
# The files hprNNNN.out and hprNNNN.html are linked to one another, so editing
|
|
# the former will edit the latter in preparation for uploading if there are
|
|
# changes that need to be made. See the journal discussion for 2018-05-24.
|
|
# ---
|
|
# TODO: 2022-12-01 If the notes are HTML but declared as something else then
|
|
# this check will not work. Trouble is, parse_JSON will have worked this out
|
|
# but not saved it so we can't avoid running Pandoc on HTML in this case.
|
|
# ---
|
|
# TODO: 2022-12-01 Look at resolving this in parse_JSON?
|
|
# ---
|
|
#
|
|
# Reject an empty or unrecognised format before it is used as a hash key.
# Under 'nounset' an unknown key would otherwise stop the script with an
# "unbound variable" message, and an empty one with "bad array subscript",
# neither of which points at the '.format' file as the cause.
if [[ -z ${FORMAT:-} || -z ${lookup[$FORMAT]+known} ]]; then
    echo "$SCRIPT: ${red}Unrecognised format '${FORMAT:-}' in the .format file${reset}"
    exit 1
fi

# HTML notes are never passed through Pandoc; plain text gets an explanation
# of how it will be handled. All other formats need no commentary.
if [[ $FORMAT == 'html5' ]]; then
    echo "${red}Running Pandoc on HTML is not allowed${reset}"
    echo "${yellow}Run do_edit to edit as necessary and do_browser to view${reset}"
    # Bare 'exit' kept as in the original: the status is that of the echo
    exit
elif [[ $FORMAT == 'plain_text' ]]; then
    echo "${yellow}Format chosen is plain text${reset}"
    if [[ $hasPictures -eq 0 ]]; then
        echo "${yellow}This will be treated as Markdown${reset}"
    else
        echo "${yellow}This will be treated as Markdown and will need special action${reset}"
        echo "${yellow}since there are pictures${reset}"
    fi
fi

# Translate the declared format into Pandoc's input format name and the
# extension switches used for the stand-alone HTML
FROM=${lookup[$FORMAT]}
POPTIONS=${options[$FORMAT]}
echo "${green}Will process $FORMAT with 'pandoc -f ${FROM}'${reset}"
echo "${yellow}Options chosen for --standalone are '${POPTIONS}'${reset}"
|
|
|
|
#
|
|
# Only for plain text notes, process pictures for the HTML we'll be adding to
|
|
# the database.
|
|
#
|
|
# We need the following things:
|
|
# - The notes to be plain text format
|
|
# - Some pictures
|
|
# - Files written by do_parse and do_pictures:
|
|
# - .assets (not used here)
|
|
# - .pictures (were previously used here, but no longer)
|
|
# - .pictures.mf (needed by the TT² macros)
|
|
# - .pictures.tt2 (the TT² macro calls - already edited into the notes)
|
|
# - pic_definitions.tpl (macro definitions, common to all shows)
|
|
# - Not to be in dry run mode; if we are we just report intentions
|
|
#
|
|
# TT² processing is wanted only when the notes are plain text or a Markdown
# variant AND the picture manifest exists
if [[ $FORMAT =~ $FMTRE && -e $MANIFEST ]]; then
    useTT2=1
else
    useTT2=0
fi

if [[ $DRYRUN -ne 0 ]]; then
    #
    # Dry run: only report what would have been done
    #
    if [[ $useTT2 -eq 1 ]]; then
        echo "${yellow}Would have prepared TT² code for pandoc${reset}"
    fi
elif [[ $useTT2 -eq 1 ]]; then
    #
    # Deal with pictures using the TT² macros
    #
    _DEBUG "Processing TT² inclusions"

    # Make a picture manifest with a header: each pair of consecutive lines
    # in the manifest becomes one 'first : second' line under 'file : thumb'
    awk 'BEGIN{print "file : thumb"}{p1=$0; getline p2; printf "%s : %s\n",p1,p2}' \
        "$MANIFEST" > "$TMP2"

    _DEBUG "Picture list:" "$(cat "$TMP2")" "---" \
        "BASEURL=${BASEURL}${SHOWID}/" \
        "EXTRACT=$EXTRACT" \
        "Extract file contents:" "$(cat "$EXTRACT")" "---"

    # Run the macros on the notes to make $TMP3 for Pandoc
    tpage --pre_process="$PICTPL" \
        --define "piclist=$TMP2" \
        --define "prefix=${BASEURL}${SHOWID}/${SHOWID}/" \
        "$EXTRACT" > "$TMP3"

    _DEBUG "Processed by tpage" "$(cat "$TMP3")" "---"
else
    # Either the format is not plain text or Markdown, or there is no
    # manifest: put the notes unchanged in $TMP3 where Pandoc will look
    cat "$EXTRACT" > "$TMP3"
fi
|
|
|
|
#
|
|
# Generate an HTML snippet for adding to the database.
|
|
# (Note 2021-11-24: Added --ascii option.)
|
|
#
|
|
if [[ $DRYRUN -ne 0 ]]; then
    echo "${yellow}Would have run pandoc to make HTML for upload${reset}"
else
    # Arguments for the snippet run: the 'smart' extension is always turned
    # off, and the output is an HTML5 fragment written to $HTML
    snippet_args=(
        -f "${FROM}-smart"
        -t html5
        --ascii
        --no-highlight
        --strip-comments
        "$TMP3"
        -o "$HTML"
    )
    pandoc "${snippet_args[@]}"
    RES=$?

    # Without the snippet there is nothing to upload, so stop on failure
    if [[ $RES -ne 0 ]]; then
        echo "$SCRIPT: ${red}Oops! Something went wrong! (line $LINENO)${reset}"
        echo "${yellow}$SCRIPT: Aborting now${reset}"
        exit 1
    fi

    echo "$SCRIPT: ${green}Created shownotes/$SHOWID/${HTML##*/}${reset}"
fi
|
|
|
|
#
|
|
# Make HTML for proof reading. All pictures referenced are now on the HPR
|
|
# server (we ran 'do_asset_upload'), so we want to refer to them here.
|
|
#
|
|
# File $TMP2 contains the .pictures.mf contents with a header line; and it
|
|
# contains data for the macros. It was created when we prepared the main HTML
|
|
# for the database. We use $BASEURL again here because we want to reference
|
|
# the pictures on the server.
|
|
#
|
|
# We use the YAML metadata file in $TMP1 (made by 'author_title.pl') from earlier to do
|
|
# this. At the end TMP3 contains Markdown for Pandoc.
|
|
#
|
|
if [[ $DRYRUN -ne 0 ]]; then
    # Dry run
    echo "${yellow}Would have run pandoc to make HTML for proof reading${reset}"
else
    # Rebuild $TMP3: through the TT² macros for plain text or Markdown notes
    # that have a manifest, otherwise as a straight copy of the notes
    if [[ $FORMAT =~ $FMTRE && -e $MANIFEST ]]; then
        tpage --pre_process="$PICTPL" \
            --define "piclist=$TMP2" \
            --define "prefix=${BASEURL}${SHOWID}/${SHOWID}/" \
            "$EXTRACT" > "$TMP3"
    else
        cat "$EXTRACT" > "$TMP3"
    fi

    #
    # Generate complete HTML that we can proofread. The metadata this
    # stand-alone page needs is read from the YAML file in $TMP1.
    #
    # The input format is the Pandoc format name followed directly by the
    # extension switches chosen for the declared format; both come from the
    # hashes defined in this script and contain no whitespace, so they are
    # passed as one word.
    #
    full_args=(
        -f "${FROM}${POPTIONS}"
        -t html5
        --ascii
        --standalone
        --template=hpr_dev.html5
        --no-highlight
        -c https://hackerpublicradio.org/css/hpr.css
        --metadata-file="$TMP1"
        -o "$FULLHTML"
        "$TMP3"
    )
    pandoc "${full_args[@]}"
    RES=$?

    # A failure here is reported but does not stop the script
    if [[ $RES -ne 0 ]]; then
        echo "$SCRIPT: ${red}Oops! Something went wrong making the full HTML! (line $LINENO)${reset}"
    else
        echo "$SCRIPT: ${green}Created shownotes/$SHOWID/${FULLHTML##*/}${reset}"
    fi
fi
|
|
|
|
#
|
|
# Set the status for this show
|
|
#
|
|
# Outside dry run mode, add a 'converted' line to the show's status file
if (( DRYRUN == 0 )); then
    printf '%s\n' "converted" >> "$STATUS"
fi

exit
|
|
|
|
# vim: syntax=sh:ts=8:sw=4:ai:et:tw=78:fo=tcrqn21:fdm=marker
|