1
0
forked from HPR/hpr-tools

Compare commits

..

9 Commits
main ... main

Author SHA1 Message Date
7fe9f60205 The show processing needs to be refactored #5 2025-01-16 22:05:35 +01:00
5cfdd42b11 The show processing needs to be refactored #5 2025-01-16 22:02:43 +01:00
e1df438111 The show processing needs to be refactored #5 2025-01-15 22:08:04 +01:00
5feaed5f46 The show processing needs to be refactored #5 2025-01-14 20:41:34 +01:00
Dave Morriss
4feae03fee Updated 'future_upload' in 'check_uploads' 2025-01-01 18:16:35 +00:00
Dave Morriss
a3c8586730 Minor updates
Show_Submission/extract_images: some tidying, addition of more POD
    documentation.

Show_Submission/query2csv, Show_Submission/query2json: these softlinks
    were turned to hard links to make them more visible in the Git repo.
    The files are in the 'Database/' directory.
2024-12-31 21:41:52 +00:00
Dave Morriss
2f350dd1db Correction to the component order of the generated file name 2024-12-31 17:32:27 +00:00
Dave Morriss
e0c4545295 Tidied and enhanced 'extract_images'
Show_Submission/extract_images: removed unwanted modules, added POD
    documentation, added 'coalesce' routine, adjusted to use the
    absolute paths to the input files and generated image files. The
    latter are always written to the directory where the HTML resides.
    Corrected logic around overwriting image files and '--force'.

    Still an 'alpha' version subject to more testing.
2024-12-30 12:15:04 +00:00
Dave Morriss
37567bfd16 New 'extract_images' script
Show_Submission/extract_images: new script to read an HTML file looking
    for 'data' scheme URIs (embedded images), extract them and modify
    the HTML to reflect the new source of the image. At present it
    writes a generated file name with a sequence number in it, but the
    appropriate suffix/extension for the image type. This is an alpha
    version which needs further work.

Show_Submission/parse_JSON: attempting to debug a JSON parsing failure.
2024-12-29 16:33:52 +00:00
16 changed files with 2548 additions and 1075 deletions

View File

@ -13,9 +13,9 @@
# NOTES: Contains methods from 'delete_uploaded' and 'weekly_upload' as # NOTES: Contains methods from 'delete_uploaded' and 'weekly_upload' as
# well as 'update_state' # well as 'update_state'
# AUTHOR: Dave Morriss (djm), Dave.Morriss@gmail.com # AUTHOR: Dave Morriss (djm), Dave.Morriss@gmail.com
# VERSION: 0.0.15 # VERSION: 0.0.16
# CREATED: 2021-01-07 12:11:02 # CREATED: 2021-01-07 12:11:02
# REVISION: 2024-07-29 23:17:45 # REVISION: 2025-01-01 11:48:40
# #
#=============================================================================== #===============================================================================
@ -26,7 +26,7 @@ SCRIPT=${0##*/}
STDOUT="/dev/fd/2" STDOUT="/dev/fd/2"
VERSION="0.0.15" VERSION="0.0.16"
# #
# Load library functions # Load library functions
@ -51,13 +51,24 @@ check_uploads () {
# #
# Look for files called hpr1234.flac and so on. Don't bother with the # Look for files called hpr1234.flac and so on. Don't bother with the
# hpr1234_source.flac one. As soon as a file is missing return with false. # hpr1234_source.flac one. As soon as a file is missing return with false.
# 2025-01-01: Dropped 'spx' from the list
# #
for suff in flac mp3 ogg opus spx wav; do for suff in flac mp3 ogg opus wav; do
if [[ ! -e $UPLOADS/$prefix.$suff ]]; then if [[ ! -e $UPLOADS/$prefix.$suff ]]; then
return 1 return 1
fi fi
done done
#
# Transcripts are (currently) in a sub-directory with the same name as the
# IA item. We only cater for two types as of 2025.
#
for suff in txt srt; do
if [[ ! -e $UPLOADS/$prefix/$prefix.$suff ]]; then
return 1
fi
done
return 0 return 0
} }

739
Show_Submission/extract_images Executable file
View File

@ -0,0 +1,739 @@
#!/usr/bin/env perl
#===============================================================================
#
# FILE: extract_images
#
# USAGE: ./extract_images [--help] [--documentation|man]
# [--prefix=STRING] [--[no-]backup] [--force] [--[no]silent]
# HTML_file [ [HTML_file_2] [HTML_file_3] ... ]
#
# DESCRIPTION: Processes HTML files which may have 'data' scheme URIs containing
# images, and extracts these images into files in the same
# directory. The 'data' scheme links are converted to 'https'
# and reference the extracted files. The modified HTML is
# output, and the original will be saved as a backup if
# requested.
#
# The 'data' URI scheme is specified in RFC 2397.
#
# OPTIONS: ---
# REQUIREMENTS: ---
# BUGS: ---
# NOTES: ---
# AUTHOR: Dave Morriss (djm), Dave.Morriss@gmail.com
# VERSION: 0.0.3
# CREATED: 2024-12-25 10:53:15
# REVISION: 2024-12-31 20:56:50
#
#===============================================================================
use v5.36;
use strict;
use warnings;
use feature qw{ say state try };
no warnings qw{ experimental::try };
use open ':std', ':encoding(UTF-8)'; # Make all IO UTF-8
use Getopt::Long;
use Pod::Usage;
use HTML::TreeBuilder 5 -weak;
use URI;
use MIME::Types;
use Path::Tiny;
use File::Copy;
use Data::Dumper;
#
# Version number (Incremented by Vim)
#
our $VERSION = '0.0.3';
#
# Script and directory names
#
( my $PROG = $0 ) =~ s|.*/||mx;
#-------------------------------------------------------------------------------
# Declarations
#-------------------------------------------------------------------------------
my ( $notes, $typename, $uri );
my ( $fcount, $basename, $filename, $suffix, $fh );
my ( $updates, $bsuffix, $newURL );
my $backupcount = 5;
#-------------------------------------------------------------------------------
# Options and arguments
#-------------------------------------------------------------------------------
# {{{
#
# Option defaults
#
my $DEFDEBUG = 0;
my $DEFPREFIX = 'image';
my %options;
Options( \%options );
#
# Default help shows minimal information
#
pod2usage( -msg => "$PROG version $VERSION\n", -exitval => 1, -verbose => 0 )
if ( $options{'help'} );
#
# The -documentation or -man option shows the full POD documentation through
# a pager for convenience
#
pod2usage( -msg => "$PROG version $VERSION\n", -exitval => 1, -verbose => 2 )
if ( $options{'documentation'} );
#
# Collect options
#
my $DEBUG = ( defined( $options{debug} ) ? $options{debug} : $DEFDEBUG );
my $silent = ( defined( $options{silent} ) ? $options{silent} : 0 );
my $prefix = ( defined( $options{prefix} ) ? $options{prefix} : $DEFPREFIX );
my $backup = ( defined( $options{backup} ) ? $options{backup} : 1 );
my $force = ( defined( $options{force} ) ? $options{force} : 0 );
#
# Check we have arguments
#
pod2usage(
-msg => "Missing arguments. One or more HTML file names are needed\n",
-exitval => 1,
-verbose => 0
)
unless ( scalar(@ARGV) > 0 );
#
# Clean up the prefix
#
$prefix = ($prefix =~ /(.*)_$/ ? $1 : $prefix);
#
# Backup suffix (the dot gets added later)
#
$bsuffix = 'bak';
#
# Debug the options
#
_debug(
$DEBUG > 1,
'$silent = ' . $silent,
'$prefix = ' . $prefix,
'$backup = ' . $backup,
'$force = ' . $force,
);
# }}}
#-------------------------------------------------------------------------------
# Prepare a MIME::Types object
#-------------------------------------------------------------------------------
my $mt = MIME::Types->new;
#-------------------------------------------------------------------------------
# Loop through the arguments
#-------------------------------------------------------------------------------
foreach my $notesfile (@ARGV) {
#
# Get the absolute path of the argument
#
my $abs_nf = path($notesfile)->absolute;
#
# Get the 'dirname' from the absolute path
#
my $dirname = path($abs_nf)->parent->stringify;
unless (-e $abs_nf) {
warn "Unable to find $notesfile\n";
next;
}
#
# Force the MIME type of $notesfile to a string
#
$typename = "" . coalesce( $mt->mimeTypeOf("$abs_nf"), '' );
#
# Check the MIME type and reject non-HTML
#
unless ($typename eq 'text/html') {
warn "File $abs_nf is not HTML\n";
next
}
say "Reading from $abs_nf\n" unless $silent;
#
# Get HTML file basename (no path) without the suffix for building filenames
#
$basename = path($abs_nf)->basename('.html');
#
# Read the HTML
#
open (my $nfh, '<', $notesfile) or die "Unable to open $notesfile\n";
$notes = <$nfh>;
close($nfh);
#
# Image files are to have an index/sequence number
#
$fcount = 0;
#
# Keep a note of HTML updates
#
$updates = 0;
#
# Initialise the TreeBuilder
#
my $tree = HTML::TreeBuilder->new;
$tree->ignore_unknown(0);
$tree->no_expand_entities(1);
$tree->p_strict(1);
$tree->store_comments(1);
$tree->warn(1);
#
# Load the HTML obtained from the file into the TreeBuilder
#
$tree->parse_content($notes)
or die "HTML::TreeBuilder failed to parse notes: $!\n";
#
# Loop through the tree looking for 'data' scheme images
#
for ( @{ $tree->extract_links('img') } ) {
my ( $link, $element, $attr, $tag ) = @$_;
$uri = URI->new($link);
unless ($silent) {
say "Scheme: ", $uri->scheme;
say "Media type: ", $uri->media_type;
}
#
# We only care about 'data' scheme stuff - for now anyway, and only
# images within this set
#
if ( $uri->scheme eq 'data' ) {
#
# Only images
#
if ( $uri->media_type =~ /^image/ ) {
#
# Extract the file name suffix from the MIME string, and give it
# a leading '.'
#
( $suffix = $uri->media_type ) =~ s{^.*/}{.};
#
# Construct the filename for this image making sure it's in
# the directory where the HTML is located.
#
# ${fileprefix}_${prefix}_${increment}.${extension}
#
$fcount++;
$filename
= "$dirname/${basename}_${prefix}_${fcount}${suffix}";
say "Writing to: $filename" unless $silent;
say '-' x 40 unless $silent;
#
# If the file exists and --force is not active, warn and skip.
# Otherwise write the file.
#
if ( -e $filename && !$force ) {
warn "File $filename exists; not overwriting\n";
}
else {
#
# Write the data to the file in binary format. The URI
# module does the conversion so it's already binary.
#
$fh = path($filename)->openw_raw;
print $fh $uri->data;
close($fh);
}
#
# Update the HTML with a link to the file we created (or that
# was already there from an earlier time).
#
$updates++;
$newURL = path($filename)->basename;
$element->attr( $attr, $newURL );
}
}
} # extract_links loop
#
# Output the changed HTML turning what became standalone back into
# a <body> fragment.
#
if ($updates > 0) {
my $body = $tree->look_down( _tag => 'body' );
( my $result = $body->as_HTML( undef, ' ', {} ) )
=~ s{(^<body[^>]*>|</body>$)}{}gi;
#$notesfile = path($notesfile)->basename;
if ($backup) {
if (_backup( $abs_nf, $bsuffix, $backupcount )) {
say "$notesfile backed up" unless $silent;
}
}
else {
say "$notesfile not backed up" unless $silent;
}
$fh = path($notesfile)->openw;
say $fh $result;
close($fh);
say "$updates images converted, $notesfile updated" unless $silent;
}
else {
say "No images found, no changes made to $notesfile" unless $silent;
}
} # $notesfile loop
exit;
#=== FUNCTION ================================================================
# NAME: _backup
# PURPOSE: Given a file, make a backup of it by appending $suffix, and
# handle previous backups
# PARAMETERS: $filename path of file to backup
# $suffix suffix to use, default 'bck'
# $limit number of backups to keep
# RETURNS: True or false depending on success
# DESCRIPTION: Checks that the file exists and returns FALSE if it doesn't.
# Rationalises the $limit to avoid it being 0 or less.
# THROWS: No exceptions
# COMMENTS: None
# SEE ALSO: N/A
#===============================================================================
sub _backup {
my ( $filename, $suffix, $limit ) = @_;
my ( $backupname, $limitname );
#
# Check the target file exists
#
unless ( -e $filename ) {
warn "Unable to find $filename to backup\n";
return 0;
}
#
# Handle invalid $limit values
#
$limit = 1 unless ( $limit >= 1 );
#
# Defaults
#
$suffix = 'bck' unless $suffix;
$limitname = "$filename.$suffix.$limit";
$backupname = "$filename.$suffix";
#
# Look for existing backups
#
if ( -e $backupname ) {
#
# If maximum version exists delete it
#
if ( -e $limitname ) {
unlink($limitname);
}
#
# Go backwards through the versions incrementing their version numbers
#
for ( my $vsn = $limit - 1; $vsn > 0; $vsn-- ) {
if ( -e "$filename.$suffix.$vsn" ) {
move( "$filename.$suffix.$vsn",
sprintf( '%s.%s.%s', $filename, $suffix, $vsn + 1 ) );
}
}
#
# Make $filename.bck into $filename.bck.1
#
move( "$filename.$suffix", "$filename.$suffix.1" );
}
#
# Finally save the $filename as $filename.bck
#
move( $filename, "$filename.$suffix" );
return 1;
}
#=== FUNCTION ================================================================
# NAME: coalesce
# PURPOSE: To find the first defined argument and return it
# PARAMETERS: Arbitrary number of arguments
# RETURNS: The first defined argument or undef if there are none
# DESCRIPTION: Just a simple way of ensuring an 'undef' value is never
# returned when doing so might be a problem.
# THROWS: No exceptions
# COMMENTS: None
# SEE ALSO: N/A
#===============================================================================
sub coalesce {
foreach (@_) {
return $_ if defined($_);
}
return undef; ## no critic
}
#=== FUNCTION ================================================================
# NAME: _debug
# PURPOSE: Prints debug reports
# PARAMETERS: $active Boolean: 1 for print, 0 for no print
# $messages... Arbitrary list of messages to print
# RETURNS: Nothing
# DESCRIPTION: Outputs messages if $active is true. It removes any trailing
# newline from each one and then adds one in the 'print' to the
# caller doesn't have to bother. Prepends each message with 'D>'
# to show it's a debug message.
# THROWS: No exceptions
# COMMENTS: Differs from other functions of the same name
# SEE ALSO: N/A
#===============================================================================
sub _debug {
my $active = shift;
my $message;
return unless $active;
while ($message = shift) {
chomp($message);
print STDERR "D> $message\n";
}
}
#=== FUNCTION ================================================================
# NAME: Options
# PURPOSE: Processes command-line options
# PARAMETERS: $optref Hash reference to hold the options
# RETURNS: Undef
# DESCRIPTION: Process the options we want to offer. See the documentation
# for details
# THROWS: no exceptions
# COMMENTS: none
# SEE ALSO: n/a
#===============================================================================
sub Options {
my ($optref) = @_;
my @options = (
"help", "documentation|man", "debug=i", "silent!",
"prefix=s", "backup!", "force!",
# "config=s",
);
if ( !GetOptions( $optref, @options ) ) {
pod2usage(
-msg => "$PROG version $VERSION\n",
-exitval => 1,
-verbose => 0
);
}
return;
}
__END__
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Application Documentation
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
#{{{
=head1 NAME
extract_images - extract embedded images from HTML and save as files
=head1 VERSION
This documentation refers to extract_images version 0.0.3
=head1 USAGE
extract_images
[--help] [--documentation|man] [-debug=N]
[--prefix=STRING] [--[no]backup] [--[no]force] [--[no]silent]
HTML_file [ [HTML_file_2] [ [HTML_file_3] ... ] ]
Examples:
extract_images --prefix=picture_ --backup index.html
extract_images --silent --nobackup shownotes.html
=head1 REQUIRED ARGUMENTS
The script requires the names of one or more HTML files which will be scanned
for embedded images.
=head1 OPTIONS
Note that all on/off options can be written as B<--silent>, B<--nosilent> or
B<--no-silent>. A single hyphen can be used at the start or a pair.
=over 8
=item B<--help>
Reports brief information about how to use the script and exits. To see the
full documentation use the option B<--documentation> or B<--man>. Alternatively,
to generate a PDF version use the I<pod2pdf> tool from
I<http://search.cpan.org/~jonallen/pod2pdf-0.42/bin/pod2pdf>. This can be
installed with the cpan tool as App::pod2pdf.
=item B<--documentation> or B<--man>
Reports full information about how to use the script and exits. Alternatively,
to generate a PDF version use the I<pod2pdf> tool from
I<http://search.cpan.org/~jonallen/pod2pdf-0.42/bin/pod2pdf>. This can be
installed with the cpan tool as App::pod2pdf.
=item B<--debug=N>
Run in debug mode at the level specified by I<N>. Possible values are:
=over 4
=item B<0>
No debugging (the default).
=item B<1>
TBA
=item B<2>
Displays the internal variables used to store the options.
=item B<3>
TBA
=back
=item B<--prefix=STRING>
Since embedded images have no names, when they are written to image files
names are generated for them. These names are built from the following
components:
=over 4
=item B<HTML filename>
The name of the HTML file without a suffix. So, if the name is 'index.html',
the 'index' part will be used.
=item B<prefix>
The prefix string provided by this option, or 'image' if not specified.
=item B<counter>
The images found in the HTML are counted, starting from 1, and this number is
used here.
=item B<extension>
The image format extension, taken from the embedded image information.
Examples would be 'jpg', 'jpeg' and 'png'.
=back
The B<HTML filename>, B<prefix>, B<counter> and B<extension> are joined
together to make the file name as follows:
<HTML filename>_<prefix>_<counter>.<extension>
So the following run of the script would generate the first file shown below
assuming the first embedded picture was in 'PNG' format:
extract_images --prefix=picture hpr4283.html
hpr4283_picture_1.png
=item B<--[no]backup>
The HTML is modified to leave placeholders referring to any embedded images
found as it it processed. The new HTML is written to the original file if
B<--nobackup> is specified, or a backup of the original file is made, before
writing the new HTML. Making a backup is the default behaviour.
=item B<--[no]force>
Images are written to the current directory. If an image already exists, and
B<--force> has not been specified, the script will stop processing and exit.
The default setting of this option is B<--noforce>.
=item B<--[no]silent>
By default the script reports its progress as it processes an HTML file, but
this can be turned off by using this option.
=back
=head1 DESCRIPTION
This Perl script B<extract_images> parses HTML looking for embedded images.
Such images use a URI with the scheme 'data' followed by image details
including the encoded binary contents. See the Wikipedia article
https://en.wikipedia.org/wiki/Data_URI_scheme for details.
When such images are found they are written as files (with generated names),
and the HTML is updated with these names replacing the original URI. Further
work is likely to be required to build full image links, but the majority of
work will have been done.
The script will not overwrite image files unless the B<--force> option is
used, but will overwrite the original HTML file unless B<--backup> is
specified.
By default details of progress are written to standard output, but this can be
preveneted by using the B<--silent> option.
=head2 GENERATED IMAGE FILE NAMES
These names are built from various elements:
<HTML filename>_<prefix>_<counter>.<extension>
Where <HTML filename> is the base name of the HTML input file, <prefix> is the
string provided with the B<--prefix=STRING> option (or B<image> by default),
<counter> is a count of image files found during the scan of the HTML, and
<extension> is the appropriate file extension for the type of image being
converted.
See the option B<--prefix=STRING> for details of file name generation.
=head1 DIAGNOSTICS
=over 4
=item B<Unable to find ...>
Type: warning
The script is attempting to find a file presented to it as an argument, but
cannot. It will skip to the next argument and continue.
=item B<File ... is not HTML>
Type: warning
The script is checking files presented to it as arguments. The named file has
been checked to see if it contains HTML, and it appears it does not. It will
skip to the next argument and continue.
=item B<Unable to open ...>
Type: fatal
The script is attempting to open a file presented to it as an argument. Its
existence has already been checked but it cannot be opened, possibly due to
a permissions issue.
=item B<HTML::TreeBuilder failed to parse notes: ...>
Type: fatal
The script has attempted to parse the HTML in a file presented to it. This has
failed. The error message includes a report from the module used to do this.
=item B<File ... exists; not overwriting>
Type: warning
The script is attempting to write an image file copied from the HTML, but has
found it already exists. Using the option B<--force> would cause it to be
overwritten, but this option is not enabled.
The script will not write the file, however, it will still modify the HTML to
reference the existing image file.
=item B<Unable to find ... to backup>
Type: warning
The script is attempting to make a backup of one of the HTML file arguments,
but is unable to find it. Since this is a rather unusual circumstance (has the
file been deleted?), it is assumed it can be regenerated if needed, so the
writing of the modified HTML is continued.
=head1 CONFIGURATION AND ENVIRONMENT
TBA
=head1 DEPENDENCIES
Data::Dumper
File::Copy
Getopt::Long
HTML::TreeBuilder
MIME::Types
Path::Tiny
Pod::Usage
URI
=head1 BUGS AND LIMITATIONS
There are no known bugs in this module.
Please report problems to Dave Morriss (Dave.Morriss@gmail.com)
Patches are welcome.
=head1 AUTHOR
Dave Morriss (Dave.Morriss@gmail.com)
=head1 LICENCE AND COPYRIGHT
Copyright (c) 2024 Dave Morriss (Dave.Morriss@gmail.com).
All rights reserved.
This module is free software; you can redistribute it and/or
modify it under the same terms as Perl itself. See perldoc perlartistic.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
=cut
#}}}
# [zo to open fold, zc to close]
# vim: syntax=perl:ts=8:sw=4:et:ai:tw=78:fo=tcrqn21:fdm=marker

View File

@ -361,7 +361,8 @@ try {
$content = decode_json($json_text); $content = decode_json($json_text);
} }
catch ($e) { catch ($e) {
die colored( "Failed to decode the JSON in $infile", 'red' ) . "\n" warn colored( "Failed to decode the JSON in $infile", 'red' ) . "\n";
die "Error was: $e\n";
} }
$log->info( $showno, "[$VERSION] Processing $infile" ); $log->info( $showno, "[$VERSION] Processing $infile" );

View File

@ -1 +0,0 @@
/home/cendjm/HPR/Database/query2csv

137
Show_Submission/query2csv Executable file
View File

@ -0,0 +1,137 @@
#!/usr/bin/env perl
#===============================================================================
#
# FILE: query2csv
#
# USAGE: ./query2csv query
#
# DESCRIPTION: Runs a query given as the only argument. Caution is needed
# since *any* query will be run. The result of the query is
# output in CSV form on STDOUT. The CSV is always quoted to
# cater for the more simplistic consumers.
#
# OPTIONS: ---
# REQUIREMENTS: ---
# BUGS: ---
# NOTES: Had to revert to MySQL because of a problem with DBD::MariaDB
# AUTHOR: Dave Morriss (djm), Dave.Morriss@gmail.com
# VERSION: 0.0.2
# CREATED: 2015-07-11 15:53:01
# REVISION: 2022-02-16 23:17:16
#
#===============================================================================
use 5.010;
use strict;
use warnings;
use utf8;
use Config::General;
use Text::CSV_XS;
use DBI;
use Data::Dumper;
#
# Version number (manually incremented)
#
our $VERSION = '0.0.2';
#
# Script and directory names
#
( my $PROG = $0 ) =~ s|.*/||mx;
( my $DIR = $0 ) =~ s|/?[^/]*$||mx;
$DIR = '.' unless $DIR;
#-------------------------------------------------------------------------------
# Declarations
#-------------------------------------------------------------------------------
#
# Constants and other declarations
#
my $basedir = "$ENV{HOME}/HPR/Database";
my $configfile = "$basedir/.hpr_livedb.cfg";
my ( $dbh, $sth1, $aref1 );
my ( $query, $csv );
#
# Enable Unicode mode
#
binmode STDOUT, ":encoding(UTF-8)";
binmode STDERR, ":encoding(UTF-8)";
#
# Load database configuration data
#
my $conf = Config::General->new(
-ConfigFile => $configfile,
-InterPolateVars => 1,
-ExtendedAccess => 1
);
my %config = $conf->getall();
#-------------------------------------------------------------------------------
# Options and arguments
#-------------------------------------------------------------------------------
$query = shift;
die "Usage: $PROG query\n" unless $query;
#-------------------------------------------------------------------------------
# Connect to the database
#-------------------------------------------------------------------------------
my $dbhost = $config{database}->{host} // '127.0.0.1';
my $dbport = $config{database}->{port} // 3306;
my $dbname = $config{database}->{name};
my $dbuser = $config{database}->{user};
my $dbpwd = $config{database}->{password};
#$dbh = DBI->connect( "DBI:MariaDB:host=$dbhost;port=$dbport;database=$dbname",
# $dbuser, $dbpwd, { AutoCommit => 1 } )
# or die $DBI::errstr;
$dbh = DBI->connect( "dbi:mysql:host=$dbhost;port=$dbport;database=$dbname",
$dbuser, $dbpwd, { AutoCommit => 1 } )
or die $DBI::errstr;
#
# Enable client-side UTF8
#
$dbh->{mysql_enable_utf8} = 1;
#
# Set up the query
#
$sth1 = $dbh->prepare($query) or die $DBI::errstr;
if ( $dbh->err ) {
warn $dbh->errstr;
}
#
# Perform the query
#
$sth1->execute;
if ( $dbh->err ) {
warn $dbh->errstr;
}
#
# Prepare to make CSV. Not sure if always quoting is the best idea though
#
$csv = Text::CSV_XS->new(
# { always_quote => 1 }
);
#
# Loop through the returned rows making and printing CSV. Each row is returned
# as an arrayref to make it easy to join everything.
#
while ( $aref1 = $sth1->fetchrow_arrayref ) {
$csv->combine(@$aref1);
print $csv->string(), "\n";
}
exit;
# vim: syntax=perl:ts=8:sw=4:et:ai:tw=78:fo=tcrqn21:fdm=marker

View File

@ -1 +0,0 @@
/home/cendjm/HPR/Database/query2json

134
Show_Submission/query2json Executable file
View File

@ -0,0 +1,134 @@
#!/usr/bin/env perl
#===============================================================================
#
# FILE: query2json
#
# USAGE: ./query2json query
#
# DESCRIPTION: Runs a query given as the only argument. Caution is needed
# since *any* query will be run. The result of the query is
# output in JSON form on STDOUT.
#
# OPTIONS: ---
# REQUIREMENTS: ---
# BUGS: ---
# NOTES: Had to revert to MySQL because of a problem with DBD::MariaDB
# AUTHOR: Dave Morriss (djm), Dave.Morriss@gmail.com
# VERSION: 0.0.2
# CREATED: 2021-06-18 13:24:49
# REVISION: 2023-01-05 16:17:24
#
#===============================================================================
use 5.010;
use strict;
use warnings;
use utf8;
use Config::General;
use JSON;
use DBI;
use Data::Dumper;
#
# Version number (manually incremented)
#
our $VERSION = '0.0.2';
#
# Script and directory names
#
( my $PROG = $0 ) =~ s|.*/||mx;
( my $DIR = $0 ) =~ s|/?[^/]*$||mx;
$DIR = '.' unless $DIR;
#-------------------------------------------------------------------------------
# Declarations
#-------------------------------------------------------------------------------
#
# Constants and other declarations
#
my $basedir = "$ENV{HOME}/HPR/Database";
my $configfile = "$basedir/.hpr_livedb.cfg";
my ( $dbh, $sth1, $aref1 );
my ( $query, $result, $json );
#
# Enable Unicode mode
#
binmode STDOUT, ":encoding(UTF-8)";
binmode STDERR, ":encoding(UTF-8)";
#
# Load database configuration data
#
my $conf = Config::General->new(
-ConfigFile => $configfile,
-InterPolateVars => 1,
-ExtendedAccess => 1
);
my %config = $conf->getall();
#-------------------------------------------------------------------------------
# Options and arguments
#-------------------------------------------------------------------------------
$query = shift;
die "Usage: $PROG query\n" unless $query;
#-------------------------------------------------------------------------------
# Connect to the database
#-------------------------------------------------------------------------------
my $dbhost = $config{database}->{host} // '127.0.0.1';
my $dbport = $config{database}->{port} // 3306;
my $dbname = $config{database}->{name};
my $dbuser = $config{database}->{user};
my $dbpwd = $config{database}->{password};
#$dbh = DBI->connect( "DBI:MariaDB:host=$dbhost;port=$dbport;database=$dbname",
# $dbuser, $dbpwd, { AutoCommit => 1 } )
# or die $DBI::errstr;
$dbh = DBI->connect( "dbi:mysql:host=$dbhost;port=$dbport;database=$dbname",
$dbuser, $dbpwd, { AutoCommit => 1 } )
or die $DBI::errstr;
#
# Enable client-side UTF8
#
$dbh->{mysql_enable_utf8} = 1;
#
# Set up the query
#
$sth1 = $dbh->prepare($query) or die $DBI::errstr;
if ( $dbh->err ) {
warn $dbh->errstr;
}
#
# Perform the query
#
$sth1->execute;
if ( $dbh->err ) {
warn $dbh->errstr;
}
#
# Grab everything as an arrayref of hashrefs
#
$result = $sth1->fetchall_arrayref( {} );
#
# Prepare for JSON, forcing object key sorting (expensive)
#
$json = JSON->new->utf8->canonical;
#
# Encode the Perl structure to JSON
#
print $json->encode($result), "\n";
exit;
# vim: syntax=perl:ts=8:sw=4:et:ai:tw=78:fo=tcrqn21:fdm=marker

BIN
hpr_tags/fix_tags.bin Executable file

Binary file not shown.

View File

@ -1,44 +0,0 @@
#!/usr/bin/env bash
# Copyright Ken Fallon - Released into the public domain. http://creativecommons.org/publicdomain/
#============================================================
git_dir="$HOME/tmp/hpr/hpr_generator/sourcecode"
if [ ! -d "${git_dir}/.git" ]
then
git clone gitea@repo.anhonesthost.net:HPR/hpr_generator.git "${git_dir}"
fi
cd "${git_dir}"
git pull
ssh hpr -t "ls -al /home/hpr/www/hpr.sql;md5sum /home/hpr/www/hpr.sql"
ssh hpr -t "/home/hpr/bin/hpr_db_backup.bash"
ssh hpr -t "ls -al /home/hpr/www/hpr.sql;md5sum /home/hpr/www/hpr.sql"
./utils/update-hpr-db.sh
if [ $? -ne 0 ]
then
echo 'Terminating...' >&2
exit 1
fi
./site-generator --all --verbose
if [ $? -ne 0 ]
then
echo 'Terminating...' >&2
exit 1
fi
rsync -av --partial --progress "${git_dir}/public_html/" hpr:/home/hpr/public_html/
rsync -av --partial --progress "${git_dir}/public_html/" hobbypublicradio.org:hobbypublicradio.org/
cd $HOME/sourcecode/hpr/hpr_hub/
git pull
cd $HOME/sourcecode/hpr/hpr_hub/sql
split --hex-suffixes --lines=1000 --additional-suffix=.sql hpr.sql hpr-db-part-
cd $HOME/sourcecode/hpr/hpr_hub/
git add $HOME/sourcecode/hpr/hpr_hub/sql/hpr*sql
git commit -m "$(\date -u +%Y-%m-%d_%H-%M-%SZ_%A ) database changed"
git push
#xdg-open https://hackerpublicradio.org/

View File

@ -1,685 +0,0 @@
#!/usr/bin/env bash
# Copyright Ken Fallon - Released into the public domain. http://creativecommons.org/publicdomain/
#============================================================
PATH=$PATH:$HOME/sourcecode/hpr/hpr_hub/bin/
source $HOME/tmp/pip3.9/bin/activate
# https://hub.tcno.co/ai/whisper/install/
TEMP_DIR="/var/tmp/"
CHANNELS="2"
FIXAUDIO="1"
ARTIST="EMPTY"
TITLE="EMPTY"
YEAR="EMPTY"
SLOT="EMPTY"
basedir="/var/IA"
upload_dir="${basedir}/uploads"
theme="${basedir}/theme.wav"
outro="${basedir}/2022-03-07-outro.wav"
outro_c2="${basedir}/2022-03-07-outro_c2.wav"
ttsserver="http://localhost:5500"
processing_dir="$HOME/tmp/hpr/processing"
git_image_dir="$HOME/sourcecode/hpr/HPR_Public_Code/www/images/hosts"
acceptable_duration_difference="2" # Seconds
thedate=$(/usr/bin/date -u +%Y-%m-%d_%H-%M-%SZ_%A)
echo "Processing the next HPR Show in the queue"
if [ $( curl -s -o /dev/null -w "%{http_code}" -X 'GET' "${ttsserver}/api/voices" -H 'accept: */*' ) != "200" ]
then
echo "Please start the tts-server \"podman run -it -p 5500:5500 synesthesiam/opentts:en\""
exit
fi
function abs_diff {
echo $(($1 >= $2 ? $1 - $2 : $2 - $1))
}
function create_tts_summary {
HPR_summary="$( cat "hpr${ep_num}_summary.txt" )"
echo "INFO: Converting text \"${HPR_summary}\" to speech."
curl -X 'GET' -G --data-urlencode "voice=coqui-tts:en_ljspeech" --data-urlencode "text=${HPR_summary}" --data-urlencode "vocoder=high" --data-urlencode "denoiserStrength=0.03" --data-urlencode "cache=false" ${ttsserver}/api/tts -H 'accept: */*' --output ~hpr${ep_num}_summary.wav
}
###################
# Locate media directory
#
#
show_type=""
if [[ -f "${1}" && -n "${2}" ]]
then
echo "DEBUG: Show type is local" 1>&2
show_type="local"
mediafile="${1}"
media_dir="$( dirname "${mediafile}" )"
ep_num="${2}"
echo "The duration is \"$( \date -ud "1970-01-01 $( ffprobe -i "${mediafile}" 2>&1| awk -F ': |, ' '/Duration:/ { print $2 }' )" +%s )\"."
else
echo "DEBUG: Show type is remote" 1>&2
show_type="remote"
response=$( curl --silent --netrc-file ${HOME}/.netrc "https://hub.hackerpublicradio.org/cms/status.php" | \
grep 'SHOW_POSTED' | \
head -1 | \
sed 's/,/ /g' )
echo "DEBUG: Getting server response" 1>&2
if [ -z "${response}" ]
then
echo "INFO: There appear to be no more shows with the status \"SHOW_POSTED\"."
echo "Getting a list of all the reservations."
curl --silent --netrc-file ${HOME}/.netrc "https://hub.hackerpublicradio.org/cms/status.php"
exit 3
fi
timestamp_epoc="$( echo ${response} | awk '{print $1}' )"
ep_num="$( echo ${response} | awk '{print $2}' )"
ep_date="$( echo ${response} | awk '{print $3}' )"
key="$( echo ${response} | awk '{print $4}' )"
status="$( echo ${response} | awk '{print $5}' )"
email="$( echo ${response} | awk '{print $6}' )"
#source_dir="hpr:/home/hpr/upload/${timestamp_epoc}_${ep_num}_${ep_date}_${key}"
dest_dir="${timestamp_epoc}_${ep_num}_${ep_date}_${key}"
media_dir="${processing_dir}/${timestamp_epoc}_${ep_num}_${ep_date}_${key}"
fi
upload_dir_ep_num="${upload_dir}/hpr${ep_num}"
mkdir -p "${upload_dir_ep_num}" 2>/dev/null
if [ ! -d "${upload_dir_ep_num}" ]
then
echo "DEBUG: Missing directory \"upload_dir_ep_num\" \"${upload_dir}\", /hpr and \"${ep_num}\"" 1>&2
exit
fi
echo "DEBUG: Checking variables" 1>&2
if [ -z "${show_type}" ]
then
echo "sorry, variable \"show_type\" is not set."
exit
fi
if [ -z "${media_dir}" ]
then
echo "sorry, variable \"media_dir\" is not set."
exit
fi
if [ ! -d "${media_dir}" ]
then
echo "sorry, directory \"media_dir: ${media_dir}\" does not exist"
exit 1
fi
echo detox -v "${media_dir}/"
detox -vr "${media_dir}/"
echo "DEBUG: Using media directory as \"${media_dir}\""
if [[ "$( find "${media_dir}" \( -iname "hpr${ep_num}_sandwitch.wav" -o -iname "hpr${ep_num}.mp3" -o -iname "hpr${ep_num}.ogg" -o -iname "hpr${ep_num}.spx" -o -iname "hpr${ep_num}_summary.txt" -o -iname "hpr${ep_num}_summary.wav" -o -iname "hpr${ep_num}_sandwitch.wav" -o -iname "*_mez.wav" -o -iname "*_sox_norm.wav" -o -iname "*_mezzanine.wav" -o -iname "*_tmp.pcm" -o -iname "hpr${ep_num}_intro*.wav" -o -iname "~hpr${ep_num}_summary.wav" -o -iname "~~hpr${ep_num}_summary.wav" -o -iname "*_tmp.log" -o -iname "silence.wav" -o -iname "hpr${ep_num}_norm.wav" \) | wc -l )" -ne 0 ]]
then
echo "Files for this episode already exist."
find "${media_dir}" \( -iname "hpr${ep_num}_sandwitch.wav" -o -iname "hpr${ep_num}.mp3" -o -iname "hpr${ep_num}.ogg" -o -iname "hpr${ep_num}.spx" -o -iname "hpr${ep_num}_summary.txt" -o -iname "hpr${ep_num}_summary.wav" -o -iname "hpr${ep_num}_sandwitch.wav" -o -iname "*_mez.wav" -o -iname "*_sox_norm.wav" -o -iname "*_mezzanine.wav" -o -iname "*_tmp.pcm" -o -iname "hpr${ep_num}_intro*.wav" -o -iname "~hpr${ep_num}_summary.wav" -o -iname "~~hpr${ep_num}_summary.wav" -o -iname "*_tmp.log" -o -iname "silence.wav" -o -iname "hpr${ep_num}_norm.wav" \)
read -p "Shall I remove them ? (N|y) ? " -n 1 -r
echo # (optional) move to a new line
if [[ ! $REPLY =~ ^[yY]$ ]]
then
echo "skipping...."
exit
else
echo "Removing old files ...."
find "${media_dir}" \( -iname "hpr${ep_num}_sandwitch.wav" -o -iname "hpr${ep_num}.mp3" -o -iname "hpr${ep_num}.ogg" -o -iname "hpr${ep_num}.spx" -o -iname "hpr${ep_num}_summary.txt" -o -iname "hpr${ep_num}_summary.wav" -o -iname "hpr${ep_num}_sandwitch.wav" -o -iname "*_mez.wav" -o -iname "*_sox_norm.wav" -o -iname "*_mezzanine.wav" -o -iname "*_tmp.pcm" -o -iname "hpr${ep_num}_intro*.wav" -o -iname "~hpr${ep_num}_summary.wav" -o -iname "~~hpr${ep_num}_summary.wav" -o -iname "*_tmp.log" -o -iname "silence.wav" -o -iname "hpr${ep_num}_norm.wav" \) -delete -print
fi
fi
#####################################################
# Process media
media=$( find "${media_dir}" -type f -exec file {} \; | grep -Ei 'audio|mpeg|video|MP4' | awk -F ': ' '{print $1}' )
if [ -z "${media}" ]
then
echo "ERROR: Can't find any media in \"${media_dir}/\""
find "${media_dir}/" -type f
exit 1
fi
mediafile=""
echo "Found more than one media file. Please select which one to use ?"
if [ "$( echo "${media}" | wc -l )" -ne 1 ]
then
select this_media in $( echo "${media}" )
do
echo "INFO: You selected \"${this_media}\"."
ls -al "${this_media}"
mediafile="${this_media}"
break
done
else
echo "INFO: Selecting media as \"${media}\"."
mediafile="${media}"
fi
# extract file name and extension
fname="$( basename "${mediafile%.*}" )"
ext="${mediafile/*./}"
cd "${media_dir}/"
pwd
echo "INFO: Processing hpr${ep_num} from ${email}"
echo "INFO: Working directory is \"${media_dir}/\""
echo ""
if [ -z "${mediafile}" ]
then
echo "sorry, variable \"mediafile\" is not set."
exit
fi
if [ -z "${fname}" ]
then
echo "sorry, variable \"fname\" is not set."
exit
fi
if [ -z "${ext}" ]
then
echo "sorry, variable \"ext\" is not set."
exit
fi
if [ ! -f "${mediafile}" ]
then
echo "sorry, media file \"${mediafile}\" does not exist"
exit
fi
echo "--------------------------------------------------------------------------------" | tee -a ${fname}_tmp.log
echo "File information for \"${mediafile}\"" | tee -a ${fname}_tmp.log
ffprobe ${mediafile} 2>&1 | grep Audio:
mediainfo ${mediafile}
audio2image.bash ${mediafile}
xdg-open ${mediafile%.*}.png >/dev/null 2>&1 &
unset REPLY
until [[ $REPLY =~ ^[yYnN]$ ]]
do
read -p "Source Waveform look ok ? (N|y) ? " -n 1 -r
echo ""
done
if [[ ! $REPLY =~ ^[yY]$ ]]
then
echo "skipping.... $REPLY"
exit
fi
re='^[0-9]+$'
if ! [[ $ep_num =~ $re ]]
then
echo "error: episode \"${ep_num}\" is not a number"
exit 1
fi
if [ ! -f "${outro}" ]
then
echo "sorry, file \"${outro}\" does not exist"
exit 1
fi
if [ ! -f "${outro_c2}" ]
then
echo "sorry, file \"${outro_c2}\" does not exist"
exit 1
fi
if [ ! -f "${theme}" ]
then
echo "sorry, file \"${theme}\" does not exist"
exit 1
fi
if [ ! -r "${mediafile}" ]
then
echo "sorry, media file \"${mediafile}\" is not readable"
exit
fi
if [ $(ffprobe "${mediafile}" 2>&1 | grep "Audio:" | wc -l ) -eq 0 ]
then
echo "sorry, media file \"${mediafile}\" has no audio track"
exit
fi
xdg-open "https://hackerpublicradio.org/eps/hpr${ep_num}/index.html" >/dev/null 2>&1 &
mpv -vo=null "${mediafile}"
unset REPLY
until [[ $REPLY =~ ^[yYnN]$ ]]
do
read -p "Is the audio ok (n|Y) ? " -n 1 -r
echo ""
done
if [[ ! $REPLY =~ ^[yY]$ ]]
then
echo "skipping.... $REPLY"
exit
fi
echo "--------------------------------------------------------------------------------"
echo "Geting metadata for hpr${ep_num}"
while read -r line
do
field=$(echo $line | awk -F ':' '{print $1}')
case $field in
"HPR_summary")
HPR_summary=$(echo $line | grep "HPR_summary: " | cut -c 14- )
continue
;;
"HPR_album")
HPR_album=$(echo $line | grep "HPR_album: " | cut -c 12- )
continue
;;
"HPR_artist")
HPR_artist=$(echo $line | grep "HPR_artist: " | cut -c 13- )
continue
;;
"HPR_comment")
HPR_comment=$(echo $line | grep "HPR_comment: " | cut -c 14- )
continue
;;
"HPR_genre")
HPR_genre=$(echo $line | grep "HPR_genre: " | cut -c 12- )
continue
;;
"HPR_title")
HPR_title=$(echo $line | grep "HPR_title: " | cut -c 12- )
continue
;;
"HPR_track")
HPR_track=$(echo $line | grep "HPR_track: " | cut -c 12- )
continue
;;
"HPR_year")
HPR_year=$(echo $line | grep "HPR_year: " | cut -c 11- )
continue
;;
"HPR_duration")
HPR_duration=$(echo $line | grep "HPR_duration: " | cut -c 15- )
continue
;;
"HPR_explicit")
HPR_explicit=$(echo $line | grep "HPR_explicit: " | cut -c 15- )
continue
;;
"HPR_license")
HPR_license=$(echo $line | grep "HPR_license: " | cut -c 14- )
continue
;;
esac
done < <( curl --silent --netrc-file ${HOME}/.netrc "https://hub.hackerpublicradio.org/cms/say.php?id=${ep_num}" )
if [[ -z "$HPR_album" || -z "$HPR_artist" || -z "$HPR_comment" || -z "$HPR_genre" || -z "$HPR_title" || -z "$HPR_track" || -z "$HPR_year" || -z "$HPR_summary" || -z "$HPR_duration" || -z "$HPR_explicit" || -z "$HPR_license" ]]
then
echo "Could not find information on ${ep_num}. Has the show been posted ?"
exit;
fi
echo "--------------------------------------------------------------------------------"
echo "album : $HPR_album"
echo "artist : $HPR_artist"
echo "comment : $HPR_comment"
echo "genre : $HPR_genre"
echo "title : $HPR_title"
echo "track : $HPR_track"
echo "year : $HPR_year"
echo "summary : $HPR_summary"
echo "duration : $HPR_duration"
echo "explicit : $HPR_explicit"
echo "license : $HPR_license"
echo "media_dir : ${media_dir}"
echo "mediafile : ${mediafile}"
echo "fname : ${fname}"
echo "ext : ${ext}"
if [[ $HPR_duration == "0" ]]
then
echo "The duration is set to 0. Please update the show with the correct time."
exit;
fi
#============================================================
# Preproc`s the source file
echo "--------------------------------------------------------------------------------" | tee -a ${fname}_tmp.log
echo "Prepare mezzanine file" | tee -a ${fname}_tmp.log
ffmpeg -y -i ${mediafile} -ar 44100 -ac $CHANNELS ${fname}_mezzanine.wav >> ${fname}_tmp.log 2>&1
echo "--------------------------------------------------------------------------------" | tee -a ${fname}_tmp.log
echo "Add HPR Branding" | tee -a ${fname}_tmp.log
echo "Creating the summary" | tee -a ${fname}_tmp.log
#echo "$HPR_summary" - | text2wave -F 44100 - -o hpr${ep_num}_summary.wav #festival --tts
#echo "$HPR_summary" - | espeak -w ~hpr${ep_num}_summary.wav
echo "$HPR_summary" > "hpr${ep_num}_summary.txt"
create_tts_summary_ok="not-ok"
while [ "${create_tts_summary_ok}" != "OK" ]
do
create_tts_summary
xdg-open "hpr${ep_num}_summary.txt" 2>&1 &
mpv --speed=1.8 ~hpr${ep_num}_summary.wav
read -p "Is the text to speech correct (y|N) ? " -n 1 -r
echo # (optional) move to a new line
if [[ $REPLY =~ ^[Yy]$ ]]
then
create_tts_summary_ok="OK"
else
echo "WARN: Please correct the text and try again."
inotifywait --event modify "hpr${ep_num}_summary.txt"
fi
done
echo "INFO: TTS complete."
ffmpeg -y -i ~hpr${ep_num}_summary.wav -ar 44100 ~~hpr${ep_num}_summary.wav >> ${fname}_tmp.log 2>&1
sox -V2 -n -r 44100 -c 1 silence.wav trim 0.0 6.0 >> ${fname}_tmp.log 2>&1
sox -V2 silence.wav ~~hpr${ep_num}_summary.wav hpr${ep_num}_summary.wav >> ${fname}_tmp.log 2>&1
echo "Adding the theme" | tee -a ${fname}_tmp.log
sox -V2 -m "hpr${ep_num}_summary.wav" "${theme}" "hpr${ep_num}_intro.wav" >> ${fname}_tmp.log 2>&1
if [ ${CHANNELS} -eq "2" ]
then
echo "Converting mono to Stero" | tee -a ${fname}_tmp.log
ffmpeg -y -i "hpr${ep_num}_intro.wav" -ac 2 "hpr${ep_num}_intro_c2.wav"
echo "Creating the sandwitch: \"hpr${ep_num}_intro_c2.wav\" \"${fname}_mezzanine.wav\" \"${outro}\" \"hpr${ep_num}_sandwitch.wav\" " | tee -a ${fname}_tmp.log
sox -V2 "hpr${ep_num}_intro_c2.wav" "${fname}_mezzanine.wav" "${outro_c2}" "hpr${ep_num}_sandwitch.wav" >> ${fname}_tmp.log 2>&1
else
echo "Creating the sandwitch: \"hpr${ep_num}_intro.wav\" \"${fname}_mezzanine.wav\" \"${outro}\" \"hpr${ep_num}_sandwitch.wav\" " | tee -a ${fname}_tmp.log
sox -V2 "hpr${ep_num}_intro.wav" "${fname}_mezzanine.wav" "${outro}" "hpr${ep_num}_sandwitch.wav" >> ${fname}_tmp.log 2>&1
fi
echo "Normalizing the wav files" | tee -a ${fname}_tmp.log
#sox --temp "${TEMP_DIR}" --norm hpr${ep_num}_sandwitch.wav hpr${ep_num}_norm.wav
#ffmpeg -y -i hpr${ep_num}_sandwitch.wav -filter:a "dynaudnorm=p=0.9:s=5" hpr${ep_num}_norm.wav >> ${fname}_tmp.log 2>&1
ffmpeg -y -i hpr${ep_num}.wav -af loudnorm=I=-16:LRA=11:TP=-1.5 hpr${ep_num}_norm.wav >> ${fname}_tmp.log 2>&1
if [ ! -d "${upload_dir_ep_num}" ]
then
echo "DEBUG: Missing directory \"upload_dir_ep_num\" \"${upload_dir}\", /hpr and \"${ep_num}\"" 1>&2
exit
fi
if [ ! -e "hpr${ep_num}_norm.wav" ]
then
echo "DEBUG: Missing file \"hpr${ep_num}_norm.wav\"" 1>&2
exit
fi
cp -v hpr${ep_num}_norm.wav ${upload_dir_ep_num}/hpr${ep_num}.wav >> ${fname}_tmp.log 2>&1
if [ ! -e "${upload_dir_ep_num}/hpr${ep_num}.wav" ]
then
echo "DEBUG: Missing file \"${upload_dir_ep_num}/hpr${ep_num}.wav\"" 1>&2
exit
fi
echo "--------------------------------------------------------------------------------" | tee -a ${fname}_tmp.log
echo "File information" | tee -a ${fname}_tmp.log
ffprobe ${upload_dir_ep_num}/hpr${ep_num}.wav 2>&1 | grep Audio:
mediainfo ${upload_dir_ep_num}/hpr${ep_num}.wav
audio2image.bash ${upload_dir_ep_num}/hpr${ep_num}.wav
xdg-open ${upload_dir_ep_num}/hpr${ep_num}.png >/dev/null 2>&1 &
read -p "Processed Waveform look ok ? (N|y) ? " -n 1 -r
echo # (optional) move to a new line
if [[ ! $REPLY =~ ^[yY]$ ]]
then
echo "skipping...."
exit
fi
#rm -v ${upload_dir_ep_num}/hpr${ep_num}.png
echo "--------------------------------------------------------------------------------"
echo "Geting transcript of hpr${ep_num}"
echo whisper --model tiny --language en --output_dir "${media_dir}" "${upload_dir_ep_num}/hpr${ep_num}.wav" | tee -a ${fname}_tmp.log
whisper --model tiny --language en --output_dir "${media_dir}" "${upload_dir_ep_num}/hpr${ep_num}.wav" | tee -a ${fname}_tmp.log
ls -al "${media_dir}/hpr${ep_num}".*
xdg-open "${media_dir}/hpr${ep_num}".txt 2>&1 &
echo mpv --no-audio-display --audio-channels=stereo --speed="3.5" "${media_dir}/hpr${ep_num}".txt
unset REPLY
until [[ $REPLY =~ ^[yYnN]$ ]]
do
read -p "Processed transcript look ok ? (N|y) ? " -n 1 -r
echo ""
done
if [[ ! $REPLY =~ ^[yY]$ ]]
then
echo "skipping.... $REPLY"
exit
fi
mkdir "${upload_dir_ep_num}/hpr${ep_num}" >/dev/null 2>&1
cp -v "${media_dir}/hpr${ep_num}".vtt "${upload_dir_ep_num}/hpr${ep_num}.vtt" | tee -a ${fname}_tmp.log
cp -v "${media_dir}/hpr${ep_num}".srt "${upload_dir_ep_num}/hpr${ep_num}.srt" | tee -a ${fname}_tmp.log
cp -v "${media_dir}/hpr${ep_num}".txt "${upload_dir_ep_num}/hpr${ep_num}.txt" | tee -a ${fname}_tmp.log
cp -v "${media_dir}/hpr${ep_num}".tsv "${upload_dir_ep_num}/hpr${ep_num}.tsv" | tee -a ${fname}_tmp.log
cp -v "${media_dir}/hpr${ep_num}".json "${upload_dir_ep_num}/hpr${ep_num}.json" | tee -a ${fname}_tmp.log
echo "--------------------------------------------------------------------------------" | tee -a ${fname}_tmp.log
echo "Convert to opus" | tee -a ${fname}_tmp.log
ffmpeg -y -i ${upload_dir_ep_num}/hpr${ep_num}.wav ${upload_dir_ep_num}/hpr${ep_num}.opus 2>> ${fname}_tmp.log 1>&2
echo "--------------------------------------------------------------------------------" | tee -a ${fname}_tmp.log
echo "Convert to flac" | tee -a ${fname}_tmp.log
ffmpeg -y -i ${upload_dir_ep_num}/hpr${ep_num}.wav ${upload_dir_ep_num}/hpr${ep_num}.flac 2>> ${fname}_tmp.log 1>&2
echo "--------------------------------------------------------------------------------" | tee -a ${fname}_tmp.log
echo "Convert to mp3" | tee -a ${fname}_tmp.log
ffmpeg -y -i ${upload_dir_ep_num}/hpr${ep_num}.wav ${upload_dir_ep_num}/hpr${ep_num}.mp3 2>> ${fname}_tmp.log 1>&2
echo "--------------------------------------------------------------------------------" | tee -a ${fname}_tmp.log
echo "Convert to ogg" | tee -a ${fname}_tmp.log
#ffmpeg -y -i ${upload_dir_ep_num}/hpr${ep_num}.wav ${upload_dir_ep_num}/hpr${ep_num}.ogg 2>> ${fname}_tmp.log 1>&2
ffmpeg -y -i ${upload_dir_ep_num}/hpr${ep_num}.wav -acodec libopus -f ogg ${upload_dir_ep_num}/hpr${ep_num}.ogg 2>> ${fname}_tmp.log 1>&2
echo "--------------------------------------------------------------------------------" | tee -a ${fname}_tmp.log
echo "Convert to spx" | tee -a ${fname}_tmp.log
ffmpeg -y -i ${upload_dir_ep_num}/hpr${ep_num}.wav ${upload_dir_ep_num}/hpr${ep_num}.spx 2>> ${fname}_tmp.log 1>&2
### End conversion
intro_duration="$( mediainfo --Output=XML --Full "hpr${ep_num}_intro.wav" | xmlstarlet sel -T -t -m '/_:MediaInfo/_:media/_:track[@type="Audio"]' -v '_:Duration' -n | awk -F '.' '{print $1}' )"
outro_duration="$( mediainfo --Output=XML --Full "${outro}" | xmlstarlet sel -T -t -m '/_:MediaInfo/_:media/_:track[@type="Audio"]' -v '_:Duration' -n | awk -F '.' '{print $1}' )"
source_duration="$( mediainfo --Output=XML --Full "${mediafile}" | xmlstarlet sel -T -t -m '/_:MediaInfo/_:media/_:track[@type="Audio"]' -v '_:Duration' -n | awk -F '.' '{print $1}' )"
expected_duration=$(( ${intro_duration} + ${HPR_duration} + ${outro_duration} ))
echo "Intro(${intro_duration}) + Show(${HPR_duration}) + Outro(${outro_duration}) = ${expected_duration}"
media_error="0"
for check_file in \
${upload_dir_ep_num}/hpr${ep_num}.wav \
${upload_dir_ep_num}/hpr${ep_num}.opus \
${upload_dir_ep_num}/hpr${ep_num}.flac \
${upload_dir_ep_num}/hpr${ep_num}.mp3 \
${upload_dir_ep_num}/hpr${ep_num}.spx \
${upload_dir_ep_num}/hpr${ep_num}.ogg
do
# ${upload_dir_ep_num}/hpr${ep_num}.spx
echo "INFO: Processing the file \"${check_file}\""
if [[ ! -s "${check_file}" ]]
then
echo "ERROR: Something went wrong encoding of the file \"${check_file}\""
exit
else
mediainfo --Output=XML --Full "${check_file}" | xmlstarlet sel -T -t -m '/_:MediaInfo/_:media/_:track[@type="Audio"]' -v '_:Duration' -n | awk -F '.' '{print $1}'
this_duration=$( mediainfo --Output=XML --Full "${check_file}" | xmlstarlet sel -T -t -m '/_:MediaInfo/_:media/_:track[@type="Audio"]' -v '_:Duration' -n | awk -F '.' '{print $1}' )
if [[ $(abs_diff "${this_duration}" "${expected_duration}" ) -le "${acceptable_duration_difference}" ]]
then
echo "INFO: The file \"${check_file}\" duration of ${this_duration} () is close enough to ${expected_duration}"
else
echo "ERROR: The file \"${check_file}\" actual duration of ${this_duration} is not close enough to posted duration of ${expected_duration}."
echo " Fix or update the posted duration to ${source_duration}."
media_error="1"
fi
#${expected_duration}
fi
done
echo "Source: ${source_duration}"
if [[ "${media_error}" -eq "1" ]]
then
echo "ERROR: Media is not encoded correctly"
exit
else
echo "INFO: Media duration is correct"
fi
if [[ ! -s ${upload_dir_ep_num}/hpr${ep_num}.wav ]] || [[ ! -s ${upload_dir_ep_num}/hpr${ep_num}.opus ]] || [[ ! -s ${upload_dir_ep_num}/hpr${ep_num}.flac ]] || [[ ! -s ${upload_dir_ep_num}/hpr${ep_num}.mp3 ]] || [[ ! -s ${upload_dir_ep_num}/hpr${ep_num}.ogg ]] || [[ ! -s ${upload_dir_ep_num}/hpr${ep_num}.spx ]]
then
echo "ERROR: Something went wrong encoding the files"
exit 1
fi
echo "--------------------------------------------------------------------------------" | tee -a ${fname}_tmp.log
echo "Fixing Tags" | tee -a ${fname}_tmp.log
fix_tags -album="$HPR_album" -artist="$HPR_artist" -comment="${HPR_comment} The license is ${HPR_license}" -genre="$HPR_genre" -title="$HPR_title" -track="$HPR_track" -year="$HPR_year" ${upload_dir_ep_num}/hpr${ep_num}* 2>> ${fname}_tmp.log 1>&2
fix_tags ${upload_dir_ep_num}/hpr${ep_num}*
#echo "Changing the file dates to the time of upload"
touch -r ${mediafile} hpr${ep_num}*
touch -r ${mediafile} ${upload_dir_ep_num}/hpr${ep_num}*
ls -al hpr${ep_num}* ${upload_dir_ep_num}/hpr${ep_num}* | tee -a ${fname}_tmp.log
# # echo "--------------------------------------------------------------------------------" | tee -a ${fname}_tmp.log
# # echo "Getting info for the asset table" | tee -a ${fname}_tmp.log
# #
# # echo "INSERT INTO assets (episode_id,filename,extension,size,sha1sum,mime_type,file_type) VALUES"
# #
# # for asset_file in \
# # ${upload_dir_ep_num}/hpr${ep_num}.wav \
# # ${upload_dir_ep_num}/hpr${ep_num}.opus \
# # ${upload_dir_ep_num}/hpr${ep_num}.flac \
# # ${upload_dir_ep_num}/hpr${ep_num}.mp3 \
# # ${upload_dir_ep_num}/hpr${ep_num}.spx \
# # ${upload_dir_ep_num}/hpr${ep_num}.ogg
# # do
# # size="$( ls -al "${asset_file}" | awk '{print $5}' )"
# # sha1sum="$( sha1sum "${asset_file}" | awk '{print $1}' )"
# # mime_type=$( file --dereference --brief --mime "${asset_file}" )
# # file_type=$( file --dereference --brief "${asset_file}" )
# # echo "(${ep_num},'${filename}','${extension}','${size}','${sha1sum}','${mime_type}','${file_type}'),"
# # done
echo "--------------------------------------------------------------------------------" | tee -a ${fname}_tmp.log
echo "Transferring files to Servers" | tee -a ${fname}_tmp.log
#rsync -ave ssh --partial --progress --ignore-existing ${upload_dir_ep_num}/hpr${ep_num}.mp3 ${upload_dir_ep_num}/hpr${ep_num}.ogg ${upload_dir_ep_num}/hpr${ep_num}.spx hpr:www/eps/
#firefox "https://hackerpublicradio.org/local/hpr${ep_num}.mp3" >/dev/null 2>&1 &
#firefox "https://hackerpublicradio.org/local/hpr${ep_num}.ogg" >/dev/null 2>&1 &
firefox "file://${upload_dir_ep_num}/hpr${ep_num}.mp3" >/dev/null 2>&1 &
firefox "file://${upload_dir_ep_num}/hpr${ep_num}.ogg" >/dev/null 2>&1 &
#firefox "https://hackerpublicradio.org/eps.php?id=${ep_num}" >/dev/null 2>&1 &
#mpv "http://hackerpublicradio.org/local/hpr${ep_num}.spx" "http://hackerpublicradio.org/local/hpr${ep_num}.ogg" "http://hackerpublicradio.org/local/hpr${ep_num}.mp3" "file://${upload_dir_ep_num}/hpr${ep_num}.spx" "file://${upload_dir_ep_num}/hpr${ep_num}.opus" "file://${upload_dir_ep_num}/hpr${ep_num}.ogg" "file://${upload_dir_ep_num}/hpr${ep_num}.mp3" "file://${upload_dir_ep_num}/hpr${ep_num}.flac"
mpv "file://${upload_dir_ep_num}/hpr${ep_num}.spx" "file://${upload_dir_ep_num}/hpr${ep_num}.opus" "file://${upload_dir_ep_num}/hpr${ep_num}.ogg" "file://${upload_dir_ep_num}/hpr${ep_num}.mp3" "file://${upload_dir_ep_num}/hpr${ep_num}.flac"
read -p "Remove files for \"${fname}\" (y|N) ? " -n 1 -r
echo # (optional) move to a new line
if [[ $REPLY =~ ^[Yy]$ ]]
then
mediafilename=$(basename "${mediafile}")
mediaextension="${mediafilename##*.}" # "
# ssh hpr -t "mkdir /home/hpr/www/eps/hpr${ep_num}" >/dev/null 2>&1
# rsync -ave ssh --partial --progress --ignore-existing "${mediafile}" hpr:www/eps/hpr${ep_num}/hpr${ep_num}_source.${mediaextension} | tee -a ${fname}_tmp.log
#
# rsync -ave ssh --partial --progress --ignore-existing "${media_dir}/hpr${ep_num}.wav".vtt hpr:www/eps/hpr${ep_num}/hpr${ep_num}.vtt | tee -a ${fname}_tmp.log
# rsync -ave ssh --partial --progress --ignore-existing "${media_dir}/hpr${ep_num}.wav".srt hpr:www/eps/hpr${ep_num}/hpr${ep_num}.srt | tee -a ${fname}_tmp.log
# rsync -ave ssh --partial --progress --ignore-existing "${media_dir}/hpr${ep_num}.wav".txt hpr:www/eps/hpr${ep_num}/hpr${ep_num}.txt | tee -a ${fname}_tmp.log
# f
# ssh hpr -t "ls -al /home/hpr/www/eps/hpr${ep_num}*"
cp -v "${mediafile}" "${upload_dir_ep_num}/hpr${ep_num}_source.${mediaextension}"
#echo "Remove temp files"
# rm -v ~hpr${ep_num}_summary.wav ~~hpr${ep_num}_summary.wav silence.wav
# rm -v ${fname}_mezzanine.wav ${fname}_tmp*.pcm ${fname}_tmp.log ${fname}_mez.wav
#mv -v ${fname}* hpr${ep_num}* *_${ep_num}_* /var/IA/done/
# wget --timeout=0 -q "http://hackerpublicradio.org/hpr_ogg_rss.php?gomax=1" -O - | xmlstarlet val --err -
# wget --timeout=0 -q "http://hackerpublicradio.org/hpr_mp3_rss.php?gomax=1" -O - | xmlstarlet val --err -
# wget --timeout=0 -q "http://hackerpublicradio.org/hpr_spx_rss.php?gomax=1" -O - | xmlstarlet val --err -
# wget --timeout=0 -q "http://hackerpublicradio.org/rss-future.php" -O - | xmlstarlet val --err -
echo "INFO: rsync -ave ssh --partial --progress ${upload_dir}/hpr${ep_num}* borg:/data/IA/uploads/"
rsync -ave ssh --partial --progress ${upload_dir}/hpr${ep_num}* borg:/data/IA/uploads/
echo "INFO: rsync -ave ssh --partial --progress ${upload_dir}/hpr${ep_num}* rsync.net:processing/"
rsync -ave ssh --partial --progress ${upload_dir}/hpr${ep_num}* rsync.net:processing/
echo "$( ssh borg -t "ls -al /data/IA/uploads/hpr${ep_num}*" ; ssh rsync.net -t "ls -al rsync.net:processing/hpr${ep_num}*" ; ls -al ${upload_dir}/hpr${ep_num}* )" | grep "hpr${ep_num}" | awk '{print $5, $NF}' | sort
# ken@kalani:
# rsync -av --partial --progress /var/IA/uploads/ vger:processing/
# $HOME/sourcecode/hpr/hpr_hub.wip/bin/hpr-assets.bash
# root@vger:~#
# rsync -av --progress --partial $HOME/processing/ /mnt/data/hpr/eps/
# find /mnt/data/hpr/eps/ -mindepth 1 -maxdepth 1 -type f | grep -vE 'assets.csv|assets.json' | while read i;do mv -fv ${i} $(dirname ${i})/$(basename ${i%.*} | cut -c -7 )/;done
# find /mnt/data/hpr/eps/ -type f \( -iname "*source*" -o -iname "*flac*" -o -iname "*wav*" \) -delete -print
# chcon -R --type=httpd_sys_rw_content_t /mnt/data/hpr/
# chown apache:apache /mnt/data/hpr/
# ken@kalani:
# sshfs vger:/ /mnt/sshfs/vger/
# sshfs rsync.net: /mnt/sshfs/rsync.net/
# rsync -av --partial --progress /mnt/sshfs/vger/mnt/data/hpr/eps/ /mnt/sshfs/rsync.net/hpr/eps/
# https://hpr.nyc3.cdn.digitaloceanspaces.com/eps/hpr1404/hpr1404.mp3
# https://alpha.nl.eu.mirror.hackerpublicradio.org/eps/hpr4271/hpr4271.mp3
# https://archive.org/download/hpr4268/hpr4268.ogg
# set eps.valid=1
if [ ${show_type} == "remote" ]
then
echo "INFO: Setting the status"
# SHOW_SUBMITTED → METADATA_PROCESSED → SHOW_POSTED → MEDIA_TRANSCODED → UPLOADED_TO_IA → UPLOADED_TO_RSYNC_NET
curl --netrc-file ${HOME}/.netrc "https://hub.hackerpublicradio.org/cms/status.php?ep_num=${ep_num}&status=MEDIA_TRANSCODED"
curl --silent --netrc-file ${HOME}/.netrc "https://hub.hackerpublicradio.org/cms/status.php"
fi
else
echo "skipping...."
echo "cp -v \"${mediafile}\" \"${upload_dir_ep_num}/hpr${ep_num}_source.${mediaextension}\""
# echo "rm -v ${fname}_mezzanine.wav ${fname}_tmp*.pcm ${fname}_tmp.log ${fname}_mez.wav"
#echo "mv -v ${fname}* hpr${ep_num}* *_${ep_num}_* /var/IA/done/"
echo "wget --timeout=0 -q \"http://hackerpublicradio.org/hpr_ogg_rss.php?gomax=1\" -O - | xmlstarlet val --err -"
echo "wget --timeout=0 -q \"http://hackerpublicradio.org/hpr_mp3_rss.php?gomax=1\" -O - | xmlstarlet val --err -"
echo "wget --timeout=0 -q \"http://hackerpublicradio.org/hpr_spx_rss.php?gomax=1\" -O - | xmlstarlet val --err -"
echo "rsync -ave ssh --partial --progress ${upload_dir}/hpr${ep_num}* borg:/data/IA/uploads/"
fi

20
workflow/intro.srt Normal file
View File

@ -0,0 +1,20 @@
1
00:00:00,000 --> 00:00:13,200
REPLACE_LINE_1
2
00:00:13,200 --> 00:00:16,400
REPLACE_LINE_2
3
00:00:16,400 --> 00:00:20,040
REPLACE_LINE_3
4
00:00:20,040 --> 00:00:22,080
REPLACE_LINE_4
5
00:00:22,080 --> 00:00:27,320
REPLACE_LINE_5

BIN
workflow/outro.flac Normal file

Binary file not shown.

31
workflow/outro.srt Normal file
View File

@ -0,0 +1,31 @@
1
00:00:00,000 --> 00:00:07,720
You have been listening to Hacker Public Radio at Hacker Public Radio.org.
2
00:00:07,720 --> 00:00:11,520
Today's show was contributed by a HPR listener like yourself.
3
00:00:11,520 --> 00:00:18,000
If you ever thought of recording podcast, click on our upload link
4
00:00:18,000 --> 00:00:19,000
to find out how easy it is.
5
00:00:19,000 --> 00:00:26,480
Hosting for HPR has been kindly provided by an AnHonestHost.com, the Internet Archive,
6
00:00:26,480 --> 00:00:27,480
rsync.net, and our mirror network.
7
00:00:27,480 --> 00:00:33,480
Unless otherwise stated, today's show is released under a Creative Commons
8
00:00:33,480 --> 00:00:36,480
Attribution-ShareAlike 4.0 International (CC BY-SA 4.0) license.

View File

@ -1,339 +0,0 @@
#!/usr/bin/env bash
# Copyright Ken Fallon - Released into the public domain. http://creativecommons.org/publicdomain/
#============================================================
processing_dir="$HOME/tmp/hpr/processing" # The directory where the files will be copied to for processing
if [ ! -d "${processing_dir}" ]
then
echo "ERROR: The application \"${this_program}\" is required but is not installed."
exit 1
fi
###################
# Check that all the programs are installed
function is_installed () {
for this_program in "$@"
do
if ! command -v ${this_program} 2>&1 >/dev/null
then
echo "ERROR: The application \"${this_program}\" is required but is not installed."
exit 2
fi
done
}
is_installed awk base64 cat curl curl date detox eval ffprobe file find grep grep head jq jq kate magick mediainfo mv mv rsync rsync seamonkey sed sed sort sponge ssh touch touch wget
echo "Processing the next HPR Show in the queue"
###################
# Get the show
#
# Replaced METADATA_PROCESSED with SHOW_SUBMITTED
response=$( curl --silent --netrc-file ${HOME}/.netrc "https://hub.hackerpublicradio.org/cms/status.php" | \
grep ',SHOW_SUBMITTED,' | \
head -1 | \
sed 's/,/ /g' )
if [ -z "${response}" ]
then
echo "INFO: There appear to be no more shows with the status \"SHOW_SUBMITTED\"."
echo "Getting a list of all the reservations."
curl --silent --netrc-file ${HOME}/.netrc "https://hub.hackerpublicradio.org/cms/status.php" | sort -n
exit 3
fi
timestamp_epoc="$( echo ${response} | awk '{print $1}' )"
ep_num="$( echo ${response} | awk '{print $2}' )"
ep_date="$( echo ${response} | awk '{print $3}' )"
key="$( echo ${response} | awk '{print $4}' )"
status="$( echo ${response} | awk '{print $5}' )"
email="$( echo ${response} | awk '{print $6}' )"
email_unpadded="$( echo $email | sed 's/.nospam@nospam./@/g' )"
upload_dir="/home/hpr/upload/${timestamp_epoc}_${ep_num}_${ep_date}_${key}"
source_dir="hpr:${upload_dir}"
publish_dir="hpr:www/eps/hpr${ep_num}"
dest_dir="${timestamp_epoc}_${ep_num}_${ep_date}_${key}"
ssh hpr -t "detox -v ${upload_dir}/"
echo "INFO: Downloading hpr${ep_num} from ${email_unpadded}"
echo ""
echo rsync -ave ssh --partial --progress ${source_dir}/ ${processing_dir}/${dest_dir}/
rsync -ave ssh --partial --progress ${source_dir}/ ${processing_dir}/${dest_dir}/
echo ""
echo "INFO: Working directory is \"${processing_dir}/${dest_dir}/\""
echo ""
shownotes_json="${processing_dir}/${dest_dir}/shownotes.json"
if [ ! -s "${shownotes_json}" ]
then
echo "ERROR: \"${shownotes_json}\" is missing"
exit 4
fi
if [ "$( file "${shownotes_json}" | grep -ic "text" )" -eq 0 ]
then
echo "ERROR: \"${shownotes_json}\" is not a text file"
exit 5
fi
mv -v "${shownotes_json}" "${shownotes_json%.*}_origional.json"
jq '.' "${shownotes_json%.*}_origional.json" > "${shownotes_json}"
###################
# Get the media
#
remote_media="$( jq --raw-output '.metadata.url' "${shownotes_json}" )"
if [ -n "${remote_media}" ]
then
echo "INFO: Fetching remote media from \"${remote_media}\""
wget --timestamping --directory-prefix="${processing_dir}/${dest_dir}/" "${remote_media}"
if [ $? -ne 0 ]
then
echo "ERROR: Could not get the remote media"
exit 6
fi
fi
shownotes_html="${processing_dir}/${dest_dir}/shownotes.html"
jq --raw-output '.episode.Show_Notes' "${shownotes_json}" > "${shownotes_html}"
if [ ! -s "${shownotes_html}" ]
then
echo "ERROR: \"${shownotes_html}\" is missing"
exit 7
fi
# Process Shownotes
sed "s#>#>\n#g" "${shownotes_html}" | sponge "${shownotes_html}"
# Extract Images
## TODO Temp fix until https://repo.anhonesthost.net/HPR/hpr-tools/issues/3
image_count="1"
for image in $( grep --color=never --perl-regexp --only-matching 'data:image/[^;]*;base64,\K[a-zA-Z0-9+/=]*' "${shownotes_html}" )
do
this_image="${processing_dir}/${dest_dir}/hpr${ep_num}_${image_count}"
echo -n "$image" | base64 -di > ${this_image}
this_ext="$( file --mime-type ${this_image} | awk -F '/' '{print $NF}' )"
mv -v "${this_image}" "${this_image}.${this_ext}"
this_width="$( mediainfo "${this_image}.${this_ext}" | grep Width | awk -F ': | pixels' '{print $2}' | sed 's/ //g' )"
if [ "${this_width}" -gt "400" ]
then
magick "${this_image}.${this_ext}" -resize 400x "${this_image}_tn.${this_ext}"
fi
((image_count=image_count+1))
done
for image in $( grep --color=never --perl-regexp --only-matching '<img.*src.*http.*>' "${shownotes_html}" | awk -F 'src=' '{print $2}' | awk -F '"' '{print $2}' )
do
this_image="${processing_dir}/${dest_dir}/hpr${ep_num}_${image_count}"
wget "${image}" --output-document=${this_image}
this_ext="$( file --mime-type ${this_image} | awk -F '/' '{print $NF}' )"
if [ ! -e "${this_image}.${this_ext}" ]
then
mv -v "${this_image%.*}" "${this_image}.${this_ext}"
fi
this_width="$( mediainfo "${this_image}.${this_ext}" | grep Width | awk -F ': | pixels' '{print $2}' | sed 's/ //g' )"
if [ "${this_width}" -gt "400" ]
then
magick "${this_image}.${this_ext}" -resize 400x "${this_image}_tn.${this_ext}"
fi
((image_count=image_count+1))
done
## TODO End Temp fix until https://repo.anhonesthost.net/HPR/hpr-tools/issues/3
ls -al "${processing_dir}/${dest_dir}/"
## Manually edit the shownotes to fix issues
kate "${shownotes_html}" >/dev/null 2>&1 &
# librewolf "${shownotes_html}" >/dev/null 2>&1 &
seamonkey "${shownotes_html}" >/dev/null 2>&1 &
# bluefish "${shownotes_html}" >/dev/null 2>&1 &
read -p "Does the metadata 'look ok ? (N|y) ? " -n 1 -r
echo # (optional) move to a new line
if [[ ! $REPLY =~ ^[yY]$ ]]
then
echo "skipping...."
exit 8
fi
media=$( find "${processing_dir}/${dest_dir}/" -type f -exec file {} \; | grep -Ei 'audio|mpeg|video|MP4' | awk -F ': ' '{print $1}' )
if [ -z "${media}" ]
then
echo "ERROR: Can't find any media in \"${processing_dir}/${dest_dir}/\""
find "${processing_dir}/${dest_dir}/" -type f
exit 9
fi
the_show_media=""
if [ "$( echo "${media}" | wc -l )" -ne 1 ]
then
echo "Multiple files found. Which one do you want to use ?"
select this_media in $( echo "${media}" )
do
echo "INFO: You selected \"${this_media}\"."
ls -al "${this_media}"
the_show_media="${this_media}"
break
done
else
echo "INFO: Selecting media as \"${media}\"."
the_show_media="${media}"
fi
duration=$( \date -ud "1970-01-01 $( ffprobe -i "${the_show_media}" 2>&1| awk -F ': |, ' '/Duration:/ { print $2 }' )" +%s )
if [ $? -ne 0 ]
then
echo 'ERROR: Invalid duration found in '\"${media}\" >&2
exit 10
fi
###################
# Gather episode information
#
if [ "$( curl --silent --write-out '%{http_code}' http://hackerpublicradio.org/say.php?id=${ep_num} --output /dev/null )" == 200 ]
then
echo "ERROR: The Episode hpr${ep_num} has already been posted"
exit 11
fi
if [ "$( jq --raw-output '.metadata.Episode_Number' ${shownotes_json} )" != "${ep_num}" ]
then
echo "ERROR: The Episode_Number: \"${ep_num}\" was not found in \"${shownotes_json}\""
exit 12
fi
if [ "$( jq --raw-output '.metadata.Episode_Date' ${shownotes_json} )" != "${ep_date}" ]
then
echo "ERROR: The Episode_Date: \"${ep_date}\" was not found in \"${shownotes_json}\""
exit 13
fi
if [ "$( jq --raw-output '.host.Host_Email' ${shownotes_json} )" != "${email_unpadded}" ]
then
echo "ERROR: The Host_Email: \"${email_unpadded}\" was not found in \"${shownotes_json}\""
exit 14
fi
###################
# Assemble the components
#
# https://newbedev.com/how-to-urlencode-data-for-curl-command/
hostid="$( jq --raw-output '.host.Host_ID' ${shownotes_json} | jq --slurp --raw-input @uri | sed -e 's/%0A"$//g' -e 's/^"//g' )"
host_name="$( jq --raw-output '.host.Host_Name' ${shownotes_json} | jq --slurp --raw-input @uri | sed -e 's/%0A"$//g' -e 's/^"//g' )"
title=$( jq --raw-output '.episode.Title' ${shownotes_json} | jq --slurp --raw-input @uri | sed -e 's/%0A"$//g' -e 's/^"//g' )
summary="$( jq --raw-output '.episode.Summary' ${shownotes_json} | jq --slurp --raw-input @uri | sed -e 's/%0A"$//g' -e 's/^"//g' )"
series_id="$( jq --raw-output '.episode.Series' ${shownotes_json} | jq --slurp --raw-input @uri | sed -e 's/%0A"$//g' -e 's/^"//g' )"
series_name="$( jq --raw-output '.episode.Series_Name' ${shownotes_json} | jq --slurp --raw-input @uri | sed -e 's/%0A"$//g' -e 's/^"//g' )"
explicit="$( jq --raw-output '.episode.Explicit' ${shownotes_json} | jq --slurp --raw-input @uri | sed -e 's/%0A"$//g' -e 's/^"//g' )"
episode_license="$( jq --raw-output '.episode.Show_License' ${shownotes_json} | jq --slurp --raw-input @uri | sed -e 's/%0A"$//g' -e 's/^"//g' )"
tags="$( jq --raw-output '.episode.Tags' ${shownotes_json} | jq --slurp --raw-input @uri | sed -e 's/%0A"$//g' -e 's/^"//g' )"
host_license=$( jq --raw-output '.host.Host_License' ${shownotes_json} | jq --slurp --raw-input @uri | sed -e 's/%0A"$//g' -e 's/^"//g' )
host_profile=$( jq --raw-output '.host.Host_Profile' ${shownotes_json} | jq --slurp --raw-input @uri | sed -e 's/%0A"$//g' -e 's/^"//g' )
notes="$( cat "${shownotes_html}" | jq --slurp --raw-input @uri | sed -e 's/%0A"$//g' -e 's/^"//g' )"
if [ $# -gt 0 ]
then
declare -A hash
for argument
do
if [[ $argument =~ ^[^=]+=.*$ ]]
then
this_key="${argument%=*}" # "${} Kate format hack
this_value="${argument#*=}" # "${} Kate format hack
this_value="$( echo "${this_value}" | jq --slurp --raw-input @uri | sed -e 's/%0A"$//g' -e 's/^"//g' )"
eval "${this_key}=${this_value}"
echo "INFO: Replacing \"${this_key}\" with \"${this_value}\"."
fi
done
fi
if [ "${hostid}" == '0' ]
then
echo "ERROR: The hostid is 0. Create the host and use \"hostid=???\" to override"
exit 15
fi
###################
# Post show to HPR
#
post_show_json="${processing_dir}/${dest_dir}/post_show.json"
post_show_response="${processing_dir}/${dest_dir}/post_show_response.txt"
echo "Sending:"
cat "${post_show}"
echo "key=${key}
ep_num=${ep_num}
ep_date=${ep_date}
email=${email}
title=${title}
duration=${duration}
summary=${summary}
series_id=${series_id}
series_name=${series_name}
explicit=${explicit}
episode_license=${episode_license}
tags=${tags}
hostid=${hostid}
host_name=${host_name}
host_license=${host_license}
host_profile=${host_profile}
notes=${notes}"
echo "{
\"key\": \"${key}\",
\"ep_num\": \"${ep_num}\",
\"ep_date\": \"${ep_date}\",
\"email\": \"${email}\",
\"title\": \"${title}\",
\"duration\": \"${duration}\",
\"summary\": \"${summary}\",
\"series_id\": \"${series_id}\",
\"series_name\": \"${series_name}\",
\"explicit\": \"${explicit}\",
\"episode_license\": \"${episode_license}\",
\"tags\": \"${tags}\",
\"hostid\": \"${hostid}\",
\"host_name\": \"${host_name}\",
\"host_license\": \"${host_license}\",
\"host_profile\": \"${host_profile}\",
\"notes\": \"${notes}\"
}" | tee "${post_show_json}"
echo "INFO: Uploading processed files to \"${processing_dir}/${dest_dir}/\""
echo ""
echo rsync -ave ssh --partial --progress ${processing_dir}/${dest_dir}/ ${source_dir}/
rsync -ave ssh --partial --progress ${processing_dir}/${dest_dir}/ ${source_dir}/
echo ""
curl --netrc --include --request POST "https://hub.hackerpublicradio.org/cms/add_show_json.php" --header "Content-Type: application/json" --data-binary "@${post_show_json}"
if [ "$( curl --silent --write-out '%{http_code}' http://hackerpublicradio.org/say.php?id=${ep_num} --output /dev/null )" != 200 ]
then
echo "ERROR: The Episode hpr${ep_num} has not been posted"
exit 16
fi
echo "INFO: Uploading completed files to \"${publish_dir}/\""
echo ""
echo "rsync -ave ssh --partial --progress \"${processing_dir}/${dest_dir}/hpr${ep_num}\"* \"${publish_dir}/\""
rsync -ave ssh --partial --progress "${processing_dir}/${dest_dir}/hpr${ep_num}"* "${publish_dir}/"
echo ""

1458
workflow/process_episode.bash Executable file

File diff suppressed because it is too large Load Diff

12
workflow/remove-image.pl Executable file
View File

@ -0,0 +1,12 @@
#!/usr/bin/env perl
use strict;
while (<>) {
s/(<img.*src.*data:image\/[^;]*;base64,[a-zA-Z0-9+\/=]*)/<img src="LOCAL_IMAGE_REMOVED/g;
s/(<img.*src.*http.*>)/<img src="REMOTE_IMAGE_REMOVED" \/>/g;
print;
}
exit
# <img src="data:image/jpeg;base64,/9j/4QnuRXhpZgAATU

BIN
workflow/silence.flac Normal file

Binary file not shown.

BIN
workflow/theme.flac Normal file

Binary file not shown.