Fixing issue #140

site-generator:

    Cosmetic adjustments. Additions to the POD documentation.
    Additions to module list.
    Additions to work better with UTF-8.
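    Addition of functions 'parse_csv' (registered with the Template Toolkit
    as the scalar vmethod 'csv_parse') and 'xml_entity' (registered as the
    filter 'xml_entity').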

templates/queries-episodes-sqlite.tpl.html:
templates/shared-utils.tpl.html:

    Cosmetic changes

templates/rss-query-hpr-mysql.tpl.xml:
templates/rss-query-hpr-sqlite.tpl.xml:
templates/rss-query-hpr_total-mysql.tpl.xml:
templates/rss-query-hpr_total-sqlite.tpl.xml:

    Enhancements to allow the query to collect the audio file length
    ('assets.size', returned as 'length') from the 'assets' table. The
    audio file extension is passed as an argument to the 'execute'
    statement and is used to select the matching 'assets' row.

templates/rss.tpl.xml:

    Cosmetic changes
    Changed one 'php' URL to 'html'.

templates/shared-episode-summary.tpl.html:

    Change to 'display_tags' macro to turn the 'eps.tags' field into
    a list of links. This works, but needs further development because
    using the tag strings as anchor ids is not reliable.

templates/shared-item.tpl.xml:

    Cosmetic changes.
    Addition of the 'HTML.Strip' plugin, whose 'html_strip' filter is used
    as a means of removing HTML tags from '<itunes:summary>' strings.
    Using new filter 'xml_entity' which converts all non-ASCII
    characters in the notes to numeric hexadecimal entities for
    '<itunes:summary>'.
    The '<enclosure>' tag now uses 'episode.length', which has been
    extracted from the 'assets' table, rather than 'episode.duration'.
This commit is contained in:
Dave Morriss 2023-08-19 13:34:50 +01:00
parent 7b6788731b
commit dc138596ea
10 changed files with 464 additions and 377 deletions

View File

@ -1,5 +1,7 @@
#!/usr/bin/perl #!/usr/bin/perl
# {{{ POD documentation
=head1 NAME =head1 NAME
site-generator - HPR Site Generator site-generator - HPR Site Generator
@ -36,7 +38,8 @@
=head1 DESCRIPTION =head1 DESCRIPTION
This is a site generator for the Hacker Public Radio website based upon the Perl Templates Toolkit. This is a site generator for the Hacker Public Radio website based upon the
Perl Template Toolkit.
=head1 INSTALLATION =head1 INSTALLATION
@ -74,6 +77,7 @@ This is a site generator for the Hacker Public Radio website based upon the Perl
* Tie::DBI * Tie::DBI
* DBD::SQLite or DBD:mysql * DBD::SQLite or DBD:mysql
* Date::Calc * Date::Calc
* Text::CSV_XS
=head1 AUTHOR =head1 AUTHOR
@ -99,15 +103,22 @@ This is a site generator for the Hacker Public Radio website based upon the Perl
=cut =cut
# }}}
use strict; use strict;
use warnings; use warnings;
use Getopt::Long qw(:config auto_help); use Getopt::Long qw(:config auto_help);
use Pod::Usage; use Pod::Usage;
use Config::Std; use Config::Std;
use Text::CSV_XS;
use HTML::Entities qw(encode_entities_numeric);
use Template; use Template;
use Data::Dumper; use Data::Dumper;
binmode STDOUT, ":encoding(UTF-8)";
binmode STDERR, ":encoding(UTF-8)";
exit main(); exit main();
sub main { sub main {
@ -149,6 +160,14 @@ sub main {
my $tt = get_template_html($config{DBI}, $config{app_paths}); my $tt = get_template_html($config{DBI}, $config{app_paths});
#
# Define a TT² vmethod called 'csv_parse', it takes a scalar value and
# returns an arrayref. Also define a filter called 'xml_entity' which
# numerically encodes non-ASCII characters.
#
$tt->context->define_vmethod( 'scalar', 'csv_parse', \&parse_csv );
$tt->context->define_filter( 'xml_entity', \&xml_entity );
# If command line option all is set, parse configuration file # If command line option all is set, parse configuration file
# for all pages # for all pages
if ($all) { if ($all) {
@ -184,7 +203,7 @@ sub main {
if ($page_config->{'multipage'} && $page_config->{'multipage'} eq 'true') { if ($page_config->{'multipage'} && $page_config->{'multipage'} eq 'true') {
# Empty arrayref bug fixed, so count is reduced by 1 # Empty arrayref bug fixed, so count is reduced by 1
# if (scalar @{$parsed_arg{'ids'}} == 1) { # was: if (scalar @{$parsed_arg{'ids'}} == 1) {
if (scalar @{$parsed_arg{'ids'}} == 0) { if (scalar @{$parsed_arg{'ids'}} == 0) {
@{$parsed_arg{'ids'}} = get_ids_from_db($tt, \$page_config); @{$parsed_arg{'ids'}} = get_ids_from_db($tt, \$page_config);
} }
@ -212,9 +231,10 @@ sub get_template_html (\%@) {
# template start and end tags to also function as # template start and end tags to also function as
# HTML comments to make the template file valid HTML. # HTML comments to make the template file valid HTML.
# #
return Template->new({ return Template->new(
INCLUDE_PATH => $_[1]{templates_path}, { INCLUDE_PATH => $_[1]{templates_path},
OUTPUT_PATH => $_[1]{output_path}, OUTPUT_PATH => $_[1]{output_path},
ENCODING => 'utf8',
EVAL_PERL => 1, EVAL_PERL => 1,
START_TAG => '<!--%', START_TAG => '<!--%',
END_TAG => '%-->', END_TAG => '%-->',
@ -226,17 +246,19 @@ sub get_template_html (\%@) {
user => $_[0]{user}, user => $_[0]{user},
password => $_[0]{password}, password => $_[0]{password},
} }
}) || die $Template::ERROR, "\n"; }
) || die $Template::ERROR, "\n";
} }
sub generate_page { sub generate_page {
my ($tt, $config, $preview) = @_; my ( $tt, $config, $preview ) = @_;
my $html; my $html;
if (!$preview) { if ( !$preview ) {
$html = get_filename($$config); $html = get_filename($$config);
} }
$tt->process($$config->{root_template}, $$config, $html) $tt->process( $$config->{root_template},
$$config, $html, { binmode => ':utf8' } )
|| die $tt->error(), "\n"; || die $tt->error(), "\n";
} }
@ -258,7 +280,6 @@ sub parse_page_arg {
my ($page_arg) = @_; my ($page_arg) = @_;
# Split page name from page ids if available. # Split page name from page ids if available.
my ($page, $ids) = split(/=/, $page_arg); my ($page, $ids) = split(/=/, $page_arg);
#my @ids = [];
my @ids; my @ids;
if(!$ids) { if(!$ids) {
@ -333,15 +354,68 @@ sub print_available_pages {
# Load config file # Load config file
read_config "site.cfg" => my %config; read_config "site.cfg" => my %config;
my @page_args = sort (keys %config); my @page_args = sort ( keys %config );
# Remove non page sections of the configuration file # Remove non page sections of the configuration file
# from the generated list of pages. # from the generated list of pages.
@page_args= grep { $_ ne 'DBI' } @page_args; @page_args = grep { $_ ne 'DBI' } @page_args;
@page_args= grep { $_ ne 'root_template' } @page_args; @page_args = grep { $_ ne 'root_template' } @page_args;
foreach my $page_arg (@page_args) { foreach my $page_arg (@page_args) {
print "$page_arg\n"; print "$page_arg\n";
} }
exit; exit;
} }
#===  FUNCTION  ================================================================
#         NAME: parse_csv
#      PURPOSE: Splits a single line of CSV text into its component fields
#   PARAMETERS: $csv_in         The CSV string to be parsed
#      RETURNS: A reference to an array holding the parsed fields. If the
#               string cannot be parsed a warning is issued and a bare
#               'return' is executed (undef in scalar context, the empty
#               list in list context).
#  DESCRIPTION: A new Text::CSV_XS parser is built on every call. It is
#               created with 'allow_whitespace' so that spaces around the
#               CSV elements are tolerated and stripped, and with
#               'allow_loose_quotes' so that badly quoted CSV is accepted
#               where possible. The 'binary' and 'auto_diag' options are
#               also switched on.
#       THROWS: No exceptions
#     COMMENTS: None
#     SEE ALSO: N/A
#===============================================================================
sub parse_csv {
    my ($csv_in) = @_;

    # Parser settings, kept together so they are easy to review
    my %parser_options = (
        binary             => 1,
        auto_diag          => 1,
        allow_whitespace   => 1,
        allow_loose_quotes => 1,
    );

    my $parser = Text::CSV_XS->new( \%parser_options );

    # Success: hand back the fields as a fresh arrayref
    if ( $parser->parse($csv_in) ) {
        return [ $parser->fields() ];
    }

    # Failure: report the offending string and return nothing
    warn "Failed to parse '$csv_in'\n";
    return;
}
#===  FUNCTION  ================================================================
#         NAME: xml_entity
#      PURPOSE: Static filter to encode Unicode for XML
#   PARAMETERS: $text           String to be processed
#      RETURNS: Processed text
#  DESCRIPTION: Hands the text to 'encode_entities_numeric' (imported from
#               HTML::Entities) without specifying which characters to
#               encode, so the module's default set is used, and returns
#               the encoded result. According to the HTML::Entities
#               documentation the replacements are always numeric
#               entities rather than named ones.
#       THROWS: No exceptions
#     COMMENTS: The function works on its own copy of the argument; the
#               caller's string is not modified.
#     SEE ALSO: N/A
#===============================================================================
sub xml_entity {
    my ($text) = @_;

    # Called in non-void context the encoder returns the encoded copy,
    # which is the same value the in-place form would leave in $text.
    return encode_entities_numeric($text);
}
# vim: syntax=perl:ts=8:sw=4:et:ai:tw=78:fo=tcrqn21:fdm=marker

View File

@ -10,14 +10,17 @@
hosts.local_image, hosts.local_image,
hosts.hostid, hosts.hostid,
hosts.host, hosts.email, hosts.host, hosts.email,
miniseries.name AS series, miniseries.id AS seriesid miniseries.name AS series, miniseries.id AS seriesid,
assets.size AS length
FROM eps FROM eps
INNER JOIN hosts ON eps.hostid = hosts.hostid INNER JOIN hosts ON eps.hostid = hosts.hostid
INNER JOIN miniseries ON eps.series = miniseries.id INNER JOIN miniseries ON eps.series = miniseries.id
WHERE eps.date < DATE_ADD(NOW(), INTERVAL 1 DAY) INNER JOIN assets ON eps.id = assets.episode_id
WHERE eps.date <= UTC_DATE()
AND assets.extension = ?
ORDER BY eps.date DESC ORDER BY eps.date DESC
LIMIT 10 LIMIT 10
') ')
%--> %-->
<!--% feed_result = query_hpr_feed.execute() %--> <!--% feed_result = query_hpr_feed.execute(media_file_extension) %-->

View File

@ -10,14 +10,17 @@
hosts.local_image, hosts.local_image,
hosts.hostid, hosts.hostid,
hosts.host, hosts.email, hosts.host, hosts.email,
miniseries.name AS series, miniseries.id AS seriesid miniseries.name AS series, miniseries.id AS seriesid,
assets.size AS length
FROM eps FROM eps
INNER JOIN hosts ON eps.hostid = hosts.hostid INNER JOIN hosts ON eps.hostid = hosts.hostid
INNER JOIN miniseries ON eps.series = miniseries.id INNER JOIN miniseries ON eps.series = miniseries.id
WHERE eps.date < date(\'now\', \'+1 days\') INNER JOIN assets ON eps.id = assets.episode_id
WHERE eps.date <= date(\'now\')
AND assets.extension = ?
ORDER BY eps.date DESC ORDER BY eps.date DESC
LIMIT 10 LIMIT 10
') ')
%--> %-->
<!--% feed_result = query_hpr_feed.execute() %--> <!--% feed_result = query_hpr_feed.execute(media_file_extension) %-->

View File

@ -10,13 +10,16 @@
hosts.local_image, hosts.local_image,
hosts.hostid, hosts.hostid,
hosts.host, hosts.email, hosts.host, hosts.email,
miniseries.name AS series, miniseries.id AS seriesid miniseries.name AS series, miniseries.id AS seriesid,
assets.size AS length
FROM eps FROM eps
INNER JOIN hosts ON eps.hostid = hosts.hostid INNER JOIN hosts ON eps.hostid = hosts.hostid
INNER JOIN miniseries ON eps.series = miniseries.id INNER JOIN miniseries ON eps.series = miniseries.id
WHERE eps.date < DATE_ADD(NOW(), INTERVAL 1 DAY) INNER JOIN assets ON eps.id = assets.episode_id
WHERE eps.date < UTC_DATE()
AND assets.extension = ?
ORDER BY eps.date DESC ORDER BY eps.date DESC
') ')
%--> %-->
<!--% feed_result = query_hpr_feed.execute() %--> <!--% feed_result = query_hpr_feed.execute(media_file_extension) %-->

View File

@ -10,13 +10,16 @@
hosts.local_image, hosts.local_image,
hosts.hostid, hosts.hostid,
hosts.host, hosts.email, hosts.host, hosts.email,
miniseries.name AS series, miniseries.id AS seriesid miniseries.name AS series, miniseries.id AS seriesid,
assets.size AS length
FROM eps FROM eps
INNER JOIN hosts ON eps.hostid = hosts.hostid INNER JOIN hosts ON eps.hostid = hosts.hostid
INNER JOIN miniseries ON eps.series = miniseries.id INNER JOIN miniseries ON eps.series = miniseries.id
WHERE eps.date < date(\'now\', \'+1 days\') INNER JOIN assets ON eps.id = assets.episode_id
WHERE eps.date <= date(\'now\')
AND assets.extension = ?
ORDER BY eps.date DESC ORDER BY eps.date DESC
') ')
%--> %-->
<!--% feed_result = query_hpr_feed.execute() %--> <!--% feed_result = query_hpr_feed.execute(media_file_extension) %-->

View File

@ -39,7 +39,7 @@
<image> <image>
<url>https://www.hackerpublicradio.org/images/hpr_feed_small.png</url> <url>https://www.hackerpublicradio.org/images/hpr_feed_small.png</url>
<title>Hacker Public Radio</title> <title>Hacker Public Radio</title>
<link>https://www.hackerpublicradio.org/about.php</link> <link>https://www.hackerpublicradio.org/about.html</link>
<description>The Hacker Public Radio Old Microphone Logo</description> <description>The Hacker Public Radio Old Microphone Logo</description>
<height>164</height> <height>164</height>
<width>144</width> <width>144</width>

View File

@ -12,7 +12,11 @@ from the series <em><a href="<!--% baseurl %-->series/<!--% zero_pad_left(series
<!--% END %--> <!--% END %-->
<!--% MACRO display_tags(tags) BLOCK %--> <!--% MACRO display_tags(tags) BLOCK %-->
<span><label>Tags:</label> <em><!--% tags %--></em>.</span> <span><label>Tags:</label> <em>
<!--% FOREACH tag IN tags.csv_parse %-->
<a href="<!--% absolute_path(baseurl) %-->tags.html#<!--% tag.lower %-->"><!--% tag %--></a><!--% IF loop.count == loop.size %-->.<!--% ELSE %-->,<!--% END %-->
<!--% END %--></em>
</span>
<!--% END %--> <!--% END %-->
<!--% MACRO display_listen_in(eps_id, episode_type) BLOCK %--> <!--% MACRO display_listen_in(eps_id, episode_type) BLOCK %-->

View File

@ -1,5 +1,6 @@
<!--% PROCESS 'shared-utils.tpl.html' %--> <!--% PROCESS 'shared-utils.tpl.html' %-->
<!--% MACRO display_item(episode, file_extension, audio_mime_type) BLOCK %--> <!--% MACRO display_item(episode, file_extension, audio_mime_type) BLOCK %-->
<!--% USE HTML.Strip %-->
<!--% IF audio_mime_type == "" %--> <!--% IF audio_mime_type == "" %-->
<!--% audio_mime_type = 'ogg' %--> <!--% audio_mime_type = 'ogg' %-->
<!--% END %--> <!--% END %-->
@ -11,13 +12,13 @@
<googleplay:author><!--% episode.email %--> (<!--% episode.host %-->)</googleplay:author> <googleplay:author><!--% episode.email %--> (<!--% episode.host %-->)</googleplay:author>
<itunes:author><!--% episode.email %--> (<!--% episode.host %-->)</itunes:author> <itunes:author><!--% episode.email %--> (<!--% episode.host %-->)</itunes:author>
<googleplay:image href="https://www.hackerpublicradio.org/images/hpr_feed_itunes.png"/> <googleplay:image href="https://www.hackerpublicradio.org/images/hpr_feed_itunes.png"/>
<link>https://www.hackerpublicradio.org/eps/hpr/<!--% zero_pad_left(episode.id) %-->/index.html</link> <link>https://www.hackerpublicradio.org/eps/hpr<!--% zero_pad_left(episode.id) %-->/index.html</link>
<description><![CDATA[<!--% episode.notes %-->]]> <description><![CDATA[<!--% episode.notes %-->]]>
</description> </description>
<itunes:summary><![CDATA[<!--% episode.notes %-->]]> <itunes:summary><![CDATA[<!--% episode.notes.substr(0, 4000) | html_strip | xml_entity %-->]]>
</itunes:summary> </itunes:summary>
<pubDate><!--% format_feed_date(episode.date) %--></pubDate> <pubDate><!--% format_feed_date(episode.date) %--></pubDate>
<enclosure url="http://hackerpublicradio.org/eps/hpr<!--% zero_pad_left(episode.id) %-->.<!--% file_extension %-->" length="<!--% episode.duration * 1000 %-->" type="audio/<!--% audio_mime_type %-->"/> <enclosure url="http://hackerpublicradio.org/eps/hpr<!--% zero_pad_left(episode.id) %-->.<!--% file_extension %-->" length="<!--% episode.length %-->" type="audio/<!--% audio_mime_type %-->"/>
<guid>http://hackerpublicradio.org/eps/hpr<!--% zero_pad_left(episode.id) %-->.<!--% file_extension %--></guid> <guid>http://hackerpublicradio.org/eps/hpr<!--% zero_pad_left(episode.id) %-->.<!--% file_extension %--></guid>
</item> </item>
<!--% END %--> <!--% END %-->

View File

@ -93,7 +93,3 @@
<!--% END %--> <!--% END %-->
<a href="<!--% absolute_path(baseurl) %-->eps/<!--% folder %--><!--% zero_pad_left(links.latest) %-->/index.html" rel="last">Latest &gt;&gt;</a></small> <a href="<!--% absolute_path(baseurl) %-->eps/<!--% folder %--><!--% zero_pad_left(links.latest) %-->/index.html" rel="last">Latest &gt;&gt;</a></small>
<!--% END %--> <!--% END %-->
<!--
vim: syntax=html:ts=8:sw=4:tw=78:et:ai:
-->