dc138596ea
site-generator: Cosmetic adjustments. Additions to the POD documentation. Additions to module list. Additions to work better with UTF-8. Addition of functions 'parse_csv' and 'xml_entity' templates/queries-episodes-sqlite.tpl.html: templates/shared-utils.tpl.html: Cosmetic changes templates/rss-query-hpr-mysql.tpl.xml: templates/rss-query-hpr-sqlite.tpl.xml: templates/rss-query-hpr_total-mysql.tpl.xml: templates/rss-query-hpr_total-sqlite.tpl.xml: Enhancements to allow the query to collect the audio length from the 'assets' table. The audio file extension is passed as an argument to the 'execute' statement. templates/rss.tpl.xml: Cosmetic changes Changed one 'php' URL to 'html'. templates/shared-episode-summary.tpl.html: Change to 'display_tags' macro to turn the 'eps.tags' field into a list of links. This works, but needs further development because using the tag strings as anchor ids is not reliable. templates/shared-item.tpl.xml: Cosmetic changes. Addition of filter 'HTML.strip' which is used as a means of removing HTML tags from '<itunes:summary>' strings. Using new filter 'xml_entity' which converts all non-ASCII characters in the notes to numeric hexadecimal entities for '<itunes:summary>'. The '<enclosure>' tag now uses 'episode.length' rather than 'episode.duration' which has been extracted from the 'assets' table.
422 lines
14 KiB
Perl
Executable File
422 lines
14 KiB
Perl
Executable File
#!/usr/bin/perl
|
|
|
|
# {{{ POD documentation
|
|
|
|
=head1 NAME
|
|
|
|
site-generator - HPR Site Generator
|
|
|
|
=head1 SYNOPSIS
|
|
|
|
site-generator [OPTION]... PAGE|PAGE=<comma separated list of ids>...
|
|
|
|
-a, --all generate all pages defined in configuration file
|
|
-c, --configuration path to configuration file
|
|
-l, --list print list of configured pages
|
|
-p, --preview print generated pages to standard out
|
|
-q, --quiet suppress progress information while generating pages
|
|
-v, --verbose print extended progress information while generating pages
|
|
--help print this help message
|
|
|
|
Where I<PAGE> is a file name of a web page
|
|
or the special I<ALL> (to generate all pages).
|
|
|
|
Examples:
|
|
|
|
Generate two specific pages:
|
|
site-generator index about
|
|
|
|
Generate the whole site:
|
|
site-generator --all
|
|
|
|
Generate pages based on the same template:
|
|
site-generator correspondent=1,3,5..10
|
|
|
|
Generate two specific pages with a different configuration:
|
|
site-generator --configuration=site_sqlite.cfg index about
|
|
|
|
|
|
=head1 DESCRIPTION
|
|
|
|
This is a site generator for the Hacker Public Radio website based upon the
|
|
Perl Template Toolkit.
|
|
|
|
=head1 INSTALLATION
|
|
|
|
With SQLite
|
|
* Create the sqlite3 database from the hpr.sql MySQL dump file available on
|
|
hackerpublicradio.org. The default name for the database file is "hpr.db"
|
|
and should be located in the root of the project directory. The name and
|
|
location can be set in the site.cfg file.
|
|
* An "update-hpr.sh" helper script is available in the utils directory. This
|
|
script will download the hpr.sql file, convert it to the SQLite hpr.db file,
|
|
and regenerate the website using the site-generator.
|
|
1. `cd` into the root of the project directory
|
|
2. Run `./utils/update-hpr.sh`
|
|
* SQLite v3.8.3 or greater is recommended. CTE WITH clauses are used in some template queries.
|
|
Must convert WITH clauses to sub-queries when using earlier versions of SQLite.
|
|
|
|
With MySQL
|
|
* Create database hpr_hpr in the MySQL server from HPR dump file.
|
|
- sudo mysql --host=localhost < hpr.sql
|
|
* Create a user that will be used by the site-generator.
|
|
- Suggested username: hpr-generator
|
|
- CREATE USER 'hpr-generator'@'localhost' IDENTIFIED BY '<password>';
|
|
* Limit the user's privileges to EXECUTE and SELECT
|
|
- GRANT SELECT ON hpr_hpr.* TO 'hpr-generator'@'localhost';
|
|
- GRANT EXECUTE ON `hpr_hpr`.* TO 'hpr-generator'@'localhost';
|
|
|
|
Install the needed Perl modules using preferred method (distribution packages, CPAN, etc.)
|
|
* GetOpt
|
|
* Pod::Usage
|
|
* Config::Std
|
|
* Template
|
|
* Template::Plugin::File
|
|
* Template::Plugin::DBI
|
|
* DBI
|
|
* Tie::DBI
|
|
* DBD::SQLite or DBD:mysql
|
|
* Date::Calc
|
|
* Text::CSV_XS
|
|
|
|
=head1 AUTHOR
|
|
|
|
Roan Horning <roan.horning@no-spam.gmail.com>
|
|
|
|
=head1 LICENSE
|
|
|
|
site-generator -- a static website generator for HPR
|
|
Copyright (C) 2022 Roan Horning
|
|
|
|
This program is free software: you can redistribute it and/or modify
|
|
it under the terms of the GNU Affero General Public License as published by
|
|
the Free Software Foundation, either version 3 of the License, or
|
|
(at your option) any later version.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU Affero General Public License for more details.
|
|
|
|
You should have received a copy of the GNU Affero General Public License
|
|
along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|
|
|
=cut
|
|
|
|
# }}}
|
|
|
|
use strict;
|
|
use warnings;
|
|
|
|
use Getopt::Long qw(:config auto_help);
|
|
use Pod::Usage;
|
|
use Config::Std;
|
|
use Text::CSV_XS;
|
|
use HTML::Entities qw(encode_entities_numeric);
|
|
use Template;
|
|
use Data::Dumper;
|
|
|
|
binmode STDOUT, ":encoding(UTF-8)";
|
|
binmode STDERR, ":encoding(UTF-8)";
|
|
|
|
exit main();
|
|
|
|
sub main {
|
|
|
|
# Argument parsing
|
|
my $all;
|
|
my $configuration_path;
|
|
my $preview;
|
|
my $verbose;
|
|
my $quiet;
|
|
GetOptions(
|
|
'all' => \$all,
|
|
'configuration=s' => \$configuration_path,
|
|
'list' => \&print_available_pages,
|
|
'preview' => \$preview,
|
|
'verbose' => \$verbose,
|
|
'quiet' => \$quiet,
|
|
) or pod2usage(1);
|
|
pod2usage(1) unless @ARGV || $all;
|
|
my (@page_args) = @ARGV;
|
|
|
|
if ($quiet) {
|
|
$verbose = 'quiet';
|
|
};
|
|
|
|
if (!$configuration_path) {
|
|
$configuration_path = "site.cfg";
|
|
}
|
|
|
|
my %config;
|
|
if ( -f $configuration_path ) {
|
|
# Load config file
|
|
read_config $configuration_path => %config;
|
|
}
|
|
else {
|
|
print STDOUT "Could not read configuration file: $configuration_path\n";
|
|
exit 1;
|
|
}
|
|
|
|
my $tt = get_template_html($config{DBI}, $config{app_paths});
|
|
|
|
#
|
|
# Define a TT² vmethod called 'csv_parse', it takes a scalar value and
|
|
# returns an arrayref. Also define a filter called 'xml_entity' which
|
|
# numerically encodes non-ASCII characters.
|
|
#
|
|
$tt->context->define_vmethod( 'scalar', 'csv_parse', \&parse_csv );
|
|
$tt->context->define_filter( 'xml_entity', \&xml_entity );
|
|
|
|
# If command line option all is set, parse configuration file
|
|
# for all pages
|
|
if ($all) {
|
|
@page_args = keys %config;
|
|
|
|
# Remove non page sections of the configuration file
|
|
# from the generated list of pages.
|
|
@page_args= grep { $_ ne 'DBI' } @page_args;
|
|
@page_args= grep { $_ ne 'root_template' } @page_args;
|
|
@page_args= grep { $_ ne 'app_paths' } @page_args;
|
|
|
|
};
|
|
foreach my $page_arg (@page_args) {
|
|
my %parsed_arg = parse_page_arg($page_arg);
|
|
if (exists($config{$parsed_arg{'page'}})) {
|
|
my $page_config = $config{$parsed_arg{'page'}};
|
|
$page_config->{'page'} = $parsed_arg{'page'};
|
|
|
|
# Set page's root_template to the default root_template if the
|
|
# page root_template property is not set in the configuration file.
|
|
if (exists $page_config->{'root_template'} == 0) {
|
|
$page_config->{'root_template'} = $config{root_template}{content};
|
|
}
|
|
|
|
# Set all config root_template properties as default page config properties
|
|
# except the previously set root_template content property
|
|
my @root_args = grep { $_ ne 'content' } keys %{$config{root_template}};
|
|
foreach my $root_arg (@root_args) {
|
|
if (exists $page_config->{$root_arg} == 0) {
|
|
$page_config->{$root_arg} = $config{root_template}{$root_arg};
|
|
}
|
|
}
|
|
|
|
if ($page_config->{'multipage'} && $page_config->{'multipage'} eq 'true') {
|
|
# Empty arrayref bug fixed, so count is reduced by 1
|
|
# was: if (scalar @{$parsed_arg{'ids'}} == 1) {
|
|
if (scalar @{$parsed_arg{'ids'}} == 0) {
|
|
@{$parsed_arg{'ids'}} = get_ids_from_db($tt, \$page_config);
|
|
}
|
|
foreach my $id (@{$parsed_arg{'ids'}}) {
|
|
$page_config->{'id'} = $id;
|
|
verbose ($verbose, "Generating page: $page_config->{'page'} with id: $id");
|
|
generate_page($tt, \$page_config, $preview);
|
|
}
|
|
}
|
|
else {
|
|
verbose ($verbose, "Generating page: $page_config->{'page'}");
|
|
generate_page($tt, \$page_config, $preview);
|
|
}
|
|
}
|
|
else {
|
|
verbose (1, "\nWarning: Page $parsed_arg{'page'} is not defined in the configuration file.");
|
|
}
|
|
}
|
|
verbose (1, "\nFinished processing the files.");
|
|
return 0;
|
|
}
|
|
|
|
sub get_template_html (\%@) {
|
|
# For an HTML based Template file, define the
|
|
# template start and end tags to also function as
|
|
# HTML comments to make the template file valid HTML.
|
|
#
|
|
return Template->new(
|
|
{ INCLUDE_PATH => $_[1]{templates_path},
|
|
OUTPUT_PATH => $_[1]{output_path},
|
|
ENCODING => 'utf8',
|
|
EVAL_PERL => 1,
|
|
START_TAG => '<!--%',
|
|
END_TAG => '%-->',
|
|
PRE_CHOMP => 1,
|
|
POST_CHOMP => 1,
|
|
CONSTANTS => {
|
|
database => $_[0]{database},
|
|
driver => $_[0]{driver},
|
|
user => $_[0]{user},
|
|
password => $_[0]{password},
|
|
}
|
|
}
|
|
) || die $Template::ERROR, "\n";
|
|
|
|
}
|
|
|
|
sub generate_page {
|
|
my ( $tt, $config, $preview ) = @_;
|
|
my $html;
|
|
if ( !$preview ) {
|
|
$html = get_filename($$config);
|
|
}
|
|
$tt->process( $$config->{root_template},
|
|
$$config, $html, { binmode => ':utf8' } )
|
|
|| die $tt->error(), "\n";
|
|
|
|
}
|
|
|
|
sub verbose {
|
|
my ($verbose, $message) = @_;
|
|
if ($verbose) {
|
|
if ($verbose ne 'quiet') {
|
|
print STDOUT "$message\n";
|
|
}
|
|
}
|
|
else {
|
|
STDOUT->autoflush(1);
|
|
print STDOUT ".";
|
|
};
|
|
}
|
|
|
|
sub parse_page_arg {
|
|
my ($page_arg) = @_;
|
|
# Split page name from page ids if available.
|
|
my ($page, $ids) = split(/=/, $page_arg);
|
|
my @ids;
|
|
|
|
if(!$ids) {
|
|
$ids = "";
|
|
}
|
|
else {
|
|
# Parse the page ids and push them onto @ids array
|
|
my @ids_by_comma = split(/\,/, $ids);
|
|
foreach my $id_by_comma (@ids_by_comma) {
|
|
my @ids_for_range = split(/\.\./, $id_by_comma);
|
|
if ((scalar @ids_for_range) == 2) {
|
|
push @ids, $ids_for_range[0]..$ids_for_range[1];
|
|
}
|
|
elsif ((scalar @ids_for_range) == 1) {
|
|
push @ids, $ids_for_range[0];
|
|
}
|
|
else {
|
|
verbose (1, "\nWarning: Page $page id range $id_by_comma could not be parsed.");
|
|
}
|
|
}
|
|
}
|
|
return ('page' => $page, 'ids' => [@ids]);
|
|
}
|
|
|
|
sub get_ids_from_db {
|
|
# Use a template to generate a string of page identifiers.
|
|
# The template should return the string in the form of
|
|
# <comma><identifier><comma><identifier>...
|
|
#
|
|
my ($tt, $config) = @_;
|
|
my $selected_ids = "";
|
|
my $id_template = "ids-$$config->{'page'}.tpl.html";
|
|
|
|
$tt->process($id_template, $$config, \$selected_ids)
|
|
|| die $tt->error(), "\n";
|
|
|
|
# Starts with a newline and comma
|
|
return split(/,/, substr($selected_ids, 2));
|
|
}
|
|
|
|
sub get_filename {
|
|
my ($config) = @_;
|
|
my $filename = "output.html";
|
|
my $base_path = "";
|
|
|
|
if ($$config{'filename'}) {
|
|
if (substr($$config{'filename'}, -1) eq '/') {
|
|
$base_path = $$config{'filename'};
|
|
}
|
|
else {
|
|
$filename = $$config{'filename'};
|
|
my $padded_index = "";
|
|
if (exists $$config{'id'} && $$config{'id'} ne "") {
|
|
$padded_index = sprintf("%04d", $$config{'id'});
|
|
}
|
|
$filename =~ s/\[id\]/$padded_index/;
|
|
return $filename;
|
|
}
|
|
}
|
|
# Default naming if full filename configuration is not supplied.
|
|
if ($$config{'multipage'} && $$config{'multipage'} eq 'true') {
|
|
my $padded_index = sprintf("%04d", $$config{'id'});
|
|
$filename = "$base_path$$config{'page'}${padded_index}.html";
|
|
}
|
|
else {
|
|
$filename = "$base_path$$config{'page'}.html";
|
|
}
|
|
return $filename;
|
|
}
|
|
|
|
sub print_available_pages {
|
|
# Load config file
|
|
read_config "site.cfg" => my %config;
|
|
|
|
my @page_args = sort ( keys %config );
|
|
|
|
# Remove non page sections of the configuration file
|
|
# from the generated list of pages.
|
|
@page_args = grep { $_ ne 'DBI' } @page_args;
|
|
@page_args = grep { $_ ne 'root_template' } @page_args;
|
|
|
|
foreach my $page_arg (@page_args) {
|
|
print "$page_arg\n";
|
|
}
|
|
exit;
|
|
}
|
|
|
|
#=== FUNCTION ================================================================
|
|
# NAME: parse_csv
|
|
# PURPOSE: Parses a simple string containing CSV data
|
|
# PARAMETERS: $csv_in CSV string
|
|
# RETURNS: An arrayref containing the parsed CSV elements
|
|
# DESCRIPTION: The Text::CSV_XS module instance is created with the option
|
|
# 'allow_whitespace' to be forgiving of any spaces around the
|
|
# CSV elements and to strip them. Also, 'allow_loose_quotes' is
|
|
# forgiving of really messed up CSV.
|
|
# THROWS: No exceptions
|
|
# COMMENTS: None
|
|
# SEE ALSO: N/A
|
|
#===============================================================================
|
|
sub parse_csv {
|
|
my ($csv_in) = @_;
|
|
|
|
my $csv = Text::CSV_XS->new(
|
|
{ binary => 1,
|
|
auto_diag => 1,
|
|
allow_whitespace => 1,
|
|
allow_loose_quotes => 1
|
|
}
|
|
);
|
|
my $status = $csv->parse($csv_in);
|
|
unless ( $status ) {
|
|
warn "Failed to parse '$csv_in'\n" ;
|
|
return;
|
|
}
|
|
my @fields = $csv->fields();
|
|
|
|
return \@fields;
|
|
}
|
|
|
|
#=== FUNCTION ================================================================
|
|
# NAME: xml_entity
|
|
# PURPOSE: Static filter to encode Unicode for XML
|
|
# PARAMETERS: $text String to be processed
|
|
# RETURNS: Processed text
|
|
# DESCRIPTION:
|
|
# THROWS: No exceptions
|
|
# COMMENTS: None
|
|
# SEE ALSO: N/A
|
|
#===============================================================================
|
|
sub xml_entity {
|
|
my ($text) = @_;
|
|
|
|
encode_entities_numeric( $text );
|
|
|
|
return $text;
|
|
}
|
|
|
|
# vim: syntax=perl:ts=8:sw=4:et:ai:tw=78:fo=tcrqn21:fdm=marker
|