forked from rho_n/hpr_generator
		
	site-generator: Changed the routine used to generate entities in
    'HTML::Entities'; see filter function 'xml_entity'
site.cfg: Added root variable 'http_baseurl' which defines the base URL
    using 'http'
templates/rss.tpl.xml: RSS template defining the channel and inserting
    the 'item' definitions; adjusted to be in step with the PHP version.
templates/shared-item.tpl.xml: RSS template used for each 'item' in an
    RSS feed; contains a call to 'HTML::Strip' which turns off the
    addition of spaces when removing tags; adjusted to be in step with
    the PHP version.
templates/shared-utils.tpl.html: Macro collection used by other
    templates; addition of temporay macro 'display_explicit_feed_2'
    which generates 'Yes/No' strings to be in step with the PHP version.
		
	
		
			
				
	
	
		
			439 lines
		
	
	
		
			14 KiB
		
	
	
	
		
			Perl
		
	
	
		
			Executable File
		
	
	
	
	
			
		
		
	
	
			439 lines
		
	
	
		
			14 KiB
		
	
	
	
		
			Perl
		
	
	
		
			Executable File
		
	
	
	
	
#!/usr/bin/perl
 | 
						|
 | 
						|
# {{{ POD documentation
 | 
						|
 | 
						|
=head1 NAME
 | 
						|
 | 
						|
        site-generator - HPR Site Generator
 | 
						|
 | 
						|
=head1 SYNOPSIS
 | 
						|
 | 
						|
        site-generator [OPTION]... PAGE|PAGE=<comma separated list of ids>...
 | 
						|
 | 
						|
        -a, --all           generate all pages defined in configuration file
 | 
						|
        -c, --configuration path to configuration file
 | 
						|
        -l, --list          print list of configured pages
 | 
						|
        -p, --preview       print generated pages to standard out
 | 
						|
        -q, --quiet         suppress progress information while generating pages
 | 
						|
        -v, --verbose       print extended progress information while generating pages
 | 
						|
        --help              print this help message
 | 
						|
 | 
						|
        Where I<PAGE> is a file name of a web page
 | 
						|
        or the special I<ALL> (to generate all pages).
 | 
						|
 | 
						|
        Examples:
 | 
						|
 | 
						|
                Generate two specific pages:
 | 
						|
                site-generator index about
 | 
						|
 | 
						|
                Generate the whole site:
 | 
						|
                site-generator --all
 | 
						|
 | 
						|
                Generate pages based on the same template:
 | 
						|
                site-generator correspondent=1,3,5..10
 | 
						|
 | 
						|
                Generate two specific pages with a different configuration:
 | 
						|
                site-generator --configuration=site_sqlite.cfg index about
 | 
						|
 | 
						|
 | 
						|
=head1 DESCRIPTION
 | 
						|
 | 
						|
This is a site generator for the Hacker Public Radio website based upon the
 | 
						|
Perl Template Toolkit.
 | 
						|
 | 
						|
=head1 INSTALLATION
 | 
						|
 | 
						|
        With SQLite
 | 
						|
        * Create the sqlite3 database from the hpr.sql MySQL dump file available on
 | 
						|
          hackerpublicradio.org. The default name for the database file is "hpr.db"
 | 
						|
          and should be located in the root of the project directory. The name and
 | 
						|
          location can be set in the site.cfg file.
 | 
						|
        * An "update-hpr.sh" helper script is available in the utils directory. This
 | 
						|
          script will download the hpr.sql file, convert it to the SQLite hpr.db file,
 | 
						|
          and regenerate the website using the site-generator.
 | 
						|
                1. `cd` into the root of the project directory
 | 
						|
                2.  Run `./utils/update-hpr.sh`
 | 
						|
        * SQLite v3.8.3 or greater is recommended. CTE WITH clauses are used in some template queries.
 | 
						|
          Must convert WITH clauses to sub-queries when using earlier versions of SQLite.
 | 
						|
 | 
						|
        With MySQL
 | 
						|
        * Create database hpr_hpr in the MySQL server from HPR dump file.
 | 
						|
                - sudo mysql --host=localhost < hpr.sql
 | 
						|
        * Create a user that will be used by the site-generator.
 | 
						|
                - Suggested username: hpr-generator
 | 
						|
                - CREATE USER 'hpr-generator'@'localhost' IDENTIFIED BY '<password>';
 | 
						|
        * Limit the user's privileges to EXECUTE and SELECT
 | 
						|
                - GRANT SELECT ON hpr_hpr.* TO 'hpr-generator'@'localhost';
 | 
						|
                - GRANT EXECUTE ON `hpr_hpr`.* TO 'hpr-generator'@'localhost';
 | 
						|
 | 
						|
        Install the needed Perl modules using preferred method (distribution packages, CPAN, etc.)
 | 
						|
                * Config::Std
 | 
						|
                * DBD::SQLite or DBD:mysql
 | 
						|
                * DBI
 | 
						|
                * Data::Dumper
 | 
						|
                * Date::Calc
 | 
						|
                * GetOpt::Long
 | 
						|
                * HTML::Entities
 | 
						|
                * Pod::Usage
 | 
						|
                * Template
 | 
						|
                * Template::Plugin::DBI
 | 
						|
                * Template::Plugin::Date
 | 
						|
                * Template::Plugin::File
 | 
						|
                * Template::Plugin::HTML::Strip
 | 
						|
                * Text::CSV_XS
 | 
						|
                * Tie::DBI
 | 
						|
 | 
						|
=head1 AUTHOR
 | 
						|
 | 
						|
        Roan Horning <roan.horning@no-spam.gmail.com>
 | 
						|
 | 
						|
=head1 LICENSE
 | 
						|
 | 
						|
        site-generator -- a static website generator for HPR
 | 
						|
        Copyright (C) 2022 Roan Horning
 | 
						|
 | 
						|
    This program is free software: you can redistribute it and/or modify
 | 
						|
    it under the terms of the GNU Affero General Public License as published by
 | 
						|
    the Free Software Foundation, either version 3 of the License, or
 | 
						|
    (at your option) any later version.
 | 
						|
 | 
						|
    This program is distributed in the hope that it will be useful,
 | 
						|
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
						|
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
						|
    GNU Affero General Public License for more details.
 | 
						|
 | 
						|
    You should have received a copy of the GNU Affero General Public License
 | 
						|
    along with this program.  If not, see <https://www.gnu.org/licenses/>.
 | 
						|
 | 
						|
=cut
 | 
						|
 | 
						|
# }}}
 | 
						|
 | 
						|
use 5.012;
 | 
						|
use strict;
 | 
						|
use warnings;
 | 
						|
use open ':encoding(UTF-8)';
 | 
						|
 | 
						|
use Getopt::Long qw(:config auto_help);
 | 
						|
use Pod::Usage;
 | 
						|
use Config::Std;
 | 
						|
use Text::CSV_XS;
 | 
						|
#use HTML::Entities qw(encode_entities_numeric);
 | 
						|
use HTML::Entities qw(encode_entities);
 | 
						|
use Date::Calc;
 | 
						|
use DBI;
 | 
						|
use DBD::SQLite;
 | 
						|
use Tie::DBI;
 | 
						|
use Template;
 | 
						|
use Template::Plugin::Date;
 | 
						|
use Template::Plugin::DBI;
 | 
						|
use Template::Plugin::HTML::Strip;
 | 
						|
 | 
						|
use Data::Dumper;
 | 
						|
 | 
						|
exit main();
 | 
						|
 | 
						|
sub main {
 | 
						|
 | 
						|
    # Argument parsing
 | 
						|
    my $all;
 | 
						|
    my $configuration_path;
 | 
						|
    my $preview;
 | 
						|
    my $verbose;
 | 
						|
    my $quiet;
 | 
						|
    GetOptions(
 | 
						|
        'all'             => \$all,
 | 
						|
        'configuration=s' => \$configuration_path,
 | 
						|
        'list'            => \&print_available_pages,
 | 
						|
        'preview'         => \$preview,
 | 
						|
        'verbose'         => \$verbose,
 | 
						|
        'quiet'           => \$quiet,
 | 
						|
    ) or pod2usage(1);
 | 
						|
    pod2usage(1) unless @ARGV || $all;
 | 
						|
    my (@page_args) = @ARGV;
 | 
						|
 | 
						|
    if ($quiet) {
 | 
						|
        $verbose = 'quiet';
 | 
						|
    };
 | 
						|
 | 
						|
    if (!$configuration_path) {
 | 
						|
        $configuration_path = "site.cfg";
 | 
						|
    }
 | 
						|
 | 
						|
    my %config;
 | 
						|
    if ( -f $configuration_path ) {
 | 
						|
        # Load config file
 | 
						|
        read_config $configuration_path => %config;
 | 
						|
    }
 | 
						|
    else {
 | 
						|
        print STDOUT "Could not read configuration file: $configuration_path\n";
 | 
						|
        exit 1;
 | 
						|
    }
 | 
						|
 | 
						|
    my $tt = get_template_html($config{DBI}, $config{app_paths});
 | 
						|
 | 
						|
    #
 | 
						|
    # Define a TT² vmethod called 'csv_parse', it takes a scalar value and
 | 
						|
    # returns an arrayref. Also define a filter called 'xml_entity' which
 | 
						|
    # numerically encodes non-ASCII characters.
 | 
						|
    #
 | 
						|
    $tt->context->define_vmethod( 'scalar', 'csv_parse', \&parse_csv );
 | 
						|
    $tt->context->define_filter( 'xml_entity', \&xml_entity );
 | 
						|
 | 
						|
    # If command line option all is set, parse configuration file
 | 
						|
    # for all pages
 | 
						|
    if ($all) {
 | 
						|
        @page_args = keys %config;
 | 
						|
 | 
						|
        # Remove non page sections of the configuration file
 | 
						|
        # from the generated list of pages.
 | 
						|
        @page_args= grep { $_ ne 'DBI' } @page_args;
 | 
						|
        @page_args= grep { $_ ne 'root_template' } @page_args;
 | 
						|
        @page_args= grep { $_ ne 'app_paths' } @page_args;
 | 
						|
 | 
						|
    };
 | 
						|
    foreach my $page_arg (@page_args) {
 | 
						|
        my %parsed_arg = parse_page_arg($page_arg);
 | 
						|
        if (exists($config{$parsed_arg{'page'}})) {
 | 
						|
            my $page_config = $config{$parsed_arg{'page'}};
 | 
						|
            $page_config->{'page'} = $parsed_arg{'page'};
 | 
						|
 | 
						|
            # Set page's root_template to the default root_template if the
 | 
						|
            # page root_template property is not set in the configuration file.
 | 
						|
            if (exists $page_config->{'root_template'} == 0) {
 | 
						|
                $page_config->{'root_template'} = $config{root_template}{content};
 | 
						|
            }
 | 
						|
 | 
						|
            # Set all config root_template properties as default page config properties
 | 
						|
            # except the previously set root_template content property
 | 
						|
            my @root_args = grep { $_ ne 'content' } keys %{$config{root_template}};
 | 
						|
            foreach my $root_arg (@root_args) {
 | 
						|
                if (exists $page_config->{$root_arg} == 0) {
 | 
						|
                    $page_config->{$root_arg} = $config{root_template}{$root_arg};
 | 
						|
                }
 | 
						|
            }
 | 
						|
 | 
						|
            if ($page_config->{'multipage'} && $page_config->{'multipage'} eq 'true') {
 | 
						|
                if (scalar @{$parsed_arg{'ids'}} == 0) {
 | 
						|
                    @{$parsed_arg{'ids'}} = get_ids_from_db($tt, \$page_config);
 | 
						|
                }
 | 
						|
                foreach my $id (@{$parsed_arg{'ids'}}) {
 | 
						|
                    $page_config->{'id'} = $id;
 | 
						|
                    verbose ($verbose, "Generating page: $page_config->{'page'} with id: $id");
 | 
						|
                    generate_page($tt, \$page_config, $preview);
 | 
						|
                }
 | 
						|
            }
 | 
						|
            else {
 | 
						|
                verbose ($verbose, "Generating page: $page_config->{'page'}");
 | 
						|
                generate_page($tt, \$page_config, $preview);
 | 
						|
            }
 | 
						|
        }
 | 
						|
        else {
 | 
						|
            verbose (1, "\nWarning: Page $parsed_arg{'page'} is not defined in the configuration file.");
 | 
						|
        }
 | 
						|
    }
 | 
						|
    verbose (1, "\nFinished processing the files.");
 | 
						|
    return 0;
 | 
						|
}
 | 
						|
 | 
						|
sub get_template_html {
 | 
						|
    # For an HTML based Template file, define the
 | 
						|
    # template start and end tags to also function as
 | 
						|
    # HTML comments to make the template file valid HTML.
 | 
						|
    #
 | 
						|
    return Template->new(
 | 
						|
        {   INCLUDE_PATH => $_[1]{templates_path},
 | 
						|
            OUTPUT_PATH  => $_[1]{output_path},
 | 
						|
            EVAL_PERL    => 1,
 | 
						|
            START_TAG    => '<!--%',
 | 
						|
            END_TAG      => '%-->',
 | 
						|
            PRE_CHOMP    => 1,
 | 
						|
            POST_CHOMP   => 1,
 | 
						|
            CONSTANTS    => {
 | 
						|
                database => $_[0]{database},
 | 
						|
                driver   => $_[0]{driver},
 | 
						|
                user     => $_[0]{user},
 | 
						|
                password => $_[0]{password},
 | 
						|
            }
 | 
						|
        }
 | 
						|
    ) || die $Template::ERROR, "\n";
 | 
						|
 | 
						|
}
 | 
						|
 | 
						|
sub generate_page {
 | 
						|
    my ( $tt, $config, $preview ) = @_;
 | 
						|
    my $html;
 | 
						|
    if ( !$preview ) {
 | 
						|
        $html = get_filename($$config);
 | 
						|
    }
 | 
						|
    $tt->process( $$config->{root_template},
 | 
						|
        $$config, $html
 | 
						|
    )
 | 
						|
        || die $tt->error(), "\n";
 | 
						|
 | 
						|
}
 | 
						|
 | 
						|
sub verbose {
 | 
						|
        my ($verbose, $message) = @_;
 | 
						|
        if ($verbose) {
 | 
						|
                if ($verbose ne 'quiet') {
 | 
						|
                        print STDOUT "$message\n";
 | 
						|
                }
 | 
						|
        }
 | 
						|
        else {
 | 
						|
                STDOUT->autoflush(1);
 | 
						|
                print STDOUT ".";
 | 
						|
        };
 | 
						|
}
 | 
						|
 | 
						|
sub parse_page_arg {
 | 
						|
    my ($page_arg) =  @_;
 | 
						|
    # Split page name from page ids if available.
 | 
						|
    my ($page, $ids) = split(/=/, $page_arg);
 | 
						|
    my @ids;
 | 
						|
 | 
						|
    if(!$ids) {
 | 
						|
        $ids = "";
 | 
						|
    }
 | 
						|
    else {
 | 
						|
        # Parse the page ids and push them onto @ids array
 | 
						|
        my @ids_by_comma = split(/\,/, $ids);
 | 
						|
        foreach my $id_by_comma (@ids_by_comma) {
 | 
						|
            my @ids_for_range = split(/\.\./, $id_by_comma);
 | 
						|
            if ((scalar @ids_for_range) == 2) {
 | 
						|
                push @ids, $ids_for_range[0]..$ids_for_range[1];
 | 
						|
            }
 | 
						|
            elsif ((scalar @ids_for_range) == 1) {
 | 
						|
                push @ids, $ids_for_range[0];
 | 
						|
            }
 | 
						|
            else {
 | 
						|
                verbose (1, "\nWarning: Page $page id range $id_by_comma could not be parsed.");
 | 
						|
            }
 | 
						|
        }
 | 
						|
    }
 | 
						|
    return ('page' => $page, 'ids' => [@ids]);
 | 
						|
}
 | 
						|
 | 
						|
sub get_ids_from_db {
 | 
						|
        # Use a template to generate a string of page identifiers.
 | 
						|
        # The template should return the string in the form of
 | 
						|
        # <comma><identifier><comma><identifier>...
 | 
						|
        #
 | 
						|
        my ($tt, $config)  = @_;
 | 
						|
        my $selected_ids = "";
 | 
						|
        my $id_template = "ids-$$config->{'page'}.tpl.html";
 | 
						|
 | 
						|
        $tt->process($id_template, $$config, \$selected_ids)
 | 
						|
        || die $tt->error(), "\n";
 | 
						|
 | 
						|
        # Starts with a newline and comma
 | 
						|
        return split(/,/, substr($selected_ids, 2));
 | 
						|
}
 | 
						|
 | 
						|
sub get_filename {
 | 
						|
        my ($config) = @_;
 | 
						|
        my $filename = "output.html";
 | 
						|
        my $base_path = "";
 | 
						|
 | 
						|
        if ($$config{'filename'}) {
 | 
						|
                if (substr($$config{'filename'}, -1) eq '/') {
 | 
						|
                        $base_path = $$config{'filename'};
 | 
						|
                }
 | 
						|
                else {
 | 
						|
                        $filename = $$config{'filename'};
 | 
						|
                        my $padded_index = "";
 | 
						|
                        if (exists $$config{'id'} && $$config{'id'} ne "") {
 | 
						|
                                $padded_index = sprintf("%04d", $$config{'id'});
 | 
						|
                        }
 | 
						|
                        $filename =~ s/\[id\]/$padded_index/;
 | 
						|
                    return $filename;
 | 
						|
                }
 | 
						|
        }
 | 
						|
        # Default naming if full filename configuration is not supplied.
 | 
						|
        if ($$config{'multipage'} && $$config{'multipage'} eq 'true') {
 | 
						|
                my $padded_index = sprintf("%04d", $$config{'id'});
 | 
						|
                $filename = "$base_path$$config{'page'}${padded_index}.html";
 | 
						|
        }
 | 
						|
        else {
 | 
						|
                $filename = "$base_path$$config{'page'}.html";
 | 
						|
        }
 | 
						|
        return $filename;
 | 
						|
}
 | 
						|
 | 
						|
sub print_available_pages {
 | 
						|
    # Load config file
 | 
						|
    read_config "site.cfg" => my %config;
 | 
						|
 | 
						|
    my @page_args = sort ( keys %config );
 | 
						|
 | 
						|
    # Remove non page sections of the configuration file
 | 
						|
    # from the generated list of pages.
 | 
						|
    @page_args = grep { $_ ne 'DBI' } @page_args;
 | 
						|
    @page_args = grep { $_ ne 'root_template' } @page_args;
 | 
						|
 | 
						|
    foreach my $page_arg (@page_args) {
 | 
						|
        print "$page_arg\n";
 | 
						|
    }
 | 
						|
    exit;
 | 
						|
}
 | 
						|
 | 
						|
#===  FUNCTION  ================================================================
 | 
						|
#         NAME: parse_csv
 | 
						|
#      PURPOSE: Parses a simple string containing CSV data
 | 
						|
#   PARAMETERS: $csv_in         CSV string
 | 
						|
#      RETURNS: An arrayref containing the parsed CSV elements
 | 
						|
#  DESCRIPTION: The Text::CSV_XS module instance is created with the option
 | 
						|
#               'allow_whitespace' to be forgiving of any spaces around the
 | 
						|
#               CSV elements and to strip them. Also, 'allow_loose_quotes' is
 | 
						|
#               forgiving of really messed up CSV. The 'binary' option
 | 
						|
#               permits any characters in the tags (expecting Unicode).
 | 
						|
#               The fields parsed from the tag string is checked for the
 | 
						|
#               existence of utf8 characters and encoded to ensure any found
 | 
						|
#               are properly stored.
 | 
						|
#       THROWS: No exceptions
 | 
						|
#     COMMENTS: None
 | 
						|
#     SEE ALSO: N/A
 | 
						|
#===============================================================================
 | 
						|
sub parse_csv {
 | 
						|
    my ($csv_in) = @_;
 | 
						|
 | 
						|
    my $csv = Text::CSV_XS->new(
 | 
						|
        {   binary             => 1,
 | 
						|
            auto_diag          => 1,
 | 
						|
            allow_whitespace   => 1,
 | 
						|
            allow_loose_quotes => 1
 | 
						|
        }
 | 
						|
    );
 | 
						|
    my $status = $csv->parse($csv_in);
 | 
						|
    unless ( $status ) {
 | 
						|
        warn "Failed to parse CSV '$csv_in'\n" ;
 | 
						|
        return;
 | 
						|
    }
 | 
						|
    my @fields = $csv->fields();
 | 
						|
 | 
						|
    @fields = map {utf8::encode($_) if utf8::is_utf8($_); $_} @fields;
 | 
						|
 | 
						|
    return \@fields;
 | 
						|
}
 | 
						|
 | 
						|
#===  FUNCTION  ================================================================
 | 
						|
#         NAME: xml_entity
 | 
						|
#      PURPOSE: Static filter to encode Unicode for XML
 | 
						|
#   PARAMETERS: $text           String to be processed
 | 
						|
#      RETURNS: Processed text
 | 
						|
#  DESCRIPTION:
 | 
						|
#       THROWS: No exceptions
 | 
						|
#     COMMENTS: None
 | 
						|
#     SEE ALSO: N/A
 | 
						|
#===============================================================================
 | 
						|
sub xml_entity {
 | 
						|
    my ($text) = @_;
 | 
						|
 | 
						|
#    encode_entities_numeric( $text );
 | 
						|
    encode_entities( $text );
 | 
						|
 | 
						|
    return $text;
 | 
						|
}
 | 
						|
 | 
						|
# vim: syntax=perl:ts=8:sw=4:et:ai:tw=78:fo=tcrqn21:fdm=marker
 |