forked from HPR/hpr_generator
		
	
		
			
				
	
	
		
			435 lines
		
	
	
		
			14 KiB
		
	
	
	
		
			Perl
		
	
	
		
			Executable File
		
	
	
	
	
			
		
		
	
	
			435 lines
		
	
	
		
			14 KiB
		
	
	
	
		
			Perl
		
	
	
		
			Executable File
		
	
	
	
	
| #!/usr/bin/perl
 | |
| 
 | |
| # {{{ POD documentation
 | |
| 
 | |
| =head1 NAME
 | |
| 
 | |
|         site-generator - HPR Site Generator
 | |
| 
 | |
| =head1 SYNOPSIS
 | |
| 
 | |
|         site-generator [OPTION]... PAGE|PAGE=<comma separated list of ids>...
 | |
| 
 | |
|         -a, --all           generate all pages defined in configuration file
 | |
|         -c, --configuration path to configuration file
 | |
|         -l, --list          print list of configured pages
 | |
|         -p, --preview       print generated pages to standard out
 | |
|         -q, --quiet         suppress progress information while generating pages
 | |
|         -v, --verbose       print extended progress information while generating pages
 | |
|         --help              print this help message
 | |
| 
 | |
|         Where I<PAGE> is a file name of a web page
 | |
|         or the special I<ALL> (to generate all pages).
 | |
| 
 | |
|         Examples:
 | |
| 
 | |
|                 Generate two specific pages:
 | |
|                 site-generator index about
 | |
| 
 | |
|                 Generate the whole site:
 | |
|                 site-generator --all
 | |
| 
 | |
|                 Generate pages based on the same template:
 | |
|                 site-generator correspondent=1,3,5..10
 | |
| 
 | |
|                 Generate two specific pages with a different configuration:
 | |
|                 site-generator --configuration=site_sqlite.cfg index about
 | |
| 
 | |
| 
 | |
| =head1 DESCRIPTION
 | |
| 
 | |
| This is a site generator for the Hacker Public Radio website based upon the
 | |
| Perl Template Toolkit.
 | |
| 
 | |
| =head1 INSTALLATION
 | |
| 
 | |
|         With SQLite
 | |
|         * Create the sqlite3 database from the hpr.sql MySQL dump file available on
 | |
|           hackerpublicradio.org. The default name for the database file is "hpr.db"
 | |
|           and should be located in the root of the project directory. The name and
 | |
|           location can be set in the site.cfg file.
 | |
|         * An "update-hpr.sh" helper script is available in the utils directory. This
 | |
|           script will download the hpr.sql file, convert it to the SQLite hpr.db file,
 | |
|           and regenerate the website using the site-generator.
 | |
|                 1. `cd` into the root of the project directory
 | |
|                 2.  Run `./utils/update-hpr.sh`
 | |
|         * SQLite v3.8.3 or greater is recommended. CTE WITH clauses are used in some template queries.
 | |
|           Must convert WITH clauses to sub-queries when using earlier versions of SQLite.
 | |
| 
 | |
|         With MySQL
 | |
|         * Create database hpr_hpr in the MySQL server from HPR dump file.
 | |
|                 - sudo mysql --host=localhost < hpr.sql
 | |
|         * Create a user that will be used by the site-generator.
 | |
|                 - Suggested username: hpr-generator
 | |
|                 - CREATE USER 'hpr-generator'@'localhost' IDENTIFIED BY '<password>';
 | |
|         * Limit the user's privileges to EXECUTE and SELECT
 | |
|                 - GRANT SELECT ON hpr_hpr.* TO 'hpr-generator'@'localhost';
 | |
|                 - GRANT EXECUTE ON `hpr_hpr`.* TO 'hpr-generator'@'localhost';
 | |
| 
 | |
|         Install the needed Perl modules using preferred method (distribution packages, CPAN, etc.)
 | |
|                 * Config::Std
 | |
|                 * DBD::SQLite or DBD:mysql
 | |
|                 * DBI
 | |
|                 * Data::Dumper
 | |
|                 * Date::Calc
 | |
|                 * GetOpt::Long
 | |
|                 * HTML::Entities
 | |
|                 * Pod::Usage
 | |
|                 * Template
 | |
|                 * Template::Plugin::DBI
 | |
|                 * Template::Plugin::Date
 | |
|                 * Template::Plugin::File
 | |
|                 * Template::Plugin::HTML::Strip
 | |
|                 * Text::CSV_XS
 | |
|                 * Tie::DBI
 | |
| 
 | |
| =head1 AUTHOR
 | |
| 
 | |
|         Roan Horning <roan.horning@no-spam.gmail.com>
 | |
| 
 | |
| =head1 LICENSE
 | |
| 
 | |
|         site-generator -- a static website generator for HPR
 | |
|         Copyright (C) 2022 Roan Horning
 | |
| 
 | |
|     This program is free software: you can redistribute it and/or modify
 | |
|     it under the terms of the GNU Affero General Public License as published by
 | |
|     the Free Software Foundation, either version 3 of the License, or
 | |
|     (at your option) any later version.
 | |
| 
 | |
|     This program is distributed in the hope that it will be useful,
 | |
|     but WITHOUT ANY WARRANTY; without even the implied warranty of
 | |
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | |
|     GNU Affero General Public License for more details.
 | |
| 
 | |
|     You should have received a copy of the GNU Affero General Public License
 | |
|     along with this program.  If not, see <https://www.gnu.org/licenses/>.
 | |
| 
 | |
| =cut
 | |
| 
 | |
| # }}}
 | |
| 
 | |
| use 5.012;
 | |
| use strict;
 | |
| use warnings;
 | |
| use open ':encoding(UTF-8)';
 | |
| 
 | |
| use Getopt::Long qw(:config auto_help);
 | |
| use Pod::Usage;
 | |
| use Config::Std;
 | |
| use Text::CSV_XS;
 | |
| #use HTML::Entities qw(encode_entities_numeric);
 | |
| use HTML::Entities qw(encode_entities);
 | |
| use Date::Calc;
 | |
| use DBI;
 | |
| use Tie::DBI;
 | |
| use Template;
 | |
| use Template::Plugin::Date;
 | |
| use Template::Plugin::DBI;
 | |
| use Template::Plugin::HTML::Strip;
 | |
| 
 | |
| use Data::Dumper;
 | |
| 
 | |
| exit main();
 | |
| 
 | |
| sub main {
 | |
| 
 | |
|     # Argument parsing
 | |
|     my $all;
 | |
|     my $configuration_path;
 | |
|     my $preview;
 | |
|     my $verbose;
 | |
|     my $quiet;
 | |
|     GetOptions(
 | |
|         'all'             => \$all,
 | |
|         'configuration=s' => \$configuration_path,
 | |
|         'list'            => \&print_available_pages,
 | |
|         'preview'         => \$preview,
 | |
|         'verbose'         => \$verbose,
 | |
|         'quiet'           => \$quiet,
 | |
|     ) or pod2usage(1);
 | |
|     pod2usage(1) unless @ARGV || $all;
 | |
|     my (@page_args) = @ARGV;
 | |
| 
 | |
|     if ($quiet) {
 | |
|         $verbose = 'quiet';
 | |
|     };
 | |
| 
 | |
|     if (!$configuration_path) {
 | |
|         $configuration_path = "site.cfg";
 | |
|     }
 | |
| 
 | |
|     my %config;
 | |
|     if ( -f $configuration_path ) {
 | |
|         # Load config file
 | |
|         read_config $configuration_path => %config;
 | |
|     }
 | |
|     else {
 | |
|         print STDOUT "Could not read configuration file: $configuration_path\n";
 | |
|         exit 1;
 | |
|     }
 | |
| 
 | |
|     my $tt = get_template_html($config{DBI}, $config{app_paths});
 | |
| 
 | |
|     #
 | |
|     # Define a TT² vmethod called 'csv_parse', it takes a scalar value and
 | |
|     # returns an arrayref. Also define a filter called 'xml_entity' which
 | |
|     # numerically encodes non-ASCII characters.
 | |
|     #
 | |
|     $tt->context->define_vmethod( 'scalar', 'csv_parse', \&parse_csv );
 | |
|     $tt->context->define_filter( 'xml_entity', \&xml_entity );
 | |
| 
 | |
|     # If command line option all is set, parse configuration file
 | |
|     # for all pages
 | |
|     if ($all) {
 | |
|         @page_args = keys %config;
 | |
| 
 | |
|         # Remove non page sections of the configuration file
 | |
|         # from the generated list of pages.
 | |
|         @page_args= grep { $_ ne 'DBI' } @page_args;
 | |
|         @page_args= grep { $_ ne 'root_template' } @page_args;
 | |
|         @page_args= grep { $_ ne 'app_paths' } @page_args;
 | |
| 
 | |
|     };
 | |
|     foreach my $page_arg (@page_args) {
 | |
|         my %parsed_arg = parse_page_arg($page_arg);
 | |
|         if (exists($config{$parsed_arg{'page'}})) {
 | |
|             my $page_config = $config{$parsed_arg{'page'}};
 | |
|             $page_config->{'page'} = $parsed_arg{'page'};
 | |
| 
 | |
|             # Set page's root_template to the default root_template if the
 | |
|             # page root_template property is not set in the configuration file.
 | |
|             if (exists $page_config->{'root_template'} == 0) {
 | |
|                 $page_config->{'root_template'} = $config{root_template}{content};
 | |
|             }
 | |
| 
 | |
|             # Set all config root_template properties as default page config properties
 | |
|             # except the previously set root_template content property
 | |
|             my @root_args = grep { $_ ne 'content' } keys %{$config{root_template}};
 | |
|             foreach my $root_arg (@root_args) {
 | |
|                 if (exists $page_config->{$root_arg} == 0) {
 | |
|                     $page_config->{$root_arg} = $config{root_template}{$root_arg};
 | |
|                 }
 | |
|             }
 | |
| 
 | |
|             if ($page_config->{'multipage'} && $page_config->{'multipage'} eq 'true') {
 | |
|                 if (scalar @{$parsed_arg{'ids'}} == 0) {
 | |
|                     @{$parsed_arg{'ids'}} = get_ids_from_db($tt, \$page_config);
 | |
|                 }
 | |
|                 foreach my $id (@{$parsed_arg{'ids'}}) {
 | |
|                     $page_config->{'id'} = $id;
 | |
|                     verbose ($verbose, "Generating page: $page_config->{'page'} with id: $id");
 | |
|                     generate_page($tt, \$page_config, $preview);
 | |
|                 }
 | |
|             }
 | |
|             else {
 | |
|                 verbose ($verbose, "Generating page: $page_config->{'page'}");
 | |
|                 generate_page($tt, \$page_config, $preview);
 | |
|             }
 | |
|         }
 | |
|         else {
 | |
|             verbose (1, "\nWarning: Page $parsed_arg{'page'} is not defined in the configuration file.");
 | |
|         }
 | |
|     }
 | |
|     verbose (1, "\nFinished processing the files.");
 | |
|     return 0;
 | |
| }
 | |
| 
 | |
| sub get_template_html {
 | |
|     # For an HTML based Template file, define the
 | |
|     # template start and end tags to also function as
 | |
|     # HTML comments to make the template file valid HTML.
 | |
|     #
 | |
|     return Template->new(
 | |
|         {   INCLUDE_PATH => $_[1]{templates_path},
 | |
|             OUTPUT_PATH  => $_[1]{output_path},
 | |
|             EVAL_PERL    => 1,
 | |
|             START_TAG    => '<!--%',
 | |
|             END_TAG      => '%-->',
 | |
|             PRE_CHOMP    => 1,
 | |
|             POST_CHOMP   => 1,
 | |
|             CONSTANTS    => {
 | |
|                 database => $_[0]{database},
 | |
|                 driver   => $_[0]{driver},
 | |
|                 user     => $_[0]{user},
 | |
|                 password => $_[0]{password},
 | |
|             }
 | |
|         }
 | |
|     ) || die $Template::ERROR, "\n";
 | |
| 
 | |
| }
 | |
| 
 | |
| sub generate_page {
 | |
|     my ( $tt, $config, $preview ) = @_;
 | |
|     my $html;
 | |
|     if ( !$preview ) {
 | |
|         $html = get_filename($$config);
 | |
|     }
 | |
|     $tt->process( $$config->{root_template},
 | |
|         $$config, $html
 | |
|     )
 | |
|         || die $tt->error(), "\n";
 | |
| 
 | |
| }
 | |
| 
 | |
| sub verbose {
 | |
|         my ($verbose, $message) = @_;
 | |
|         if ($verbose) {
 | |
|                 if ($verbose ne 'quiet') {
 | |
|                         print STDOUT "$message\n";
 | |
|                 }
 | |
|         }
 | |
|         else {
 | |
|                 STDOUT->autoflush(1);
 | |
|                 print STDOUT ".";
 | |
|         };
 | |
| }
 | |
| 
 | |
| sub parse_page_arg {
 | |
|     my ($page_arg) =  @_;
 | |
|     # Split page name from page ids if available.
 | |
|     my ($page, $ids) = split(/=/, $page_arg);
 | |
|     my @ids;
 | |
|     $ids = "" unless defined $ids;
 | |
| 
 | |
|     # Parse the page ids and push them onto @ids array
 | |
|     my @ids_by_comma = split(/\,/, $ids);
 | |
|     foreach my $id_by_comma (@ids_by_comma) {
 | |
|         my @ids_for_range = split(/\.\./, $id_by_comma);
 | |
|         if ((scalar @ids_for_range) == 2) {
 | |
|             push @ids, $ids_for_range[0]..$ids_for_range[1];
 | |
|         }
 | |
|         elsif ((scalar @ids_for_range) == 1) {
 | |
|             push @ids, $ids_for_range[0];
 | |
|         }
 | |
|         else {
 | |
|             verbose (1, "\nWarning: Page $page id range $id_by_comma could not be parsed.");
 | |
|         }
 | |
|     }
 | |
|     
 | |
|     return ('page' => $page, 'ids' => [@ids]);
 | |
| }
 | |
| 
 | |
| sub get_ids_from_db {
 | |
|         # Use a template to generate a string of page identifiers.
 | |
|         # The template should return the string in the form of
 | |
|         # <comma><identifier><comma><identifier>...
 | |
|         #
 | |
|         my ($tt, $config)  = @_;
 | |
|         my $selected_ids = "";
 | |
|         my $id_template = "ids-$$config->{'page'}.tpl.html";
 | |
| 
 | |
|         $tt->process($id_template, $$config, \$selected_ids)
 | |
|         || die $tt->error(), "\n";
 | |
| 
 | |
|         # Starts with a newline and comma
 | |
|         return split(/,/, substr($selected_ids, 1));
 | |
| }
 | |
| 
 | |
| sub get_filename {
 | |
|         my ($config) = @_;
 | |
|         my $filename = "output.html";
 | |
|         my $base_path = "";
 | |
| 
 | |
|         if ($$config{'filename'}) {
 | |
|                 if (substr($$config{'filename'}, -1) eq '/') {
 | |
|                         $base_path = $$config{'filename'};
 | |
|                 }
 | |
|                 else {
 | |
|                         $filename = $$config{'filename'};
 | |
|                         my $padded_index = "";
 | |
|                         if (exists $$config{'id'} && $$config{'id'} ne "") {
 | |
|                                 $padded_index = sprintf("%04d", $$config{'id'});
 | |
|                         }
 | |
|                         $filename =~ s/\[id\]/$padded_index/;
 | |
|                     return $filename;
 | |
|                 }
 | |
|         }
 | |
|         # Default naming if full filename configuration is not supplied.
 | |
|         if ($$config{'multipage'} && $$config{'multipage'} eq 'true') {
 | |
|                 my $padded_index = sprintf("%04d", $$config{'id'});
 | |
|                 $filename = "$base_path$$config{'page'}${padded_index}.html";
 | |
|         }
 | |
|         else {
 | |
|                 $filename = "$base_path$$config{'page'}.html";
 | |
|         }
 | |
|         return $filename;
 | |
| }
 | |
| 
 | |
| sub print_available_pages {
 | |
|     # Load config file
 | |
|     read_config "site.cfg" => my %config;
 | |
| 
 | |
|     my @page_args = sort ( keys %config );
 | |
| 
 | |
|     # Remove non page sections of the configuration file
 | |
|     # from the generated list of pages.
 | |
|     @page_args = grep { $_ ne 'DBI' } @page_args;
 | |
|     @page_args = grep { $_ ne 'root_template' } @page_args;
 | |
| 
 | |
|     foreach my $page_arg (@page_args) {
 | |
|         print "$page_arg\n";
 | |
|     }
 | |
|     exit;
 | |
| }
 | |
| 
 | |
| #===  FUNCTION  ================================================================
 | |
| #         NAME: parse_csv
 | |
| #      PURPOSE: Parses a simple string containing CSV data
 | |
| #   PARAMETERS: $csv_in         CSV string
 | |
| #      RETURNS: An arrayref containing the parsed CSV elements
 | |
| #  DESCRIPTION: The Text::CSV_XS module instance is created with the option
 | |
| #               'allow_whitespace' to be forgiving of any spaces around the
 | |
| #               CSV elements and to strip them. Also, 'allow_loose_quotes' is
 | |
| #               forgiving of really messed up CSV. The 'binary' option
 | |
| #               permits any characters in the tags (expecting Unicode).
 | |
| #               The fields parsed from the tag string is checked for the
 | |
| #               existence of utf8 characters and encoded to ensure any found
 | |
| #               are properly stored.
 | |
| #       THROWS: No exceptions
 | |
| #     COMMENTS: None
 | |
| #     SEE ALSO: N/A
 | |
| #===============================================================================
 | |
| sub parse_csv {
 | |
|     my ($csv_in) = @_;
 | |
| 
 | |
|     my $csv = Text::CSV_XS->new(
 | |
|         {   binary             => 1,
 | |
|             auto_diag          => 1,
 | |
|             allow_whitespace   => 1,
 | |
|             allow_loose_quotes => 1
 | |
|         }
 | |
|     );
 | |
|     my $status = $csv->parse($csv_in);
 | |
|     unless ( $status ) {
 | |
|         warn "Failed to parse CSV '$csv_in'\n" ;
 | |
|         return;
 | |
|     }
 | |
|     my @fields = $csv->fields();
 | |
| 
 | |
|     @fields = map {utf8::encode($_) if utf8::is_utf8($_); $_} @fields;
 | |
| 
 | |
|     return \@fields;
 | |
| }
 | |
| 
 | |
| #===  FUNCTION  ================================================================
 | |
| #         NAME: xml_entity
 | |
| #      PURPOSE: Static filter to encode Unicode for XML
 | |
| #   PARAMETERS: $text           String to be processed
 | |
| #      RETURNS: Processed text
 | |
| #  DESCRIPTION:
 | |
| #       THROWS: No exceptions
 | |
| #     COMMENTS: None
 | |
| #     SEE ALSO: N/A
 | |
| #===============================================================================
 | |
| sub xml_entity {
 | |
|     my ($text) = @_;
 | |
| 
 | |
| #    encode_entities_numeric( $text );
 | |
|     encode_entities( $text );
 | |
| 
 | |
|     return $text;
 | |
| }
 | |
| 
 | |
| # vim: syntax=perl:ts=8:sw=4:et:ai:tw=78:fo=tcrqn21:fdm=marker
 |