519 lines
13 KiB
Perl
Executable File
519 lines
13 KiB
Perl
Executable File
#!/usr/bin/env perl
|
|
#===============================================================================
|
|
#
|
|
# FILE: make_tag_index
|
|
#
|
|
# USAGE: ./make_tag_index [-help] [-debug=N] [-out=FILE] [-config=FILE]
|
|
#
|
|
# DESCRIPTION: Make tag lookup pages for the HPR website
|
|
#
|
|
# OPTIONS: ---
|
|
# REQUIREMENTS: ---
|
|
# BUGS: ---
|
|
# NOTES: ---
|
|
# AUTHOR: Dave Morriss (djm), Dave.Morriss@gmail.com
|
|
# VERSION: 0.0.2
|
|
# CREATED: 2022-09-08 11:52:53
|
|
# REVISION: 2022-09-10 14:59:38
|
|
#
|
|
#===============================================================================
|
|
|
|
use v5.16;
|
|
use strict;
|
|
use warnings;
|
|
use utf8;
|
|
use feature qw{ postderef say signatures state };
|
|
no warnings qw{ experimental::postderef experimental::signatures };
|
|
|
|
use Getopt::Long;
|
|
use Pod::Usage;
|
|
|
|
use Config::General;
|
|
|
|
use Template;
|
|
use Template::Filters;
|
|
Template::Filters->use_html_entities; # Use HTML::Entities in the template
|
|
|
|
use Text::CSV_XS;
|
|
|
|
use DBI;
|
|
|
|
use Data::Dumper;
|
|
|
|
#
|
|
# Version number (manually incremented)
|
|
#
|
|
our $VERSION = '0.0.2';
|
|
|
|
#
|
|
# Script and directory names
|
|
#
|
|
( my $PROG = $0 ) =~ s|.*/||mx;
|
|
( my $DIR = $0 ) =~ s|/?[^/]*$||mx;
|
|
$DIR = '.' unless $DIR;
|
|
|
|
#-------------------------------------------------------------------------------
|
|
# Declarations
|
|
#-------------------------------------------------------------------------------
|
|
#
|
|
# Constants and other declarations
|
|
#
|
|
my $basedir = "$ENV{HOME}/HPR/Database";
|
|
my $configfile = "$basedir/.hpr_db.cfg";
|
|
my $template = "$basedir/$PROG.tpl";
|
|
|
|
my ( $dbh, $sth1, $h1 );
|
|
|
|
#
|
|
# Enable Unicode mode
|
|
#
|
|
binmode STDOUT, ":encoding(UTF-8)";
|
|
binmode STDERR, ":encoding(UTF-8)";
|
|
|
|
#-------------------------------------------------------------------------------
|
|
# Options and arguments
|
|
#-------------------------------------------------------------------------------
|
|
my $DEF_DEBUG = 0;
|
|
|
|
#
|
|
# Process options
|
|
#
|
|
my %options;
|
|
Options( \%options );
|
|
|
|
#
|
|
# Default help
|
|
#
|
|
pod2usage( -msg => "$PROG version $VERSION\n", -exitval => 1 )
|
|
if ( $options{'help'} );
|
|
|
|
#
|
|
# Full documentation if requested with -doc
|
|
#
|
|
pod2usage( -msg => "$PROG version $VERSION\n", -verbose => 2, -exitval => 1 )
|
|
if ( $options{'doc'} );
|
|
|
|
#
|
|
# Collect options
|
|
#
|
|
my $DEBUG = ( defined( $options{debug} ) ? $options{debug} : $DEF_DEBUG );
|
|
my $cfgfile
|
|
= ( defined( $options{config} ) ? $options{config} : $configfile );
|
|
|
|
my $templatefile = $options{template};
|
|
my $outfile = $options{out};
|
|
|
|
#-------------------------------------------------------------------------------
|
|
# Template is the default pre-defined string or a filename
|
|
#-------------------------------------------------------------------------------
|
|
if ($templatefile) {
|
|
die "Unable to find template $templatefile\n" unless ( -e $templatefile );
|
|
}
|
|
else {
|
|
$templatefile = $template;
|
|
}
|
|
|
|
#-------------------------------------------------------------------------------
|
|
# Open the output file (or STDOUT)
|
|
#-------------------------------------------------------------------------------
|
|
my $outfh;
|
|
if ($outfile) {
|
|
open( $outfh, ">:encoding(UTF-8)", $outfile )
|
|
or die "Unable to open $outfile for writing: $!";
|
|
}
|
|
else {
|
|
open( $outfh, ">&", \*STDOUT )
|
|
or die "Unable to initialise for writing: $!";
|
|
}
|
|
|
|
#
|
|
# Sanity check
|
|
#
|
|
die "Unable to find $cfgfile\n" unless ( -e $cfgfile );
|
|
|
|
#
|
|
# Load configuration data
|
|
#
|
|
my $conf = new Config::General(
|
|
-ConfigFile => $cfgfile,
|
|
-InterPolateVars => 1,
|
|
-ExtendedAccess => 1
|
|
);
|
|
my %config = $conf->getall();
|
|
|
|
#-------------------------------------------------------------------------------
|
|
# Connect to the database
|
|
#-------------------------------------------------------------------------------
|
|
my $dbhost = $config{database}->{host} // '127.0.0.1';
|
|
my $dbport = $config{database}->{port} // 3306;
|
|
my $dbname = $config{database}->{name};
|
|
my $dbuser = $config{database}->{user};
|
|
my $dbpwd = $config{database}->{password};
|
|
$dbh = DBI->connect( "dbi:mysql:host=$dbhost;port=$dbport;database=$dbname",
|
|
$dbuser, $dbpwd, { AutoCommit => 1 } )
|
|
or die $DBI::errstr;
|
|
|
|
#
|
|
# Enable client-side UTF8
|
|
#
|
|
$dbh->{mysql_enable_utf8} = 1;
|
|
|
|
#-------------------------------------------------------------------------------
|
|
# Perform a scan of episodes for tags and accumulate them in a hash
|
|
#-------------------------------------------------------------------------------
|
|
$sth1 = $dbh->prepare(q{SELECT id,title,tags FROM eps WHERE length(tags) > 0})
|
|
or die $DBI::errstr;
|
|
|
|
$sth1->execute;
|
|
if ( $dbh->err ) {
|
|
warn $dbh->errstr;
|
|
}
|
|
|
|
my ( $status, @fields, %tag_ids, $lastkey, @tagindex, %showtitles );
|
|
|
|
my $csv = Text::CSV_XS->new(
|
|
{ binary => 1,
|
|
auto_diag => 1,
|
|
escape_char => "\\",
|
|
allow_loose_quotes => 1
|
|
}
|
|
);
|
|
|
|
#
|
|
# Loop through the episodes returned by the query
|
|
#
|
|
while ( $h1 = $sth1->fetchrow_hashref ) {
|
|
#
|
|
# Stash the show title with the show number
|
|
#
|
|
$showtitles{ $h1->{id} } = $h1->{title};
|
|
|
|
#
|
|
# Parse the tag list for the current episode
|
|
#
|
|
$status = $csv->parse( $h1->{tags} );
|
|
unless ($status) {
|
|
#
|
|
# Report any errors
|
|
#
|
|
print "Parse error on episode ", $h1->{id}, "\n";
|
|
print $csv->error_input(), "\n";
|
|
next;
|
|
}
|
|
@fields = $csv->fields();
|
|
|
|
#
|
|
# Not sure why there are no tags but if not ignore this episode
|
|
#
|
|
next unless (@fields);
|
|
|
|
#
|
|
# Trim and lowercase all tags
|
|
#
|
|
@fields = map {
|
|
my $t = $_;
|
|
$t =~ s/(^\s+|\s+$)//g;
|
|
lc($t)
|
|
} @fields;
|
|
|
|
#
|
|
# Loop through the tags. For each tag add the associated episode id to the
|
|
# %tag_ids hash. The key to this hash is the lower case tag and the value
|
|
# is an array of episode numbers.
|
|
#
|
|
foreach my $tag (@fields) {
|
|
if ( defined( $tag_ids{$tag} ) ) {
|
|
#
|
|
# Add to the existing array
|
|
#
|
|
push( @{ $tag_ids{$tag} }, $h1->{id} );
|
|
}
|
|
else {
|
|
#
|
|
# Create the episode array
|
|
#
|
|
$tag_ids{$tag} = [ $h1->{id} ];
|
|
}
|
|
}
|
|
}
|
|
|
|
#
|
|
# Dumps the whole tags table. Warning!
|
|
#
|
|
_debug( $DEBUG > 2, '%tag_ids: ' . Dumper( \%tag_ids ) );
|
|
|
|
#-------------------------------------------------------------------------------
|
|
# Make an alphabetic index of the tags
|
|
#-------------------------------------------------------------------------------
|
|
$lastkey = '';
|
|
foreach my $tag ( sort( keys(%tag_ids) ) ) {
|
|
if ( substr( $tag, 0, 1 ) ne $lastkey ) {
|
|
$lastkey = substr( $tag, 0, 1 );
|
|
push( @tagindex, $tag );
|
|
}
|
|
}
|
|
|
|
_debug( $DEBUG > 1, '@tagindex: ' . Dumper( \@tagindex ) );
|
|
|
|
#-------------------------------------------------------------------------------
|
|
# Fill and print the template
|
|
#-------------------------------------------------------------------------------
|
|
my $tt = Template->new(
|
|
{ ABSOLUTE => 1,
|
|
ENCODING => 'utf8',
|
|
INCLUDE_PATH => $basedir,
|
|
OUTPUT_PATH => '.',
|
|
}
|
|
);
|
|
my $vars = {
|
|
tag_ids => \%tag_ids,
|
|
tagindex => \@tagindex,
|
|
titles => \%showtitles,
|
|
};
|
|
my $document;
|
|
$tt->process( $templatefile, $vars, \$document, { binmode => ':utf8' } )
|
|
|| die $tt->error(), "\n";
|
|
|
|
print $outfh $document;
|
|
close($outfh);
|
|
|
|
$dbh->disconnect;
|
|
|
|
exit;
|
|
|
|
#=== FUNCTION ================================================================
|
|
# NAME: _debug
|
|
# PURPOSE: Prints debug reports
|
|
# PARAMETERS: $active Boolean: 1 for print, 0 for no print
|
|
# $message Message to print
|
|
# RETURNS: Nothing
|
|
# DESCRIPTION: Outputs a message if $active is true. It removes any trailing
|
|
# newline and then adds one in the 'print' to the caller doesn't
|
|
# have to bother. Prepends the message with 'D> ' to show it's
|
|
# a debug message.
|
|
# THROWS: No exceptions
|
|
# COMMENTS: None
|
|
# SEE ALSO: N/A
|
|
#===============================================================================
|
|
sub _debug {
|
|
my ( $active, $message ) = @_;
|
|
|
|
chomp($message);
|
|
print "D> $message\n" if $active;
|
|
}
|
|
|
|
#=== FUNCTION ================================================================
|
|
# NAME: Options
|
|
# PURPOSE: Processes command-line options
|
|
# PARAMETERS: $optref Hash reference to hold the options
|
|
# RETURNS: Undef
|
|
# DESCRIPTION:
|
|
# THROWS: no exceptions
|
|
# COMMENTS: none
|
|
# SEE ALSO: n/a
|
|
#===============================================================================
|
|
sub Options {
|
|
my ($optref) = @_;
|
|
|
|
my @options
|
|
= ( "help", "doc", "debug=i", "template=s", "out=s", "config=s", );
|
|
|
|
if ( !GetOptions( $optref, @options ) ) {
|
|
pod2usage( -msg => "$PROG version $VERSION\n", -exitval => 1 );
|
|
}
|
|
|
|
return;
|
|
}
|
|
|
|
__END__
|
|
|
|
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
# Application Documentation
|
|
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
#{{{
|
|
|
|
=head1 NAME
|
|
|
|
make_tag_index - Generate a tag index from the tags in the database
|
|
|
|
=head1 VERSION
|
|
|
|
This documentation refers to make_tag_index version 0.0.2
|
|
|
|
=head1 USAGE
|
|
|
|
./make_tag_index [-help] [-doc] [-debug=N] [-template=FILE] [-out=FILE]
|
|
[-config=FILE]
|
|
|
|
./make_tag_index -help
|
|
./make_tag_index -doc
|
|
./make_tag_index -out=tags.php
|
|
./make_tag_index -template=MTI_1.tpl -out=tags.php
|
|
./make_tag_index -config=$HOME/HPR/.hpr_livedb.cfg -out=tags.php
|
|
|
|
=head1 OPTIONS
|
|
|
|
=over 8
|
|
|
|
=item B<-help>
|
|
|
|
Prints a brief help message describing the usage of the program, and then exits.
|
|
|
|
=item B<-doc>
|
|
|
|
Displays the entirety of the documentation (using a pager), and then exits. To
|
|
generate a PDF version use:
|
|
|
|
pod2pdf make_tag_index --out=make_tag_index.pdf
|
|
|
|
|
|
=item B<-debug=N>
|
|
|
|
Causes certain debugging information to be displayed.
|
|
|
|
0 (the default) no debug output
|
|
1 N/A
|
|
2 dumps @tagindex an array containing tags and show numbers for the index
|
|
3 dumps %tag_ids the data used to build the entire tag list (warning!)
|
|
|
|
=item B<-out=FILE>
|
|
|
|
This option defines an output file to receive the report. If the option is
|
|
omitted the report is written to STDOUT, allowing it to be redirected if
|
|
required.
|
|
|
|
=item B<-config=FILE>
|
|
|
|
This option allows an alternative configuration file to be used. This file
|
|
defines the location of the database, its port, its name and the username and
|
|
password to be used to access it. This feature was added to allow the script
|
|
to access alternative databases or the live database over an SSH tunnel.
|
|
|
|
See the CONFIGURATION AND ENVIRONMENT section below for the file format.
|
|
|
|
If the option is omitted the default file is used: B<.hpr_db.cfg>
|
|
|
|
=item B<-template=FILE>
|
|
|
|
This option defines the template used to generate the tag index. The template
|
|
is written using the B<Template> toolkit language.
|
|
|
|
If the option is omitted then the script uses the file
|
|
B<make_tag_index.tpl> in the same directory as the script. If this file
|
|
does not exist then the script will exit with an error message.
|
|
|
|
=back
|
|
|
|
=head1 DESCRIPTION
|
|
|
|
The script reads all episodes in the HPR database. Each row contains a 'tags'
|
|
field which contains tags as a comma-separated list. This list is parsed and
|
|
stored in a Perl hash. The hash is keyed by the lower-case tag and the value
|
|
part of each hash element contains a Perl arrayref containing a list of show
|
|
numbers. The tag/show hash is called B<%tag_ids>. There are over 5800 tags in
|
|
the system in September 2022.
|
|
|
|
An array called B<@tagindex> is also created which holds the first tag of each
|
|
group starting with the same character. So, with a particular tag population,
|
|
the 'a' group might start with 'aaron newcomb', 'b' with 'b+ tree' and so
|
|
forth.
|
|
|
|
A further hash called B<%showtitles> is indexed by show number and holds the
|
|
title of the show. This has been added in preparation for producing a tag
|
|
index pages which have better accessibility features.
|
|
|
|
=head1 DIAGNOSTICS
|
|
|
|
=over 4
|
|
|
|
=item B<Unable to find template ...>
|
|
|
|
Type: fatal
|
|
|
|
The template specified does not exist.
|
|
|
|
=item B<Unable to open ... for writing: ...>
|
|
|
|
Type: fatal
|
|
|
|
The nominated output file cannot be written to.
|
|
|
|
=item B<Unable to initialise for writing: ...>;
|
|
|
|
Type: fatal
|
|
|
|
Trying to write to STDOUT has failed.
|
|
|
|
=item B<Unable to find ...>
|
|
|
|
Type: fatal
|
|
|
|
The nominated configuration file cannot be found
|
|
|
|
=item B<various database errors>
|
|
|
|
Type: fatal
|
|
|
|
Failure while opening the database or preparing a query.
|
|
|
|
=item B<Errors from Template Toolkit>
|
|
|
|
Type: fatal
|
|
|
|
The template could not be processed
|
|
|
|
=back
|
|
|
|
=head1 CONFIGURATION AND ENVIRONMENT
|
|
|
|
The script obtains the credentials it requires to open the HPR database from
|
|
a configuration file. The name of the file it expects is B<.hpr_db.cfg> in the
|
|
directory holding the script. To change this will require changing the script.
|
|
|
|
The configuration file format is as follows:
|
|
|
|
<database>
|
|
host = 127.0.0.1
|
|
port = PORT
|
|
name = DATABASE
|
|
user = USERNAME
|
|
password = PASSWORD
|
|
</database>
|
|
|
|
=head1 DEPENDENCIES
|
|
|
|
Config::General
|
|
DBI
|
|
Data::Dumper
|
|
Getopt::Long
|
|
Pod::Usage
|
|
Template
|
|
Template::Filters
|
|
Text::CSV_XS
|
|
|
|
=head1 BUGS AND LIMITATIONS
|
|
|
|
There are no known bugs in this module.
|
|
Please report problems to Dave Morriss (Dave.Morriss@gmail.com)
|
|
Patches are welcome.
|
|
|
|
=head1 AUTHOR
|
|
|
|
Dave Morriss (Dave.Morriss@gmail.com)
|
|
|
|
=head1 LICENCE AND COPYRIGHT
|
|
|
|
Copyright (c) 2022 Dave Morriss (Dave.Morriss@gmail.com). All rights reserved.
|
|
|
|
This module is free software; you can redistribute it and/or
|
|
modify it under the same terms as Perl itself. See perldoc perlartistic.
|
|
|
|
=cut
|
|
|
|
#}}}
|
|
|
|
# [zo to open fold, zc to close]
|
|
|
|
# vim: syntax=perl:ts=8:sw=4:et:ai:tw=78:fo=tcrqn21:fdm=marker
|
|
|