Fixing Unicode problems

site-generator:
    - Modification to the POD documentation
    - Addition of 'use 5.012' which enables various later Perl features
    - Addition of "use open ':encoding(UTF-8)'" which forces 'utf8' for
      all IO
    - Addition of 'use Template::Plugin::HTML::Strip' for consistency
    - Removal of other methods of making the default 'utf8' for IO
    - Removal of "ENCODING => 'utf8'" when setting up a new template
      object. This option allows template files to contain Unicode, but
      doesn't seem to be necessary
    - Changes to subroutine 'parse_csv': more comments, clarification of
      warning message, explicit conversion of tags which are marked as
      'utf8' to this format using 'utf8::encode' from core Perl.
This commit is contained in:
Dave Morriss 2023-08-27 13:22:11 +01:00
parent c263646cf2
commit d519936f98

View File

@ -73,6 +73,7 @@ Perl Template Toolkit.
* Template
* Template::Plugin::File
* Template::Plugin::DBI
* Template::Plugin::HTML::Strip
* DBI
* Tie::DBI
* DBD::SQLite or DBD::mysql
@ -105,8 +106,10 @@ Perl Template Toolkit.
# }}}
use 5.012;
use strict;
use warnings;
use open ':encoding(UTF-8)';
use Getopt::Long qw(:config auto_help);
use Pod::Usage;
@ -114,11 +117,9 @@ use Config::Std;
use Text::CSV_XS;
use HTML::Entities qw(encode_entities_numeric);
use Template;
use Template::Plugin::HTML::Strip;
use Data::Dumper;
binmode STDOUT, ":encoding(UTF-8)";
binmode STDERR, ":encoding(UTF-8)";
exit main();
sub main {
@ -202,8 +203,6 @@ sub main {
}
if ($page_config->{'multipage'} && $page_config->{'multipage'} eq 'true') {
# Empty arrayref bug fixed, so count is reduced by 1
# was: if (scalar @{$parsed_arg{'ids'}} == 1) {
if (scalar @{$parsed_arg{'ids'}} == 0) {
@{$parsed_arg{'ids'}} = get_ids_from_db($tt, \$page_config);
}
@ -234,7 +233,6 @@ sub get_template_html (\%@) {
return Template->new(
{ INCLUDE_PATH => $_[1]{templates_path},
OUTPUT_PATH => $_[1]{output_path},
ENCODING => 'utf8',
EVAL_PERL => 1,
START_TAG => '<!--%',
END_TAG => '%-->',
@ -258,7 +256,8 @@ sub generate_page {
$html = get_filename($$config);
}
$tt->process( $$config->{root_template},
$$config, $html, { binmode => ':utf8' } )
$$config, $html
)
|| die $tt->error(), "\n";
}
@ -375,7 +374,11 @@ sub print_available_pages {
# DESCRIPTION: The Text::CSV_XS module instance is created with the option
# 'allow_whitespace' to be forgiving of any spaces around the
# CSV elements and to strip them. Also, 'allow_loose_quotes' is
# forgiving of really messed up CSV.
# forgiving of really messed up CSV. The 'binary' option
# permits any characters in the tags (expecting Unicode).
# The fields parsed from the tag string are checked for the
# existence of utf8 characters and encoded to ensure any found
# are properly stored.
# THROWS: No exceptions
# COMMENTS: None
# SEE ALSO: N/A
@ -392,11 +395,13 @@ sub parse_csv {
);
my $status = $csv->parse($csv_in);
unless ( $status ) {
warn "Failed to parse '$csv_in'\n" ;
warn "Failed to parse CSV '$csv_in'\n" ;
return;
}
my @fields = $csv->fields();
@fields = map {utf8::encode($_) if utf8::is_utf8($_); $_} @fields;
return \@fields;
}