From d519936f98fea030faf62357735fcea61d99f800 Mon Sep 17 00:00:00 2001 From: Dave Morriss Date: Sun, 27 Aug 2023 13:22:11 +0100 Subject: [PATCH] Fixing Unicode problems site-generator: - Modification to the POD documentation - Addition of 'use 5.012' which enables various later Perl features - Addition of "use open ':encoding(UTF-8)'" which forces 'utf8' for all IO - Addition of 'use Template::Plugin::HTML::Strip' for consistency - Removal of other methods of making the default 'utf8' for IO - Removal of "ENCODING => 'utf8'" when setting up a new template object. That option allows template files to contain Unicode, but doesn't seem to be necessary - Changes to subroutine 'parse_csv': more comments, clarification of warning message, explicit conversion of tags which are marked as 'utf8' to this format using 'utf8::encode' from core Perl. --- site-generator | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/site-generator b/site-generator index bef4d08..71ec176 100755 --- a/site-generator +++ b/site-generator @@ -73,6 +73,7 @@ Perl Template Toolkit. * Template * Template::Plugin::File * Template::Plugin::DBI + * Template::Plugin::HTML::Strip * DBI * Tie::DBI * DBD::SQLite or DBD:mysql @@ -105,8 +106,10 @@ Perl Template Toolkit. 
# }}} +use 5.012; use strict; use warnings; +use open ':encoding(UTF-8)'; use Getopt::Long qw(:config auto_help); use Pod::Usage; @@ -114,11 +117,9 @@ use Config::Std; use Text::CSV_XS; use HTML::Entities qw(encode_entities_numeric); use Template; +use Template::Plugin::HTML::Strip; use Data::Dumper; -binmode STDOUT, ":encoding(UTF-8)"; -binmode STDERR, ":encoding(UTF-8)"; - exit main(); sub main { @@ -202,8 +203,6 @@ sub main { } if ($page_config->{'multipage'} && $page_config->{'multipage'} eq 'true') { - # Empty arrayref bug fixed, so count is reduced by 1 - # was: if (scalar @{$parsed_arg{'ids'}} == 1) { if (scalar @{$parsed_arg{'ids'}} == 0) { @{$parsed_arg{'ids'}} = get_ids_from_db($tt, \$page_config); } @@ -234,7 +233,6 @@ sub get_template_html (\%@) { return Template->new( { INCLUDE_PATH => $_[1]{templates_path}, OUTPUT_PATH => $_[1]{output_path}, - ENCODING => 'utf8', EVAL_PERL => 1, START_TAG => '', @@ -258,7 +256,8 @@ sub generate_page { $html = get_filename($$config); } $tt->process( $$config->{root_template}, - $$config, $html, { binmode => ':utf8' } ) + $$config, $html + ) || die $tt->error(), "\n"; } @@ -375,7 +374,11 @@ sub print_available_pages { # DESCRIPTION: The Text::CSV_XS module instance is created with the option # 'allow_whitespace' to be forgiving of any spaces around the # CSV elements and to strip them. Also, 'allow_loose_quotes' is -# forgiving of really messed up CSV. +# forgiving of really messed up CSV. The 'binary' option +# permits any characters in the tags (expecting Unicode). +# The fields parsed from the tag string are checked for the +# existence of utf8 characters and encoded to ensure any found +# are properly stored. 
# THROWS: No exceptions # COMMENTS: None # SEE ALSO: N/A @@ -392,11 +395,13 @@ sub parse_csv { ); my $status = $csv->parse($csv_in); unless ( $status ) { - warn "Failed to parse '$csv_in'\n" ; + warn "Failed to parse CSV '$csv_in'\n" ; return; } my @fields = $csv->fields(); + @fields = map {utf8::encode($_) if utf8::is_utf8($_); $_} @fields; + return \@fields; }