Archived
4
2

Fixing Unicode problems

site-generator:
    - Modification to the POD documentation
    - Addition of 'use 5.012' which enables various later Perl features
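    - Addition of "use open ':encoding(UTF-8)'" which makes UTF-8 the
      default encoding layer for filehandles opened in its scope (the
      standard streams are only covered when ':std' is also given)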
    - Addition of 'use Template::Plugin::HTML::Strip' for consistency
    - Removal of other methods of making the default 'utf8' for IO
    - Removal of "ENCODING => 'utf8'" when setting up a new template
      object. That option allows template files to contain Unicode, but
      doesn't seem to be necessary
    - Changes to subroutine 'parse_csv': more comments, clarification of
      warning message, explicit encoding of any parsed tags which carry
      Perl's 'utf8' flag into UTF-8 using 'utf8::encode' from core Perl.
This commit is contained in:
Dave Morriss 2023-08-27 13:22:11 +01:00
parent c263646cf2
commit d519936f98

View File

@ -73,6 +73,7 @@ Perl Template Toolkit.
* Template * Template
* Template::Plugin::File * Template::Plugin::File
* Template::Plugin::DBI * Template::Plugin::DBI
* Template::Plugin::HTML::Strip
* DBI * DBI
* Tie::DBI * Tie::DBI
* DBD::SQLite or DBD::mysql * DBD::SQLite or DBD::mysql
@ -105,8 +106,10 @@ Perl Template Toolkit.
# }}} # }}}
use 5.012;
use strict; use strict;
use warnings; use warnings;
use open ':encoding(UTF-8)';
use Getopt::Long qw(:config auto_help); use Getopt::Long qw(:config auto_help);
use Pod::Usage; use Pod::Usage;
@ -114,11 +117,9 @@ use Config::Std;
use Text::CSV_XS; use Text::CSV_XS;
use HTML::Entities qw(encode_entities_numeric); use HTML::Entities qw(encode_entities_numeric);
use Template; use Template;
use Template::Plugin::HTML::Strip;
use Data::Dumper; use Data::Dumper;
binmode STDOUT, ":encoding(UTF-8)";
binmode STDERR, ":encoding(UTF-8)";
exit main(); exit main();
sub main { sub main {
@ -202,8 +203,6 @@ sub main {
} }
if ($page_config->{'multipage'} && $page_config->{'multipage'} eq 'true') { if ($page_config->{'multipage'} && $page_config->{'multipage'} eq 'true') {
# Empty arrayref bug fixed, so count is reduced by 1
# was: if (scalar @{$parsed_arg{'ids'}} == 1) {
if (scalar @{$parsed_arg{'ids'}} == 0) { if (scalar @{$parsed_arg{'ids'}} == 0) {
@{$parsed_arg{'ids'}} = get_ids_from_db($tt, \$page_config); @{$parsed_arg{'ids'}} = get_ids_from_db($tt, \$page_config);
} }
@ -234,7 +233,6 @@ sub get_template_html (\%@) {
return Template->new( return Template->new(
{ INCLUDE_PATH => $_[1]{templates_path}, { INCLUDE_PATH => $_[1]{templates_path},
OUTPUT_PATH => $_[1]{output_path}, OUTPUT_PATH => $_[1]{output_path},
ENCODING => 'utf8',
EVAL_PERL => 1, EVAL_PERL => 1,
START_TAG => '<!--%', START_TAG => '<!--%',
END_TAG => '%-->', END_TAG => '%-->',
@ -258,7 +256,8 @@ sub generate_page {
$html = get_filename($$config); $html = get_filename($$config);
} }
$tt->process( $$config->{root_template}, $tt->process( $$config->{root_template},
$$config, $html, { binmode => ':utf8' } ) $$config, $html
)
|| die $tt->error(), "\n"; || die $tt->error(), "\n";
} }
@ -375,7 +374,11 @@ sub print_available_pages {
# DESCRIPTION: The Text::CSV_XS module instance is created with the option # DESCRIPTION: The Text::CSV_XS module instance is created with the option
# 'allow_whitespace' to be forgiving of any spaces around the # 'allow_whitespace' to be forgiving of any spaces around the
# CSV elements and to strip them. Also, 'allow_loose_quotes' is # CSV elements and to strip them. Also, 'allow_loose_quotes' is
# forgiving of really messed up CSV. # forgiving of really messed up CSV. The 'binary' option
# permits any characters in the tags (expecting Unicode).
# The fields parsed from the tag string are checked for the
# existence of utf8 characters and encoded to ensure any found
# are properly stored.
# THROWS: No exceptions # THROWS: No exceptions
# COMMENTS: None # COMMENTS: None
# SEE ALSO: N/A # SEE ALSO: N/A
@ -392,11 +395,13 @@ sub parse_csv {
); );
my $status = $csv->parse($csv_in); my $status = $csv->parse($csv_in);
unless ( $status ) { unless ( $status ) {
warn "Failed to parse '$csv_in'\n" ; warn "Failed to parse CSV '$csv_in'\n" ;
return; return;
} }
my @fields = $csv->fields(); my @fields = $csv->fields();
@fields = map {utf8::encode($_) if utf8::is_utf8($_); $_} @fields;
return \@fields; return \@fields;
} }