forked from HPR/hpr-tools
Moved project directories and files to an empty local repo
This commit is contained in:
751
Show_Submission/fix_relative_links
Executable file
751
Show_Submission/fix_relative_links
Executable file
@@ -0,0 +1,751 @@
|
||||
#!/usr/bin/env perl
|
||||
#===============================================================================
|
||||
#
|
||||
# FILE: fix_relative_links
|
||||
#
|
||||
# USAGE: ./fix_relative_links [options] -episode=N FILE
|
||||
#
|
||||
# DESCRIPTION: Processes an HTML input file, looking for relative URLs. If
|
||||
# any are found these are made absolute using the -baseURL=URL
|
||||
# option or a default. The intention is to make them into
|
||||
# HPR-absolute URLs.
|
||||
#
|
||||
# OPTIONS: ---
|
||||
# REQUIREMENTS: ---
|
||||
# BUGS: ---
|
||||
# NOTES: ---
|
||||
# AUTHOR: Dave Morriss (djm), Dave.Morriss@gmail.com
|
||||
# VERSION: 0.0.3
|
||||
# CREATED: 2022-10-14 11:56:03
|
||||
# REVISION: 2022-10-23 22:12:08
|
||||
#
|
||||
#===============================================================================
|
||||
|
||||
use v5.16;
|
||||
use strict;
|
||||
use warnings;
|
||||
use utf8;
|
||||
use feature qw{ postderef say signatures state };
|
||||
no warnings qw{ experimental::postderef experimental::signatures };
|
||||
|
||||
use Carp;
|
||||
use Getopt::Long;
|
||||
use Pod::Usage;
|
||||
|
||||
use File::Basename;
|
||||
|
||||
use IO::HTML;
|
||||
use HTML::TreeBuilder 5 -weak;
|
||||
use URI;
|
||||
|
||||
use Log::Handler;
|
||||
|
||||
use Data::Dumper;
|
||||
|
||||
#
|
||||
# Version number (manually incremented)
|
||||
#
|
||||
our $VERSION = '0.0.3';
|
||||
|
||||
#
|
||||
# Script and directory names
|
||||
#
|
||||
( my $PROG = $0 ) =~ s|.*/||mx;
|
||||
( my $DIR = $0 ) =~ s|/?[^/]*$||mx;
|
||||
$DIR = '.' unless $DIR;
|
||||
|
||||
#-------------------------------------------------------------------------------
|
||||
# Declarations
|
||||
#-------------------------------------------------------------------------------
|
||||
#
|
||||
# Constants and other declarations
|
||||
#
|
||||
my $basedir = "$ENV{HOME}/HPR/Show_Submission";
|
||||
my $logdir = "$basedir/logs";
|
||||
my $logfile = "$logdir/${PROG}.log";
|
||||
|
||||
#
|
||||
# Variables, arrays and hashes
|
||||
#
|
||||
my ( $DEBUG, $verbose, $silent, $showno, $base_URL, $fragment, $count_only );
|
||||
my ( $outfile, $filename, $showdir, $changes, $html );
|
||||
|
||||
#
|
||||
# Enable Unicode mode
|
||||
#
|
||||
binmode STDOUT, ":encoding(UTF-8)";
|
||||
binmode STDERR, ":encoding(UTF-8)";
|
||||
|
||||
#-------------------------------------------------------------------------------
|
||||
# Options and arguments
|
||||
#-------------------------------------------------------------------------------
|
||||
#
|
||||
# Option defaults
|
||||
#
|
||||
my $DEFDEBUG = 0;
|
||||
|
||||
my %options;
|
||||
Options( \%options );
|
||||
|
||||
#
|
||||
# Default help shows minimal information
|
||||
#
|
||||
pod2usage( -msg => "$PROG version $VERSION\n", -exitval => 1, -verbose => 0 )
|
||||
if ( $options{'help'} );
|
||||
|
||||
#
|
||||
# The -documentation or -man option shows the full POD documentation through
|
||||
# a pager for convenience
|
||||
#
|
||||
pod2usage( -msg => "$PROG version $VERSION\n", -exitval => 1, -verbose => 2 )
|
||||
if ( $options{'documentation'} );
|
||||
|
||||
#
|
||||
# Collect options
|
||||
#
|
||||
$DEBUG = ( defined( $options{debug} ) ? $options{debug} : $DEFDEBUG );
|
||||
$showno = $options{episode};
|
||||
$base_URL = $options{baseURL};
|
||||
$fragment = ( defined( $options{fragment} ) ? $options{fragment} : 0 );
|
||||
$count_only = ( defined( $options{count} ) ? $options{count} : 0 );
|
||||
$outfile = $options{output};
|
||||
|
||||
#
|
||||
# Argument
|
||||
#
|
||||
$filename = shift;
|
||||
|
||||
#
|
||||
# Sanity checks
|
||||
#
|
||||
pod2usage(
|
||||
-msg => "$PROG version $VERSION\nShow number missing\n",
|
||||
-exitval => 1,
|
||||
-verbose => 0
|
||||
) unless $showno;
|
||||
|
||||
pod2usage(
|
||||
-msg => "$PROG version $VERSION\nInput file name missing\n",
|
||||
-exitval => 1,
|
||||
-verbose => 0
|
||||
) unless $filename;
|
||||
|
||||
#
|
||||
# Add leading zeroes to the show number if necessary
|
||||
#
|
||||
$showno = sprintf( '%04d', $showno );
|
||||
|
||||
#
|
||||
# Directories and files specific to this show
|
||||
#
|
||||
$showdir = "$basedir/shownotes/hpr$showno";
|
||||
|
||||
#
|
||||
# Allow the input filename to be a bare name
|
||||
#
|
||||
if ( !-e $filename ) {
|
||||
$filename = "$showdir/$filename";
|
||||
}
|
||||
die "Unable to find $filename" unless ( -e $filename );
|
||||
|
||||
#
|
||||
# Work on the output file, allowing defaults and substitution points for
|
||||
# convenience. If there's no outfile we'll just process the HTML and nothing
|
||||
# more.
|
||||
#
|
||||
if ( defined($outfile) ) {
|
||||
$outfile = output_file_name( $outfile, $showno, 'hpr%d_new.html' );
|
||||
$outfile = "$showdir/$outfile" if (dirname($outfile) eq '.');
|
||||
}
|
||||
|
||||
#
|
||||
# Default base URL
|
||||
#
|
||||
unless ($base_URL) {
|
||||
$base_URL = "https://hackerpublicradio.org/eps/hpr$showno/";
|
||||
}
|
||||
|
||||
#
|
||||
# Base URL must have a trailing '/'
|
||||
#
|
||||
$base_URL .= '/' unless ( $base_URL =~ qr{/$} );
|
||||
|
||||
#-------------------------------------------------------------------------------
|
||||
# Set up logging keeping the default log layout except for the date. The format
|
||||
# is "%T [%L] %m" where '%T' is the timestamp, '%L' is the log level and '%m is
|
||||
# the message.
|
||||
#-------------------------------------------------------------------------------
|
||||
my $log = Log::Handler->new();
|
||||
|
||||
$log->add(
|
||||
file => {
|
||||
timeformat => "%Y/%m/%d %H:%M:%S",
|
||||
filename => $logfile,
|
||||
minlevel => 0,
|
||||
maxlevel => 7,
|
||||
}
|
||||
);
|
||||
|
||||
#
|
||||
# Log preamble
|
||||
#
|
||||
$log->info("Show number: $showno");
|
||||
$log->info("Processing: $filename");
|
||||
$log->info("Base: $base_URL");
|
||||
|
||||
#
|
||||
# Find and change any relative URLs returning the number of changes and the
|
||||
# altered HTML
|
||||
#
|
||||
( $changes, $html )
|
||||
= find_links_in_file( $filename, $base_URL, $fragment, $count_only );
|
||||
|
||||
$log->info("Number of changes: $changes");
|
||||
|
||||
#
|
||||
# Exit without writing if we're just counting
|
||||
#
|
||||
if ($count_only) {
|
||||
$log->info("Count only mode");
|
||||
exit $changes;
|
||||
}
|
||||
|
||||
#
|
||||
# Exit without writing if there were no changes
|
||||
#
|
||||
if ($changes == 0) {
|
||||
$log->info("No output written");
|
||||
exit $changes;
|
||||
}
|
||||
|
||||
#
|
||||
# Write output if an output file was specified
|
||||
#
|
||||
if ($outfile) {
|
||||
write_output( $outfile, $html );
|
||||
$log->info("Changes applied; written to $outfile");
|
||||
}
|
||||
else {
|
||||
$log->info("No output written");
|
||||
}
|
||||
|
||||
exit $changes;
|
||||
|
||||
#=== FUNCTION ================================================================
|
||||
# NAME: find_links_in_file
|
||||
# PURPOSE: Finds relative links in an HTML file
|
||||
# PARAMETERS: $filename the name of the file we're parsing
|
||||
# $base_URL the part of the full URL we'll replace
|
||||
# $fragment Boolean signalling whether to treat the HTML
|
||||
# as a fragment or an entire document
|
||||
# $count_only Boolean signalling that all we want is the
|
||||
# count of relative URLs, no action is to be taken
|
||||
# RETURNS: The number of URLs "repaired".
|
||||
# DESCRIPTION:
|
||||
# THROWS: No exceptions
|
||||
# COMMENTS: None
|
||||
# SEE ALSO: N/A
|
||||
#===============================================================================
|
||||
sub find_links_in_file {
|
||||
my ( $filename, $base_URL, $fragment, $count_only ) = @_;
|
||||
|
||||
my ( $base_uri, $tree, $uri_orig, $uri_new );
|
||||
my ( $newlink, $linkedits, $result );
|
||||
|
||||
#
|
||||
# Parse the base URL
|
||||
#
|
||||
$base_uri = URI->new($base_URL);
|
||||
|
||||
#
|
||||
# Create a tree object
|
||||
#
|
||||
$tree = HTML::TreeBuilder->new;
|
||||
$tree->ignore_unknown(0);
|
||||
$tree->no_expand_entities(1);
|
||||
$tree->p_strict(1);
|
||||
$tree->store_comments(1);
|
||||
$tree->warn(1);
|
||||
|
||||
#
|
||||
# Parse the file using IO::HTML to grab it. Die if we fail because then we
|
||||
# know this stuff needs some urgent attention.
|
||||
#
|
||||
$tree->parse_file( html_file($filename) )
|
||||
or die "HTML::TreeBuilder failed to process $filename: $!\n";
|
||||
|
||||
$linkedits = 0;
|
||||
|
||||
#
|
||||
# Scan for all anchors and images using the HTML::Element method
|
||||
# 'extract_links'. The technique used here is from the HTML::Element man
|
||||
# page.
|
||||
#
|
||||
for ( @{ $tree->extract_links( 'a', 'img' ) } ) {
|
||||
my ( $link, $element, $attr, $tag ) = @$_;
|
||||
|
||||
#
|
||||
# Parse the link
|
||||
#
|
||||
$uri_orig = URI->new($link);
|
||||
|
||||
#
|
||||
# A relative link (presumably) doesn't have a scheme
|
||||
#
|
||||
unless ( $uri_orig->scheme ) {
|
||||
#
|
||||
# Original link
|
||||
#
|
||||
say "Relative link: $link";
|
||||
|
||||
#
|
||||
# Make the link absolute
|
||||
#
|
||||
$uri_new = make_absolute( $uri_orig, $base_uri );
|
||||
# $uri_new = URI->new_abs( $link, $base_URL );
|
||||
$newlink = sprintf( "%s:%s", $uri_new->scheme, $uri_new->opaque );
|
||||
say "Absolute link: $newlink";
|
||||
|
||||
#
|
||||
# Modify the HTML to make the relative absolute
|
||||
#
|
||||
if ( $uri_orig->fragment ) {
|
||||
# Not sure if we need to cater for URI fragments, but we'll try it
|
||||
$element->attr( $attr, $newlink . '#' . $uri_orig->fragment );
|
||||
}
|
||||
else {
|
||||
$element->attr( $attr, $newlink );
|
||||
}
|
||||
|
||||
$linkedits++;
|
||||
}
|
||||
}
|
||||
|
||||
#
|
||||
# Exit here if we were just asked to count
|
||||
#
|
||||
return ( $linkedits, undef ) if $count_only;
|
||||
|
||||
#
|
||||
# In 'HTML fragment' mode generate the body part without the <body> tags.
|
||||
#
|
||||
if ($fragment) {
|
||||
my $body = $tree->look_down( _tag => 'body' );
|
||||
( $result = $body->as_HTML( undef, ' ', {} ) )
|
||||
=~ s{(^<body[^>]*>|</body>$)}{}gi;
|
||||
}
|
||||
else {
|
||||
$result = $tree->as_HTML( undef, ' ', {} );
|
||||
}
|
||||
|
||||
return ( $linkedits, $result );
|
||||
|
||||
}
|
||||
|
||||
#=== FUNCTION ================================================================
|
||||
# NAME: write_output
|
||||
# PURPOSE: Write the "repaired" HTML
|
||||
# PARAMETERS: $outfile name of the output file
|
||||
# $html the HTML to write out
|
||||
# RETURNS: Nothing
|
||||
# DESCRIPTION:
|
||||
# THROWS: No exceptions
|
||||
# COMMENTS: None
|
||||
# SEE ALSO: N/A
|
||||
#===============================================================================
|
||||
sub write_output {
|
||||
my ( $outfile, $html ) = @_;
|
||||
|
||||
open( my $out, '>:encoding(UTF-8)', $outfile )
|
||||
or die "Unable to open $outfile for output: $!\n";
|
||||
|
||||
print $out $html;
|
||||
|
||||
close($out);
|
||||
|
||||
}
|
||||
|
||||
#=== FUNCTION ================================================================
|
||||
# NAME: make_absolute
|
||||
# PURPOSE: Take a relative URI and a base URI and return the absolute URI
|
||||
# PARAMETERS: $relative relative URL as a URI object
|
||||
# $base base URL as a URI object
|
||||
# RETURNS: Absolute URL as a URI object
|
||||
# DESCRIPTION:
|
||||
# THROWS: No exceptions
|
||||
# COMMENTS: None
|
||||
# SEE ALSO: N/A
|
||||
#===============================================================================
|
||||
sub make_absolute {
|
||||
my ( $relative, $base ) = @_;
|
||||
|
||||
my ( %base_path, @relative_path, $absolute );
|
||||
|
||||
#
|
||||
# Chop up the path from the base and store in a hash
|
||||
#
|
||||
%base_path = map { $_ => 1 } split( '/', $base->path );
|
||||
|
||||
#
|
||||
# Chop up the relative path
|
||||
#
|
||||
@relative_path = split( '/', $relative->path );
|
||||
|
||||
#
|
||||
# Remove relative path elements if they are in the base
|
||||
#
|
||||
@relative_path = grep { !exists( $base_path{$_} ) } @relative_path;
|
||||
|
||||
#
|
||||
# If the relative path is empty we assume it's referring to the
|
||||
# 'index.html' file.
|
||||
#
|
||||
push( @relative_path, 'index.html' ) unless (@relative_path);
|
||||
|
||||
#
|
||||
# Rebuild the relative path
|
||||
#
|
||||
$relative->path( join( '/', @relative_path ) );
|
||||
|
||||
#
|
||||
# Return the result of joining relative URL and base URL
|
||||
#
|
||||
$absolute = URI->new_abs( $relative->as_string, $base->as_string );
|
||||
|
||||
return $absolute;
|
||||
|
||||
}
|
||||
|
||||
#=== FUNCTION ================================================================
|
||||
# NAME: output_file_name
|
||||
# PURPOSE: Generate an output file name with three choices
|
||||
# PARAMETERS: $optarg the argument to the option choosing the filename
|
||||
# $showno the show number to add to certain name types
|
||||
# $template a default 'sprintf' template for the name
|
||||
# RETURNS: The name of the output file
|
||||
# DESCRIPTION: If there's a defined output filename then there are three
|
||||
# options: a null string, a plain filename and a substitution
|
||||
# string with '%d' sequences. The null string means the user used
|
||||
# '-option' without a value, so we want to generate a substitution
|
||||
# string. A string with '%d' requires a check to ensure there's
|
||||
# the right number, just one. The plain filename needs no more
|
||||
# work.
|
||||
# THROWS: No exceptions
|
||||
# COMMENTS: None
|
||||
# SEE ALSO: N/A
|
||||
#===============================================================================
|
||||
sub output_file_name {
|
||||
my ( $optarg, $showno, $template ) = @_;
|
||||
|
||||
my ( $filename, $count );
|
||||
|
||||
#
|
||||
# We shouldn't be called with a null option argument
|
||||
#
|
||||
return unless defined($optarg);
|
||||
|
||||
#
|
||||
# Does the option have an argument?
|
||||
#
|
||||
if ( $optarg =~ /^$/ ) {
|
||||
#
|
||||
# No argument; use the show number from the -episode=N option
|
||||
#
|
||||
$filename = sprintf( $template, $showno );
|
||||
}
|
||||
elsif ( $optarg =~ /%d/ ) {
|
||||
#
|
||||
# There's an argument, does it have a '%d' in it?
|
||||
#
|
||||
$count = () = $optarg =~ /%d/g;
|
||||
die "Invalid - too many '%d' sequences in '$optarg'\n"
|
||||
if ( $count > 1 );
|
||||
$filename = sprintf( $optarg, $showno );
|
||||
}
|
||||
else {
|
||||
#
|
||||
# It's a plain filename, just return it
|
||||
#
|
||||
$filename = $optarg;
|
||||
}
|
||||
|
||||
return $filename;
|
||||
}
|
||||
|
||||
#=== FUNCTION ================================================================
|
||||
# NAME: coalesce
|
||||
# PURPOSE: To find the first defined argument and return it
|
||||
# PARAMETERS: Arbitrary number of arguments
|
||||
# RETURNS: The first defined argument or undef if there are none
|
||||
# DESCRIPTION: Just a simple way of ensuring an 'undef' value is never
|
||||
# returned when doing so might be a problem.
|
||||
# THROWS: No exceptions
|
||||
# COMMENTS: None
|
||||
# SEE ALSO: N/A
|
||||
#===============================================================================
|
||||
sub coalesce {
|
||||
foreach (@_) {
|
||||
return $_ if defined($_);
|
||||
}
|
||||
return undef; ## no critic
|
||||
}
|
||||
|
||||
#=== FUNCTION ================================================================
|
||||
# NAME: _debug
|
||||
# PURPOSE: Prints debug reports
|
||||
# PARAMETERS: $active Boolean: 1 for print, 0 for no print
|
||||
# $message Message to print
|
||||
# RETURNS: Nothing
|
||||
# DESCRIPTION: Outputs a message if $active is true. It removes any trailing
|
||||
# newline and then adds one in the 'print' to the caller doesn't
|
||||
# have to bother. Prepends the message with 'D> ' to show it's
|
||||
# a debug message.
|
||||
# THROWS: No exceptions
|
||||
# COMMENTS: None
|
||||
# SEE ALSO: N/A
|
||||
#===============================================================================
|
||||
sub _debug {
|
||||
my ( $active, $message ) = @_;
|
||||
|
||||
chomp($message);
|
||||
print "D> $message\n" if $active;
|
||||
}
|
||||
|
||||
#=== FUNCTION ================================================================
|
||||
# NAME: Options
|
||||
# PURPOSE: Processes command-line options
|
||||
# PARAMETERS: $optref Hash reference to hold the options
|
||||
# RETURNS: Undef
|
||||
# DESCRIPTION: Process the options we want to offer. See the documentation
|
||||
# for details
|
||||
# THROWS: no exceptions
|
||||
# COMMENTS: none
|
||||
# SEE ALSO: n/a
|
||||
#===============================================================================
|
||||
sub Options {
|
||||
my ($optref) = @_;
|
||||
|
||||
my @options = (
|
||||
"help", "documentation|man", "debug=i", "episode=i",
|
||||
"baseURL=s", "fragment!", "count!", "output:s",
|
||||
);
|
||||
|
||||
if ( !GetOptions( $optref, @options ) ) {
|
||||
pod2usage(
|
||||
-msg => "$PROG version $VERSION\n",
|
||||
-exitval => 1,
|
||||
-verbose => 0
|
||||
);
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
__END__
|
||||
|
||||
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
# Application Documentation
|
||||
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
#{{{
|
||||
|
||||
=head1 NAME
|
||||
|
||||
fix_relative_links - Repair relative URLs in HTML shownotes
|
||||
|
||||
=head1 VERSION
|
||||
|
||||
This documentation refers to fix_relative_links version 0.0.3
|
||||
|
||||
=head1 USAGE
|
||||
|
||||
./fix_relative_links -ep=3705 shownotes/hpr3705/hpr3705.html -fragment
|
||||
|
||||
=head1 REQUIRED ARGUMENTS
|
||||
|
||||
=over 4
|
||||
|
||||
=item B<filename>
|
||||
|
||||
The name of the file containing the HTML to be repaired. If no path is given
|
||||
this will be supplied by the script as:
|
||||
|
||||
~/HPR/Show_Submission/shownotes/hpr${show}/
|
||||
|
||||
It is probably wiser to be explicit about the path to the HTML file to be
|
||||
parsed.
|
||||
|
||||
=back
|
||||
|
||||
=head1 OPTIONS
|
||||
|
||||
=over 8
|
||||
|
||||
=item B<-help>
|
||||
|
||||
Prints a brief help message describing the usage of the program, and then exits.
|
||||
|
||||
=item B<-documentation> or B<-man>
|
||||
|
||||
Prints the entire documentation for the script in the form of a manual page.
|
||||
|
||||
=item B<-debug=N>
|
||||
|
||||
Causes certain debugging information to be displayed.
|
||||
|
||||
0 (the default) no debug output
|
||||
1
|
||||
2
|
||||
3
|
||||
|
||||
=item B<-episode=N>
|
||||
|
||||
This option is mandatory and specifies the show number being processed. The
|
||||
number is used to generate default file names and paths as well as the default
|
||||
base URL described below.
|
||||
|
||||
=item B<-baseURL=URL>
|
||||
|
||||
This option will default to the foillowing URL if not provided:
|
||||
|
||||
https://hackerpublicradio.org/eps/hpr${show}/
|
||||
|
||||
It can be used to define a non-standard URL, such as one at a lower level than
|
||||
the example above which might contain thumbnail pictures for example.
|
||||
|
||||
=item B<-[no]fragment>
|
||||
|
||||
This Boolean option defines the HTML being parsed and checked as a fragment or
|
||||
a complete stand-alone document. By default B<-nofragment> is assumed. It is
|
||||
necessary to use B<fragment> for the case where the HTML shownotes are being
|
||||
parsed.
|
||||
|
||||
=item B<-[no]count>
|
||||
|
||||
This Boolean option defines whether to simply count the necessary changes or
|
||||
to apply them to the given HTML file. By default B<-nocount> is assumed, and
|
||||
changes will be applied.
|
||||
|
||||
=item B<-output[=FILE]>
|
||||
|
||||
This option can be omitted or can be given without the B<FILE> name. If
|
||||
omitted entirely no output will be written even though the HTML file has been
|
||||
read and processed. If specified without the output file name the default name
|
||||
will be B<hpr${show}_new.html>. If no path is specified with the file name
|
||||
then a default will be generated as:
|
||||
|
||||
~/HPR/Show_Submission/shownotes/hpr${show}/hpr${show}_new.html
|
||||
|
||||
The output file name can be given in the form of a B<printf> template such as:
|
||||
|
||||
hpr%d_new.html
|
||||
|
||||
and the B<%d> will be replaced by the show number given through the
|
||||
B<-episode=N> option described above.
|
||||
|
||||
=back
|
||||
|
||||
=head1 DESCRIPTION
|
||||
|
||||
The script reads a file of HTML which has either been submitted by an HPR host
|
||||
as it is or has been generated from one of the markup languages accepted in
|
||||
the upload form. Most often this file will contain the main notes for a show
|
||||
and will eventually be saved in the HPR database.
|
||||
|
||||
It is also possible to use the script to process other HTML files submitted
|
||||
with an HPR show.
|
||||
|
||||
The purpose of the script is to find relative URLs in the HTML and convert
|
||||
them to absolute ones. The HPR website requests that absolute URLs be used
|
||||
since then they can be used in the various RSS feeds which are available, but
|
||||
many hosts forget to follow this request.
|
||||
|
||||
The HTML is parsed using B<HTML::TreeBuilder> and is searched for B<a> or
|
||||
B<img> tags. These are checked to ensure they contain absolute links, and if
|
||||
not are converted appropriately using a base URL for the HPR website.
|
||||
|
||||
A count of changes is returned by the script and the converted HTML is written
|
||||
out to a file if required. The script can be used to see if any conversions
|
||||
are necessary before making the changes.
|
||||
|
||||
The script is also capable of treating full HTML documents differently from
|
||||
the HTML fragments that are stored in the HPR database. An option is required
|
||||
to specify which type of HTML is being processed.
|
||||
|
||||
=head1 DIAGNOSTICS
|
||||
|
||||
Error and warning messages generated by the script.
|
||||
|
||||
=over 4
|
||||
|
||||
=item B<Unable to find ...>
|
||||
|
||||
Type: fatal
|
||||
|
||||
The script was unable to find the specified input file.
|
||||
|
||||
=item B<HTML::TreeBuilder failed to process ...: ...>
|
||||
|
||||
Type: fatal
|
||||
|
||||
The script attempted to use B<HTML::TreeBuilder> to parse the input file
|
||||
but failed. The message also contains details of the failure.
|
||||
|
||||
=item B<Unable to open ... for output: ...>
|
||||
|
||||
Type: fatal
|
||||
|
||||
The script attempted to open the requested output file but failed. The reason
|
||||
for the failure is included in the error message.
|
||||
|
||||
=item B<Invalid - too many '%d' sequences in '...'>
|
||||
|
||||
Type: fatal
|
||||
|
||||
The script attempted to generate a name for the requested output file using
|
||||
the supplied template, but failed because there were too many B<%d> elements
|
||||
in the template. Only one should be provided, which will be substituted with
|
||||
the show number.
|
||||
|
||||
=back
|
||||
|
||||
=head1 DEPENDENCIES
|
||||
|
||||
Carp
|
||||
Data::Dumper
|
||||
File::Basename
|
||||
Getopt::Long
|
||||
HTML::TreeBuilder
|
||||
IO::HTML
|
||||
Log::Handler
|
||||
Pod::Usage
|
||||
URI
|
||||
|
||||
=head1 BUGS AND LIMITATIONS
|
||||
|
||||
There are no known bugs in this module.
|
||||
Please report problems to Dave Morriss (Dave.Morriss@gmail.com)
|
||||
Patches are welcome.
|
||||
|
||||
=head1 AUTHOR
|
||||
|
||||
Dave Morriss (Dave.Morriss@gmail.com)
|
||||
|
||||
=head1 LICENCE AND COPYRIGHT
|
||||
|
||||
Copyright (c) <year> Dave Morriss (Dave.Morriss@gmail.com). All rights reserved.
|
||||
|
||||
This module is free software; you can redistribute it and/or
|
||||
modify it under the same terms as Perl itself. See perldoc perlartistic.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
||||
|
||||
=cut
|
||||
|
||||
#}}}
|
||||
|
||||
# [zo to open fold, zc to close]
|
||||
|
||||
# vim: syntax=perl:ts=8:sw=4:et:ai:tw=78:fo=tcrqn21:fdm=marker
|
||||
|
Reference in New Issue
Block a user