forked from HPR/hpr-tools
		
	Moved project directories and files to an empty local repo
This commit is contained in:
		
							
								
								
									
										751
									
								
								Show_Submission/fix_relative_links
									
									
									
									
									
										Executable file
									
								
							
							
						
						
									
										751
									
								
								Show_Submission/fix_relative_links
									
									
									
									
									
										Executable file
									
								
							| @@ -0,0 +1,751 @@ | ||||
| #!/usr/bin/env perl | ||||
| #=============================================================================== | ||||
| # | ||||
| #         FILE: fix_relative_links | ||||
| # | ||||
| #        USAGE: ./fix_relative_links [options] -episode=N FILE | ||||
| # | ||||
| #  DESCRIPTION: Processes an HTML input file, looking for relative URLs. If | ||||
| #               any are found these are made absolute using the -baseURL=URL | ||||
| #               option or a default. The intention is to make them into | ||||
| #               HPR-absolute URLs. | ||||
| # | ||||
| #      OPTIONS: --- | ||||
| # REQUIREMENTS: --- | ||||
| #         BUGS: --- | ||||
| #        NOTES: --- | ||||
| #       AUTHOR: Dave Morriss (djm), Dave.Morriss@gmail.com | ||||
| #      VERSION: 0.0.3 | ||||
| #      CREATED: 2022-10-14 11:56:03 | ||||
| #     REVISION: 2022-10-23 22:12:08 | ||||
| # | ||||
| #=============================================================================== | ||||
|  | ||||
| use v5.16; | ||||
| use strict; | ||||
| use warnings; | ||||
| use utf8; | ||||
| use feature qw{ postderef say signatures state }; | ||||
| no warnings qw{ experimental::postderef experimental::signatures }; | ||||
|  | ||||
| use Carp; | ||||
| use Getopt::Long; | ||||
| use Pod::Usage; | ||||
|  | ||||
| use File::Basename; | ||||
|  | ||||
| use IO::HTML; | ||||
| use HTML::TreeBuilder 5 -weak; | ||||
| use URI; | ||||
|  | ||||
| use Log::Handler; | ||||
|  | ||||
| use Data::Dumper; | ||||
|  | ||||
#
# Version number (manually incremented)
#
our $VERSION = '0.0.3';

#
# Script name and the directory it was invoked from, both taken from $0. The
# directory falls back to '.' when $0 carries no directory part.
#
my $PROG = $0;
$PROG =~ s|.*/||mx;

my $DIR = $0;
$DIR =~ s|/?[^/]*$||mx;
$DIR ||= '.';

#-------------------------------------------------------------------------------
# Declarations
#-------------------------------------------------------------------------------
#
# Fixed locations: the working area below the user's home directory, the log
# directory inside it and a log file named after the script
#
my $basedir = "$ENV{HOME}/HPR/Show_Submission";
my $logdir  = "$basedir/logs";
my $logfile = "$logdir/${PROG}.log";

#
# File-scope variables, grouped by purpose: run-time flags, values taken from
# the options, file and directory names, and the results of processing
#
my ( $DEBUG, $verbose, $silent );
my ( $showno, $base_URL, $fragment, $count_only );
my ( $outfile, $filename, $showdir );
my ( $changes, $html );

#
# Enable Unicode mode on both standard output streams
#
for my $stream ( \*STDOUT, \*STDERR ) {
    binmode $stream, ":encoding(UTF-8)";
}
|  | ||||
#-------------------------------------------------------------------------------
# Options and arguments
#-------------------------------------------------------------------------------
#
# Option defaults
#
my $DEFDEBUG = 0;

my %options;
Options( \%options );

#
# Default help shows minimal information
#
pod2usage( -msg => "$PROG version $VERSION\n", -exitval => 1, -verbose => 0 )
    if ( $options{'help'} );

#
# The -documentation or -man option shows the full POD documentation through
# a pager for convenience
#
pod2usage( -msg => "$PROG version $VERSION\n", -exitval => 1, -verbose => 2 )
    if ( $options{'documentation'} );

#
# Collect options. The defined-or operator supplies a default only when the
# option was not given at all, so an explicit 0 is respected.
#
$DEBUG      = $options{debug} // $DEFDEBUG;
$showno     = $options{episode};
$base_URL   = $options{baseURL};
$fragment   = $options{fragment} // 0;
$count_only = $options{count} // 0;
$outfile    = $options{output};

#
# Argument
#
$filename = shift;

#
# Sanity checks
#
pod2usage(
    -msg     => "$PROG version $VERSION\nShow number missing\n",
    -exitval => 1,
    -verbose => 0
) unless $showno;

pod2usage(
    -msg     => "$PROG version $VERSION\nInput file name missing\n",
    -exitval => 1,
    -verbose => 0
) unless $filename;

#
# Add leading zeroes to the show number if necessary
#
$showno = sprintf( '%04d', $showno );

#
# Directories and files specific to this show
#
$showdir = "$basedir/shownotes/hpr$showno";

#
# Allow the input filename to be a bare name
#
if ( !-e $filename ) {
    $filename = "$showdir/$filename";
}
die "Unable to find $filename" unless ( -e $filename );

#
# Work on the output file, allowing defaults and substitution points for
# convenience. If there's no outfile we'll just process the HTML and nothing
# more.
#
# Only a name with no directory part at all is placed in the show directory.
# Testing for a '/' (rather than comparing dirname() with '.') means that an
# explicit './name' is honoured as "the current directory".
#
if ( defined($outfile) ) {
    $outfile = output_file_name( $outfile, $showno, 'hpr%d_new.html' );
    $outfile = "$showdir/$outfile" unless ( $outfile =~ m{/} );
}

#
# Default base URL
#
$base_URL ||= "https://hackerpublicradio.org/eps/hpr$showno/";

#
# Base URL must have a trailing '/'
#
$base_URL .= '/' unless ( $base_URL =~ m{/$} );
|  | ||||
#-------------------------------------------------------------------------------
# Set up logging keeping the default log layout except for the date. The format
# is "%T [%L] %m" where '%T' is the timestamp, '%L' is the log level and '%m' is
# the message.
#-------------------------------------------------------------------------------
my $log = Log::Handler->new();

$log->add(
    file => {
        timeformat => "%Y/%m/%d %H:%M:%S",
        filename   => $logfile,
        minlevel   => 0,
        maxlevel   => 7,
    }
);

#
# Log preamble
#
$log->info("Show number: $showno");
$log->info("Processing: $filename");
$log->info("Base: $base_URL");

#
# Find and change any relative URLs returning the number of changes and the
# altered HTML
#
( $changes, $html )
    = find_links_in_file( $filename, $base_URL, $fragment, $count_only );

$log->info("Number of changes: $changes");

#
# The count of changes is the script's exit status. Only the low 8 bits of an
# exit status reach the parent process, so a count of 256 would be reported as
# 0 ("nothing to change"). Cap the value at 255 to keep "non-zero means there
# were changes" true for any count.
#
my $exit_status = ( $changes > 255 ? 255 : $changes );

#
# Exit without writing if we're just counting
#
if ($count_only) {
    $log->info("Count only mode");
    exit $exit_status;
}

#
# Exit without writing if there were no changes
#
if ( $changes == 0 ) {
    $log->info("No output written");
    exit $exit_status;
}

#
# Write output if an output file was specified
#
if ($outfile) {
    write_output( $outfile, $html );
    $log->info("Changes applied; written to $outfile");
}
else {
    $log->info("No output written");
}

exit $exit_status;
|  | ||||
#===  FUNCTION  ================================================================
#         NAME: find_links_in_file
#      PURPOSE: Finds relative links in an HTML file and makes them absolute
#   PARAMETERS: $filename       the name of the file we're parsing
#               $base_URL       the URL the relative links are resolved against
#               $fragment       Boolean signalling whether to treat the HTML
#                               as a fragment or an entire document
#               $count_only     Boolean signalling that all we want is the
#                               count of relative URLs, no HTML is generated
#      RETURNS: A list of two elements: the number of URLs "repaired" and the
#               regenerated HTML. The HTML is undef in count-only mode.
#  DESCRIPTION: Parses the file with HTML::TreeBuilder and examines every link
#               attribute of the 'a' and 'img' elements. A link with no URI
#               scheme is taken to be relative; it is reported on STDOUT,
#               converted with 'make_absolute' and written back into the
#               parsed tree. Unless only counting, the tree is then turned
#               back into HTML, either whole or (in fragment mode) as the
#               contents of the <body> element without the enclosing tags.
#       THROWS: Dies if the file can't be parsed, or if fragment mode is
#               requested and the parsed tree contains no <body> element
#     COMMENTS: None
#     SEE ALSO: N/A
#===============================================================================
sub find_links_in_file {
    my ( $filename, $base_URL, $fragment, $count_only ) = @_;

    #
    # Parse the base URL
    #
    my $base_uri = URI->new($base_URL);

    #
    # Create a tree object
    #
    my $tree = HTML::TreeBuilder->new;
    $tree->ignore_unknown(0);
    $tree->no_expand_entities(1);
    $tree->p_strict(1);
    $tree->store_comments(1);
    $tree->warn(1);

    #
    # Parse the file using IO::HTML to grab it. Die if we fail because then we
    # know this stuff needs some urgent attention.
    #
    $tree->parse_file( html_file($filename) )
        or die "HTML::TreeBuilder failed to process $filename: $!\n";

    my $linkedits = 0;

    #
    # Scan for all anchors and images using the HTML::Element method
    # 'extract_links'. The technique used here is from the HTML::Element man
    # page.
    #
    for my $found ( @{ $tree->extract_links( 'a', 'img' ) } ) {
        my ( $link, $element, $attr ) = @{$found};

        #
        # Parse the link. A relative link (presumably) doesn't have a scheme,
        # so anything with one is left alone.
        #
        my $uri_orig = URI->new($link);
        next if $uri_orig->scheme;

        say "Relative link: $link";

        #
        # Make the link absolute. The fragment is deliberately left out of
        # what is reported here; it is added back below.
        #
        my $uri_new = make_absolute( $uri_orig, $base_uri );
        my $newlink = sprintf( "%s:%s", $uri_new->scheme, $uri_new->opaque );
        say "Absolute link: $newlink";

        #
        # Carry over any URI fragment. Test the length rather than the truth
        # of the value so that a fragment of '0' isn't lost.
        #
        my $link_fragment = $uri_orig->fragment;
        if ( defined($link_fragment) && length($link_fragment) ) {
            $newlink .= '#' . $link_fragment;
        }

        #
        # Modify the HTML to make the relative absolute
        #
        $element->attr( $attr, $newlink );

        $linkedits++;
    }

    #
    # Exit here if we were just asked to count
    #
    return ( $linkedits, undef ) if $count_only;

    #
    # In 'HTML fragment' mode generate the body part without the <body> tags.
    #
    my $result;
    if ($fragment) {
        my $body = $tree->look_down( _tag => 'body' )
            or die "No <body> element found in $filename\n";
        ( $result = $body->as_HTML( undef, ' ', {} ) )
            =~ s{(^<body[^>]*>|</body>$)}{}gi;
    }
    else {
        $result = $tree->as_HTML( undef, ' ', {} );
    }

    return ( $linkedits, $result );

}
|  | ||||
#===  FUNCTION  ================================================================
#         NAME: write_output
#      PURPOSE: Write the "repaired" HTML
#   PARAMETERS: $outfile        name of the output file
#               $html           the HTML to write out
#      RETURNS: 1 (all failures are fatal)
#  DESCRIPTION: Creates (or overwrites) the output file, writing the HTML to
#               it encoded as UTF-8. Output is buffered, so a failure such as
#               a full disk may only be reported when the file is closed; for
#               that reason the results of both 'print' and 'close' are
#               checked as well as that of 'open'.
#       THROWS: Dies if the file can't be opened, written to or closed
#     COMMENTS: None
#     SEE ALSO: N/A
#===============================================================================
sub write_output {
    my ( $outfile, $html ) = @_;

    open( my $out, '>:encoding(UTF-8)', $outfile )
        or die "Unable to open $outfile for output: $!\n";

    print {$out} $html
        or die "Unable to write to $outfile: $!\n";

    close($out)
        or die "Unable to close $outfile: $!\n";

    return 1;
}
|  | ||||
#===  FUNCTION  ================================================================
#         NAME: make_absolute
#      PURPOSE: Take a relative URI and a base URI and return the absolute URI
#   PARAMETERS: $relative       relative URL as a URI object (not modified)
#               $base           base URL as a URI object
#      RETURNS: Absolute URL as a URI object
#  DESCRIPTION: A relative link sometimes repeats part of the base path, for
#               example 'hpr3705/pic.png' or '/eps/hpr3705/pic.png' for a base
#               ending in '/eps/hpr3705/'. Leading path segments which are
#               empty, are '.', or name a directory in the base path are
#               therefore removed before the link is resolved against the
#               base. Only *leading* segments are removed: a directory deeper
#               in the link which happens to share a name with part of the
#               base (as in 'files/eps/pic.png') is a real part of the link
#               and is kept. If nothing remains of the path the link is taken
#               to refer to 'index.html'. A trailing '/' on the link is
#               preserved. The work is done on a copy so the caller's URI
#               object keeps its original path.
#       THROWS: No exceptions
#     COMMENTS: None
#     SEE ALSO: N/A
#===============================================================================
sub make_absolute {
    my ( $relative, $base ) = @_;

    #
    # Names of the directories making up the base path
    #
    my %in_base = map { $_ => 1 } grep { length } split( '/', $base->path );

    #
    # Chop up the relative path. The limit of -1 stops 'split' discarding
    # a trailing empty field, which is what represents a trailing '/'.
    #
    my @segments = split( '/', $relative->path, -1 );

    #
    # Remove leading segments which duplicate the base or add nothing
    #
    while (@segments) {
        my $head = $segments[0];
        last unless ( $head eq '' || $head eq '.' || $in_base{$head} );
        shift @segments;
    }

    #
    # If the relative path is empty we assume it's referring to the
    # 'index.html' file.
    #
    push( @segments, 'index.html' ) unless (@segments);

    #
    # Rebuild the path in a copy of the relative URL, keeping any query and
    # fragment it carries
    #
    my $trimmed = $relative->clone;
    $trimmed->path( join( '/', @segments ) );

    #
    # Return the result of joining relative URL and base URL
    #
    return URI->new_abs( $trimmed->as_string, $base->as_string );

}
|  | ||||
#===  FUNCTION  ================================================================
#         NAME: output_file_name
#      PURPOSE: Generate an output file name with three choices
#   PARAMETERS: $optarg         the argument to the option choosing the filename
#               $showno         the show number to add to certain name types
#               $template       a default template for the name, containing
#                               a '%d' where the show number is to go
#      RETURNS: The name of the output file, or nothing if $optarg is
#               undefined
#  DESCRIPTION: If there's a defined output filename then there are three
#               options: a null string, a plain filename and a substitution
#               string with a '%d' sequence. The null string means the user
#               used '-option' without a value, so the default template is
#               used instead. A string with '%d' requires a check to ensure
#               there's the right number, just one. The plain filename needs
#               no more work.
#               The show number replaces the '%d' exactly as it was given
#               rather than being formatted with 'sprintf'. This keeps the
#               leading zeroes of a padded show number ('0042' gives
#               'hpr0042_new.html', not 'hpr42_new.html') and means no other
#               '%' sequence in a user-supplied name is interpreted.
#       THROWS: Dies if the name contains more than one '%d'
#     COMMENTS: None
#     SEE ALSO: N/A
#===============================================================================
sub output_file_name {
    my ( $optarg, $showno, $template ) = @_;

    #
    # We shouldn't be called with a null option argument
    #
    return unless defined($optarg);

    #
    # An option given without an argument selects the default template
    #
    my $pattern = ( length($optarg) ? $optarg : $template );

    #
    # Count the substitution points. With none it's a plain filename; more
    # than one is an error.
    #
    my $count = () = $pattern =~ /%d/g;
    return $pattern if ( $count == 0 );

    die "Invalid - too many '%d' sequences in '$pattern'\n"
        if ( $count > 1 );

    #
    # Drop the show number in where the '%d' is
    #
    ( my $filename = $pattern ) =~ s/%d/$showno/;

    return $filename;
}
|  | ||||
#===  FUNCTION  ================================================================
#         NAME: coalesce
#      PURPOSE: Picks out the first of its arguments which is defined
#   PARAMETERS: Any number of arguments, defined or not
#      RETURNS: The first defined argument; undef if none is defined or no
#               arguments were given
#  DESCRIPTION: A convenient way of providing a fall-back value: list the
#               preferred value first and a defined default last and an
#               'undef' will never come back.
#       THROWS: No exceptions
#     COMMENTS: None
#     SEE ALSO: N/A
#===============================================================================
sub coalesce {
    #
    # The list assignment keeps just the first of the defined arguments, and
    # leaves the variable undefined when there aren't any
    #
    my ($first_defined) = grep { defined } @_;

    return $first_defined;
}
|  | ||||
#===  FUNCTION  ================================================================
#         NAME: _debug
#      PURPOSE: Prints debug reports
#   PARAMETERS: $active         Boolean: 1 for print, 0 for no print
#               $message        Message to print
#      RETURNS: Nothing
#  DESCRIPTION: Writes the message to STDOUT when $active is true. Any
#               trailing newline is removed from the message and one is added
#               when printing, so the caller doesn't have to bother either
#               way. The message is prefixed with 'D> ' to mark it as debug
#               output.
#       THROWS: No exceptions
#     COMMENTS: None
#     SEE ALSO: N/A
#===============================================================================
sub _debug {
    my ( $enabled, $text ) = @_;

    chomp $text;
    $enabled and print "D> $text\n";
}
|  | ||||
#===  FUNCTION  ================================================================
#         NAME: Options
#      PURPOSE: Processes command-line options
#   PARAMETERS: $optref     Hash reference to hold the options
#      RETURNS: Undef
#  DESCRIPTION: Hands the option specifications to Getopt::Long, which stores
#               what it finds on the command line in the hash. If the command
#               line can't be parsed a brief usage message is shown and the
#               script exits with status 1. See the documentation for details
#               of the options themselves.
#       THROWS: no exceptions
#     COMMENTS: none
#     SEE ALSO: n/a
#===============================================================================
sub Options {
    my ($optref) = @_;

    #
    # Option specifications in Getopt::Long notation
    #
    my @spec = qw{
        help      documentation|man debug=i episode=i
        baseURL=s fragment!         count!  output:s
    };

    GetOptions( $optref, @spec )
        or pod2usage(
            -msg     => "$PROG version $VERSION\n",
            -exitval => 1,
            -verbose => 0
        );

    return;
}
|  | ||||
| __END__ | ||||
|  | ||||
| #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% | ||||
| #  Application Documentation | ||||
| #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% | ||||
| #{{{ | ||||
|  | ||||
| =head1 NAME | ||||
|  | ||||
| fix_relative_links - Repair relative URLs in HTML shownotes | ||||
|  | ||||
| =head1 VERSION | ||||
|  | ||||
| This documentation refers to fix_relative_links version 0.0.3 | ||||
|  | ||||
| =head1 USAGE | ||||
|  | ||||
|     ./fix_relative_links -ep=3705 shownotes/hpr3705/hpr3705.html -fragment | ||||
|  | ||||
| =head1 REQUIRED ARGUMENTS | ||||
|  | ||||
| =over 4 | ||||
|  | ||||
| =item B<filename> | ||||
|  | ||||
| The name of the file containing the HTML to be repaired. If no path is given | ||||
| this will be supplied by the script as: | ||||
|  | ||||
|     ~/HPR/Show_Submission/shownotes/hpr${show}/ | ||||
|  | ||||
| It is probably wiser to be explicit about the path to the HTML file to be | ||||
| parsed. | ||||
|  | ||||
| =back | ||||
|  | ||||
| =head1 OPTIONS | ||||
|  | ||||
| =over 8 | ||||
|  | ||||
| =item B<-help> | ||||
|  | ||||
| Prints a brief help message describing the usage of the program, and then exits. | ||||
|  | ||||
| =item B<-documentation> or B<-man> | ||||
|  | ||||
| Prints the entire documentation for the script in the form of a manual page. | ||||
|  | ||||
| =item B<-debug=N> | ||||
|  | ||||
| Causes certain debugging information to be displayed. | ||||
|  | ||||
|     0   (the default) no debug output | ||||
|     1    | ||||
|     2    | ||||
|     3    | ||||
|  | ||||
| =item B<-episode=N> | ||||
|  | ||||
| This option is mandatory and specifies the show number being processed. The | ||||
| number is used to generate default file names and paths as well as the default | ||||
| base URL described below. | ||||
|  | ||||
| =item B<-baseURL=URL> | ||||
|  | ||||
| This option will default to the following URL if not provided: | ||||
|  | ||||
|     https://hackerpublicradio.org/eps/hpr${show}/ | ||||
|  | ||||
| It can be used to define a non-standard URL, such as one at a lower level than | ||||
| the example above which might contain thumbnail pictures for example. | ||||
|  | ||||
| =item B<-[no]fragment> | ||||
|  | ||||
| This Boolean option defines the HTML being parsed and checked as a fragment or | ||||
| a complete stand-alone document. By default B<-nofragment> is assumed. It is | ||||
| necessary to use B<-fragment> for the case where the HTML shownotes are being | ||||
| parsed. | ||||
|  | ||||
| =item B<-[no]count> | ||||
|  | ||||
| This Boolean option defines whether to simply count the necessary changes or | ||||
| to apply them to the given HTML file. By default B<-nocount> is assumed, and | ||||
| changes will be applied. | ||||
|  | ||||
| =item B<-output[=FILE]> | ||||
|  | ||||
| This option can be omitted or can be given without the B<FILE> name. If | ||||
| omitted entirely no output will be written even though the HTML file has been | ||||
| read and processed. If specified without the output file name the default name | ||||
| will be B<hpr${show}_new.html>. If no path is specified with the file name | ||||
| then a default will be generated as: | ||||
|  | ||||
|     ~/HPR/Show_Submission/shownotes/hpr${show}/hpr${show}_new.html | ||||
|  | ||||
| The output file name can be given in the form of a B<printf> template such as: | ||||
|  | ||||
|     hpr%d_new.html | ||||
|  | ||||
| and the B<%d> will be replaced by the show number given through the | ||||
| B<-episode=N> option described above. | ||||
|  | ||||
| =back | ||||
|  | ||||
| =head1 DESCRIPTION | ||||
|  | ||||
| The script reads a file of HTML which has either been submitted by an HPR host | ||||
| as it is or has been generated from one of the markup languages accepted in | ||||
| the upload form. Most often this file will contain the main notes for a show | ||||
| and will eventually be saved in the HPR database. | ||||
|  | ||||
| It is also possible to use the script to process other HTML files submitted | ||||
| with an HPR show. | ||||
|  | ||||
| The purpose of the script is to find relative URLs in the HTML and convert | ||||
| them to absolute ones. The HPR website requests that absolute URLs be used | ||||
| since then they can be used in the various RSS feeds which are available, but | ||||
| many hosts forget to follow this request. | ||||
|  | ||||
| The HTML is parsed using B<HTML::TreeBuilder> and is searched for B<a> or | ||||
| B<img> tags. These are checked to ensure they contain absolute links, and if | ||||
| not are converted appropriately using a base URL for the HPR website. | ||||
|  | ||||
| A count of changes is returned by the script and the converted HTML is written | ||||
| out to a file if required. The script can be used to see if any conversions | ||||
| are necessary before making the changes. | ||||
|  | ||||
| The script is also capable of treating full HTML documents differently from | ||||
| the HTML fragments that are stored in the HPR database. An option is required | ||||
| to specify which type of HTML is being processed. | ||||
|  | ||||
| =head1 DIAGNOSTICS | ||||
|  | ||||
| Error and warning messages generated by the script. | ||||
|  | ||||
| =over 4 | ||||
|  | ||||
| =item B<Unable to find ...> | ||||
|  | ||||
| Type: fatal | ||||
|  | ||||
| The script was unable to find the specified input file. | ||||
|  | ||||
| =item B<HTML::TreeBuilder failed to process ...: ...> | ||||
|  | ||||
| Type: fatal | ||||
|  | ||||
| The script attempted to use B<HTML::TreeBuilder> to parse the input file | ||||
| but failed. The message also contains details of the failure. | ||||
|  | ||||
| =item B<Unable to open ... for output: ...> | ||||
|  | ||||
| Type: fatal | ||||
|  | ||||
| The script attempted to open the requested output file but failed. The reason | ||||
| for the failure is included in the error message. | ||||
|  | ||||
| =item B<Invalid - too many '%d' sequences in '...'> | ||||
|  | ||||
| Type: fatal | ||||
|  | ||||
| The script attempted to generate a name for the requested output file using | ||||
| the supplied template, but failed because there were too many B<%d> elements | ||||
| in the template. Only one should be provided, which will be substituted with | ||||
| the show number. | ||||
|  | ||||
| =back | ||||
|  | ||||
| =head1 DEPENDENCIES | ||||
|  | ||||
|     Carp | ||||
|     Data::Dumper | ||||
|     File::Basename | ||||
|     Getopt::Long | ||||
|     HTML::TreeBuilder | ||||
|     IO::HTML | ||||
|     Log::Handler | ||||
|     Pod::Usage | ||||
|     URI | ||||
|  | ||||
| =head1 BUGS AND LIMITATIONS | ||||
|  | ||||
| There are no known bugs in this module. | ||||
| Please report problems to Dave Morriss (Dave.Morriss@gmail.com) | ||||
| Patches are welcome. | ||||
|  | ||||
| =head1 AUTHOR | ||||
|  | ||||
| Dave Morriss (Dave.Morriss@gmail.com) | ||||
|  | ||||
| =head1 LICENCE AND COPYRIGHT | ||||
|  | ||||
| Copyright (c) <year> Dave Morriss (Dave.Morriss@gmail.com). All rights reserved. | ||||
|  | ||||
| This module is free software; you can redistribute it and/or | ||||
| modify it under the same terms as Perl itself. See perldoc perlartistic. | ||||
|  | ||||
| This program is distributed in the hope that it will be useful, | ||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | ||||
|  | ||||
| =cut | ||||
|  | ||||
| #}}} | ||||
|  | ||||
| # [zo to open fold, zc to close] | ||||
|  | ||||
| # vim: syntax=perl:ts=8:sw=4:et:ai:tw=78:fo=tcrqn21:fdm=marker | ||||
|  | ||||
		Reference in New Issue
	
	Block a user