forked from HPR/hpr-tools
		
	
		
			
	
	
		
			398 lines
		
	
	
		
			11 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
		
		
			
		
	
	
			398 lines
		
	
	
		
			11 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
| 
								 | 
							
								#!/usr/bin/env perl
							 | 
						||
| 
								 | 
							
								#===============================================================================
							 | 
						||
| 
								 | 
							
								#
							 | 
						||
| 
								 | 
							
								#         FILE: find_series
							 | 
						||
| 
								 | 
							
								#
							 | 
						||
| 
								 | 
							
								#        USAGE: ./find_series
							 | 
						||
| 
								 | 
							
								#
							 | 
						||
| 
								 | 
							
								#  DESCRIPTION: Gathers information from the HPR database to assist with the
							 | 
						||
| 
								 | 
							
								#               process of placing episodes into series groups
							 | 
						||
| 
								 | 
							
								#
							 | 
						||
| 
								 | 
							
								#      OPTIONS: ---
							 | 
						||
| 
								 | 
							
								# REQUIREMENTS: ---
							 | 
						||
| 
								 | 
							
								#         BUGS: ---
							 | 
						||
| 
								 | 
							
								#        NOTES: ---
							 | 
						||
| 
								 | 
							
								#       AUTHOR: Dave Morriss (djm), Dave.Morriss@gmail.com
							 | 
						||
| 
								 | 
							
								#      VERSION: 0.0.5
							 | 
						||
| 
								 | 
							
								#      CREATED: 2014-04-25 17:11:21
							 | 
						||
| 
								 | 
							
								#     REVISION: 2016-06-15 15:51:55
							 | 
						||
| 
								 | 
							
								#
							 | 
						||
| 
								 | 
							
								#===============================================================================
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								use v5.16;
							 | 
						||
| 
								 | 
							
								use strict;
							 | 
						||
| 
								 | 
							
								use warnings;
							 | 
						||
| 
								 | 
							
								use utf8;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								use Config::General;
							 | 
						||
| 
								 | 
							
								use List::MoreUtils qw(uniq);
							 | 
						||
| 
								 | 
							
								use YAML::XS qw{LoadFile};
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								use DBI;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								use Data::Dumper;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								#
							 | 
						||
| 
								 | 
							
								# Version number (manually incremented)
							 | 
						||
| 
								 | 
							
								#
							 | 
						||
| 
								 | 
							
								our $VERSION = '0.0.5';
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								#
							 | 
						||
| 
								 | 
							
								# Various constants
							 | 
						||
| 
								 | 
							
								#
							 | 
						||
| 
								 | 
							
								( my $PROG = $0 ) =~ s|.*/||mx;
							 | 
						||
| 
								 | 
							
								( my $DIR  = $0 ) =~ s|/?[^/]*$||mx;
							 | 
						||
| 
								 | 
							
								$DIR = '.' unless $DIR;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								#-------------------------------------------------------------------------------
							 | 
						||
| 
								 | 
							
								# Declarations
							 | 
						||
| 
								 | 
							
								#-------------------------------------------------------------------------------
							 | 
						||
| 
								 | 
							
								#
							 | 
						||
| 
								 | 
							
								# Constants and other declarations
							 | 
						||
| 
								 | 
							
								#
							 | 
						||
| 
								 | 
							
								my $basedir    = "$ENV{HOME}/HPR/Database";
							 | 
						||
| 
								 | 
							
								my $configfile = "$basedir/.hpr_db.cfg";
							 | 
						||
| 
								 | 
							
								my $ignorefile = "$basedir/.$PROG.yml";
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								my $file_template = "${PROG}_%d.out";
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								my ( $dbh,    $sth1,    $h1 );
							 | 
						||
| 
								 | 
							
								my ( @ignore, $title,   @words, $pair );
							 | 
						||
| 
								 | 
							
								my ( %eps,    %tags,    @taglist, %single_words, %double_words );
							 | 
						||
| 
								 | 
							
								my ( $phase,  $outfile, $outfh );
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								#
							 | 
						||
| 
								 | 
							
								# Enable Unicode mode
							 | 
						||
| 
								 | 
							
								#
							 | 
						||
| 
								 | 
							
								binmode STDOUT, ":encoding(UTF-8)";
							 | 
						||
| 
								 | 
							
								binmode STDERR, ":encoding(UTF-8)";
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								#
							 | 
						||
| 
								 | 
							
								# Load YAML ignore list
							 | 
						||
| 
								 | 
							
								#
							 | 
						||
| 
								 | 
							
								my $yaml = LoadFile($ignorefile);
							 | 
						||
| 
								 | 
							
								@ignore = uniq( @{ $yaml->{ignore} } );
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								#
							 | 
						||
| 
								 | 
							
								# Load database configuration data
							 | 
						||
| 
								 | 
							
								#
							 | 
						||
| 
								 | 
							
								my $conf = Config::General->new(
							 | 
						||
| 
								 | 
							
								    -ConfigFile      => $configfile,
							 | 
						||
| 
								 | 
							
								    -InterPolateVars => 1,
							 | 
						||
| 
								 | 
							
								    -ExtendedAccess  => 1
							 | 
						||
| 
								 | 
							
								);
							 | 
						||
| 
								 | 
							
								my %config = $conf->getall();
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								#-------------------------------------------------------------------------------
							 | 
						||
| 
								 | 
							
								# Connect to the database
							 | 
						||
| 
								 | 
							
								#-------------------------------------------------------------------------------
							 | 
						||
| 
								 | 
							
								my $dbhost = $config{database}->{host} // '127.0.0.1';
							 | 
						||
| 
								 | 
							
								my $dbport = $config{database}->{port} // 3306;
							 | 
						||
| 
								 | 
							
								my $dbname = $config{database}->{name};
							 | 
						||
| 
								 | 
							
								my $dbuser = $config{database}->{user};
							 | 
						||
| 
								 | 
							
								my $dbpwd  = $config{database}->{password};
							 | 
						||
| 
								 | 
							
								$dbh = DBI->connect( "dbi:mysql:host=$dbhost;port=$dbport;database=$dbname",
							 | 
						||
| 
								 | 
							
								    $dbuser, $dbpwd, { AutoCommit => 1 } )
							 | 
						||
| 
								 | 
							
								    or die $DBI::errstr;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								#
							 | 
						||
| 
								 | 
							
								# Enable client-side UTF8
							 | 
						||
| 
								 | 
							
								#
							 | 
						||
| 
								 | 
							
								$dbh->{mysql_enable_utf8} = 1;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								#
							 | 
						||
| 
								 | 
							
								# Prepare to collect episode titles with the series it's related to
							 | 
						||
| 
								 | 
							
								#
							 | 
						||
| 
								 | 
							
								$sth1 = $dbh->prepare(
							 | 
						||
| 
								 | 
							
								    q{SELECT
							 | 
						||
| 
								 | 
							
								            eps.id AS eps_id,
							 | 
						||
| 
								 | 
							
								            eps.date,
							 | 
						||
| 
								 | 
							
								            eps.title,
							 | 
						||
| 
								 | 
							
								            eps.duration,
							 | 
						||
| 
								 | 
							
								            eps.summary,
							 | 
						||
| 
								 | 
							
								            eps.notes,
							 | 
						||
| 
								 | 
							
								            eps.hostid,
							 | 
						||
| 
								 | 
							
								            eps.series,
							 | 
						||
| 
								 | 
							
								            eps.explicit,
							 | 
						||
| 
								 | 
							
								            eps.license,
							 | 
						||
| 
								 | 
							
								            eps.tags,
							 | 
						||
| 
								 | 
							
								            eps.version,
							 | 
						||
| 
								 | 
							
								            eps.downloads,
							 | 
						||
| 
								 | 
							
								            eps.valid AS eps_valid,
							 | 
						||
| 
								 | 
							
								            ms.id AS ms_id,
							 | 
						||
| 
								 | 
							
								            ms.name,
							 | 
						||
| 
								 | 
							
								            ms.description,
							 | 
						||
| 
								 | 
							
								            ms.private,
							 | 
						||
| 
								 | 
							
								            ms.image,
							 | 
						||
| 
								 | 
							
								            ms.valid AS ms_valid
							 | 
						||
| 
								 | 
							
								        FROM eps
							 | 
						||
| 
								 | 
							
								        JOIN miniseries ms ON eps.series = ms.id
							 | 
						||
| 
								 | 
							
								        WHERE eps.valid = 1}
							 | 
						||
| 
								 | 
							
								);
							 | 
						||
| 
								 | 
							
								$sth1->execute;
							 | 
						||
| 
								 | 
							
								if ( $dbh->err ) {
							 | 
						||
| 
								 | 
							
								    warn $dbh->errstr;
							 | 
						||
| 
								 | 
							
								}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								#
							 | 
						||
| 
								 | 
							
								# Generate an output file for phase 1
							 | 
						||
| 
								 | 
							
								#
							 | 
						||
| 
								 | 
							
								$phase = 1;
							 | 
						||
| 
								 | 
							
								newOutput( \$outfh, $file_template, $phase++ );
							 | 
						||
| 
								 | 
							
								print $outfh "Show  (Series) Title\n";
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								#-------------------------------------------------------------------------------
							 | 
						||
| 
								 | 
							
								# Walk through the episode/series list from the database and build structures
							 | 
						||
| 
								 | 
							
								# from them.
							 | 
						||
| 
								 | 
							
								#-------------------------------------------------------------------------------
							 | 
						||
| 
								 | 
							
								# The structures built are:
							 | 
						||
| 
								 | 
							
								#
							 | 
						||
| 
								 | 
							
								# %eps - a hash indexed by the episode number with an arrayref as the
							 | 
						||
| 
								 | 
							
								# value. The arrayref starts with the episode title and is followed by the
							 | 
						||
| 
								 | 
							
								# important words from the title in lower case. The term "important" means
							 | 
						||
| 
								 | 
							
								# that the word must consist of the allowed characters and not be in the
							 | 
						||
| 
								 | 
							
								# 'ignore' list.
							 | 
						||
| 
								 | 
							
								#
							 | 
						||
| 
								 | 
							
								# %tags - a hash indexed by the episode number with a CSV string of tags
							 | 
						||
| 
								 | 
							
								# associated with the episode as the value.
							 | 
						||
| 
								 | 
							
								#
							 | 
						||
| 
								 | 
							
								# %single_words - a hash indexed by one of the words collected from the title
							 | 
						||
| 
								 | 
							
								# (see %eps above). The value of each element is complex. It consists of an
							 | 
						||
| 
								 | 
							
								# arrayref, the first element of which is a count of the succeeding elements.
							 | 
						||
| 
								 | 
							
								# The next N elements are arrayrefs each of which contains two elements: an
							 | 
						||
| 
								 | 
							
								# episode number and an episode title. Here is an example in Data::Dumper
							 | 
						||
| 
								 | 
							
								# format:
							 | 
						||
| 
								 | 
							
								#
							 | 
						||
| 
								 | 
							
								# 'pre-ibm' => [
							 | 
						||
| 
								 | 
							
								#                2,
							 | 
						||
| 
								 | 
							
								#                [
							 | 
						||
| 
								 | 
							
								#                  687,
							 | 
						||
| 
								 | 
							
								#                  'pre-IBM PC computer history 1'
							 | 
						||
| 
								 | 
							
								#                ],
							 | 
						||
| 
								 | 
							
								#                [
							 | 
						||
| 
								 | 
							
								#                  691,
							 | 
						||
| 
								 | 
							
								#                  'pre-IBM PC computer history 2'
							 | 
						||
| 
								 | 
							
								#                ]
							 | 
						||
| 
								 | 
							
								#              ],
							 | 
						||
| 
								 | 
							
								#
							 | 
						||
| 
								 | 
							
								#-------------------------------------------------------------------------------
							 | 
						||
| 
								 | 
							
								while ( $h1 = $sth1->fetchrow_hashref ) {
							 | 
						||
| 
								 | 
							
								    #
							 | 
						||
| 
								 | 
							
								    # Report what we found
							 | 
						||
| 
								 | 
							
								    #
							 | 
						||
| 
								 | 
							
								    printf $outfh "%4d: (%-2d)     %s\n", $h1->{eps_id}, $h1->{ms_id},
							 | 
						||
| 
								 | 
							
								        $h1->{title};
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    #
							 | 
						||
| 
								 | 
							
								    # Skip this episode if it already has a series
							 | 
						||
| 
								 | 
							
								    #
							 | 
						||
| 
								 | 
							
								    next if $h1->{series} > 0;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    #
							 | 
						||
| 
								 | 
							
								    # Save this episode for later
							 | 
						||
| 
								 | 
							
								    #
							 | 
						||
| 
								 | 
							
								    $eps{ $h1->{eps_id} } = [ $h1->{title} ];
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    #
							 | 
						||
| 
								 | 
							
								    # Save tag details
							 | 
						||
| 
								 | 
							
								    #
							 | 
						||
| 
								 | 
							
								    $tags{ $h1->{eps_id} } = $h1->{tags};
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    #
							 | 
						||
| 
								 | 
							
								    # Strip any trailing full stop, and chop the title into words
							 | 
						||
| 
								 | 
							
								    #
							 | 
						||
| 
								 | 
							
								    ( $title = $h1->{title} ) =~ s/\.$//;
							 | 
						||
| 
								 | 
							
								    @words = split( /\s+/, $title );
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    #
							 | 
						||
| 
								 | 
							
								    # Remove all unwanted characters and force to lowercase (use 'fc' for case
							 | 
						||
| 
								 | 
							
								    # folding since it's aware of character sets)
							 | 
						||
| 
								 | 
							
								    #
							 | 
						||
| 
								 | 
							
								    foreach my $word (@words) {
							 | 
						||
| 
								 | 
							
								        $word =~ s{[^a-zA-Z./_-]}{}g;
							 | 
						||
| 
								 | 
							
								        $word = fc($word);
							 | 
						||
| 
								 | 
							
								    }
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    #
							 | 
						||
| 
								 | 
							
								    # Clean up the word list after removing unwanted characters
							 | 
						||
| 
								 | 
							
								    #
							 | 
						||
| 
								 | 
							
								    @words = grep { $_ !~ /^-*$/ } @words;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    #
							 | 
						||
| 
								 | 
							
								    # Extract pairs of words before they're made unique and make a hash
							 | 
						||
| 
								 | 
							
								    # pointing to the episodes they originated from
							 | 
						||
| 
								 | 
							
								    #
							 | 
						||
| 
								 | 
							
								    for (my $i = 0; $i < $#words; $i++) {
							 | 
						||
| 
								 | 
							
								        $pair = "$words[$i] $words[$i+1]";
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								        if ( exists( $double_words{$pair} ) ) {
							 | 
						||
| 
								 | 
							
								            $double_words{$pair}->[0] += 1;
							 | 
						||
| 
								 | 
							
								            push( @{ $double_words{$pair} },
							 | 
						||
| 
								 | 
							
								                [ $h1->{eps_id}, $h1->{title} ] );
							 | 
						||
| 
								 | 
							
								        }
							 | 
						||
| 
								 | 
							
								        else {
							 | 
						||
| 
								 | 
							
								            $double_words{$pair} = [ 1, [ $h1->{eps_id}, $h1->{title} ] ];
							 | 
						||
| 
								 | 
							
								        }
							 | 
						||
| 
								 | 
							
								    }
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    #
							 | 
						||
| 
								 | 
							
								    # Make the word list unique
							 | 
						||
| 
								 | 
							
								    #
							 | 
						||
| 
								 | 
							
								    @words = uniq(@words);
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    #
							 | 
						||
| 
								 | 
							
								    # Walk the tidied single word list
							 | 
						||
| 
								 | 
							
								    #
							 | 
						||
| 
								 | 
							
								    foreach my $word (@words) {
							 | 
						||
| 
								 | 
							
								        #
							 | 
						||
| 
								 | 
							
								        # Ignore very short words and words in the ignore list
							 | 
						||
| 
								 | 
							
								        #
							 | 
						||
| 
								 | 
							
								        next if length($word) < 2;
							 | 
						||
| 
								 | 
							
								        next if grep( /^$word$/, @ignore );
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								        #
							 | 
						||
| 
								 | 
							
								        # Save this word in the episodes hash
							 | 
						||
| 
								 | 
							
								        #
							 | 
						||
| 
								 | 
							
								        push( @{ $eps{ $h1->{eps_id} } }, $word );
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								        #
							 | 
						||
| 
								 | 
							
								        # If the word is not known initialise the entry containing an arrayref
							 | 
						||
| 
								 | 
							
								        # with a counter and another arrayref with the saved episode number
							 | 
						||
| 
								 | 
							
								        # and title. If it's known, increment the counter and stash the
							 | 
						||
| 
								 | 
							
								        # episode details as another arrayref.
							 | 
						||
| 
								 | 
							
								        #
							 | 
						||
| 
								 | 
							
								        if ( exists( $single_words{$word} ) ) {
							 | 
						||
| 
								 | 
							
								            $single_words{$word}->[0] += 1;
							 | 
						||
| 
								 | 
							
								            push( @{ $single_words{$word} },
							 | 
						||
| 
								 | 
							
								                [ $h1->{eps_id}, $h1->{title} ] );
							 | 
						||
| 
								 | 
							
								        }
							 | 
						||
| 
								 | 
							
								        else {
							 | 
						||
| 
								 | 
							
								            $single_words{$word} = [ 1, [ $h1->{eps_id}, $h1->{title} ] ];
							 | 
						||
| 
								 | 
							
								        }
							 | 
						||
| 
								 | 
							
								    }
							 | 
						||
| 
								 | 
							
								}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								#
							 | 
						||
| 
								 | 
							
								# We've finished with the database
							 | 
						||
| 
								 | 
							
								#
							 | 
						||
| 
								 | 
							
								$dbh->disconnect;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								#-------------------------------------------------------------------------------
							 | 
						||
| 
								 | 
							
								# Done the first pass, prepare for the next
							 | 
						||
| 
								 | 
							
								#-------------------------------------------------------------------------------
							 | 
						||
| 
								 | 
							
								newOutput( \$outfh, $file_template, $phase++ );
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								#
							 | 
						||
| 
								 | 
							
								# Process the saved data in increasing order of the frequency. Print the word
							 | 
						||
| 
								 | 
							
								# and its frequency and follow that by the stashed episode details in the
							 | 
						||
| 
								 | 
							
								# order we saw them
							 | 
						||
| 
								 | 
							
								#
							 | 
						||
| 
								 | 
							
								foreach my $key (
							 | 
						||
| 
								 | 
							
								    sort { $single_words{$a}->[0] <=> $single_words{$b}->[0] }
							 | 
						||
| 
								 | 
							
								    sort( keys(%single_words) )
							 | 
						||
| 
								 | 
							
								    )
							 | 
						||
| 
								 | 
							
								{
							 | 
						||
| 
								 | 
							
								    if ( $single_words{$key}->[0] > 3 ) {
							 | 
						||
| 
								 | 
							
								        printf $outfh "%15s: %s\n", $key, $single_words{$key}->[0];
							 | 
						||
| 
								 | 
							
								        for ( my $i = 1; $i <= $single_words{$key}->[0]; $i++ ) {
							 | 
						||
| 
								 | 
							
								            printf $outfh "%17s%4d: %s", ' ', @{ $single_words{$key}->[$i] };
							 | 
						||
| 
								 | 
							
								            @taglist
							 | 
						||
| 
								 | 
							
								                = split( /\s*,\s*/, $tags{ $single_words{$key}->[$i]->[0] } );
							 | 
						||
| 
								 | 
							
								            if (@taglist) {
							 | 
						||
| 
								 | 
							
								                print $outfh " [", join( ",", @taglist ), "]\n";
							 | 
						||
| 
								 | 
							
								            }
							 | 
						||
| 
								 | 
							
								            else {
							 | 
						||
| 
								 | 
							
								                print $outfh "\n";
							 | 
						||
| 
								 | 
							
								            }
							 | 
						||
| 
								 | 
							
								        }
							 | 
						||
| 
								 | 
							
								        print $outfh "\n";
							 | 
						||
| 
								 | 
							
								    }
							 | 
						||
| 
								 | 
							
								}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								#-------------------------------------------------------------------------------
							 | 
						||
| 
								 | 
							
								# Done the second pass, prepare for the next
							 | 
						||
| 
								 | 
							
								#-------------------------------------------------------------------------------
							 | 
						||
| 
								 | 
							
								newOutput( \$outfh, $file_template, $phase++ );
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								#
							 | 
						||
| 
								 | 
							
								# Look through the collected data from the point of view of the episode, list
							 | 
						||
| 
								 | 
							
								# all the (relevant) words in the title in order and report their frequencies
							 | 
						||
| 
								 | 
							
								#
							 | 
						||
| 
								 | 
							
								for my $key ( sort { $a <=> $b } keys(%eps) ) {
							 | 
						||
| 
								 | 
							
								    printf $outfh "%4d: %s\n", $key, $eps{$key}->[0];
							 | 
						||
| 
								 | 
							
								    for ( my $i = 1; $i < scalar( @{ $eps{$key} } ); $i++ ) {
							 | 
						||
| 
								 | 
							
								        my $word = $eps{$key}->[$i];
							 | 
						||
| 
								 | 
							
								        printf $outfh "      %15s %d\n", $word, $single_words{$word}->[0];
							 | 
						||
| 
								 | 
							
								    }
							 | 
						||
| 
								 | 
							
								    print $outfh "\n";
							 | 
						||
| 
								 | 
							
								}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								#-------------------------------------------------------------------------------
							 | 
						||
| 
								 | 
							
								# Done the third pass, prepare for the next
							 | 
						||
| 
								 | 
							
								#-------------------------------------------------------------------------------
							 | 
						||
| 
								 | 
							
								newOutput( \$outfh, $file_template, $phase++ );
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								#
							 | 
						||
| 
								 | 
							
								# So the pairs of words we collected earlier might show something interesting.
							 | 
						||
| 
								 | 
							
								# Let's see.
							 | 
						||
| 
								 | 
							
								#
							 | 
						||
| 
								 | 
							
								foreach my $key (
							 | 
						||
| 
								 | 
							
								    sort { $double_words{$a}->[0] <=> $double_words{$b}->[0] }
							 | 
						||
| 
								 | 
							
								    sort( keys(%double_words) )
							 | 
						||
| 
								 | 
							
								    )
							 | 
						||
| 
								 | 
							
								{
							 | 
						||
| 
								 | 
							
								    if ( $double_words{$key}->[0] > 3 ) {
							 | 
						||
| 
								 | 
							
								        printf $outfh "%15s: %s\n", $key, $double_words{$key}->[0];
							 | 
						||
| 
								 | 
							
								        for ( my $i = 1; $i <= $double_words{$key}->[0]; $i++ ) {
							 | 
						||
| 
								 | 
							
								            printf $outfh "%17s%4d: %s", ' ', @{ $double_words{$key}->[$i] };
							 | 
						||
| 
								 | 
							
								            @taglist
							 | 
						||
| 
								 | 
							
								                = split( /\s*,\s*/, $tags{ $double_words{$key}->[$i]->[0] } );
							 | 
						||
| 
								 | 
							
								            if (@taglist) {
							 | 
						||
| 
								 | 
							
								                print $outfh " [", join( ",", @taglist ), "]\n";
							 | 
						||
| 
								 | 
							
								            }
							 | 
						||
| 
								 | 
							
								            else {
							 | 
						||
| 
								 | 
							
								                print $outfh "\n";
							 | 
						||
| 
								 | 
							
								            }
							 | 
						||
| 
								 | 
							
								        }
							 | 
						||
| 
								 | 
							
								        print $outfh "\n";
							 | 
						||
| 
								 | 
							
								    }
							 | 
						||
| 
								 | 
							
								}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								close($outfh);
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								exit;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								#===  FUNCTION  ================================================================
							 | 
						||
| 
								 | 
							
								#         NAME: newOutput
							 | 
						||
| 
								 | 
							
								#      PURPOSE: Generate a new output file
							 | 
						||
| 
								 | 
							
								#   PARAMETERS: $fh             a scalar ref pointing to a variable to hold a
							 | 
						||
| 
								 | 
							
								#                               file handle
							 | 
						||
| 
								 | 
							
								#               $template       a string suitable for sprintf for defining the
							 | 
						||
| 
								 | 
							
								#                               name of the output file
							 | 
						||
| 
								 | 
							
								#               $phase          an integer to be incorporated into the output
							 | 
						||
| 
								 | 
							
								#                               file name
							 | 
						||
| 
								 | 
							
								#      RETURNS: Nothing
							 | 
						||
| 
								 | 
							
								#  DESCRIPTION: Closes any existing file and opens a new one with the same
							 | 
						||
| 
								 | 
							
								#               file handle. The name of the file is derived from the template
							 | 
						||
| 
								 | 
							
								#               and the phase number.
							 | 
						||
| 
								 | 
							
								#       THROWS: No exceptions
							 | 
						||
| 
								 | 
							
								#     COMMENTS: None
							 | 
						||
| 
								 | 
							
								#     SEE ALSO: N/A
							 | 
						||
| 
								 | 
							
								#===============================================================================
							 | 
						||
| 
								 | 
							
								sub newOutput {
							 | 
						||
| 
								 | 
							
								    my ( $fh, $template, $phase ) = @_;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    if ($$fh) {
							 | 
						||
| 
								 | 
							
								        close($$fh) if ( tell($$fh) > -1 );
							 | 
						||
| 
								 | 
							
								    }
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    my $outfile = sprintf( $template, $phase );
							 | 
						||
| 
								 | 
							
								    open( $$fh, '>:encoding(UTF-8)', $outfile )
							 | 
						||
| 
								 | 
							
								        or die "Unable to open $outfile\n";
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    return;
							 | 
						||
| 
								 | 
							
								}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								# vim: syntax=perl:ts=8:sw=4:et:ai:tw=78:fo=tcrqn21:fdm=marker
							 |