#!/usr/bin/perl 
#===============================================================================
#
#         FILE: transfer_tags
#
#        USAGE: ./transfer_tags masterfile
#
#  DESCRIPTION: Transfer ID3 (or equivalent) tags from a base file to
#  		whichever of FLAC, MP3, OGG, SPX and WAV versions are found.
#
#      OPTIONS: ---
# REQUIREMENTS: ---
#         BUGS: ---
#        NOTES: ---
#       AUTHOR: Dave Morriss (djm), Dave.Morriss@gmail.com
#      VERSION: 1.4.2
#      CREATED: 31/03/2013 14:18:55
#     REVISION: 24/05/2013 15:54:36
#
#===============================================================================

use 5.010;
use strict;
use warnings;

use File::Basename;
use File::Find::Rule;
use Audio::TagLib;

#
# Version number (manually incremented)
#
our $VERSION = '1.4.2';

#
# Declarations
#
my ( $ref, $tag, $tags, %mastertags, %slavetags, $changed );
my ( $directories, $filename, $suffix );

#
# The extensions the various codings are expected to have (it looks both for
# the lower- and upper-case versions, but only the lower-case ones are
# required here)
#
my @exts = qw{ flac mp3 ogg spx wav };

#
# The additional name variants we'll accept
#
my @variants = qw{ _mez };

#
# Used to test tag conformity. The key is the tagname, the value is a hashref
# containing a regex (key 're') for detecting conformity and the correct value
# (key 'ok').
# To be passed to subroutine 'checkConformity', but kept here for ease of
# maintenance.
#
my %tag_control = (
    album => { re => qr{^Hacker Public Radio$}, ok => 'Hacker Public Radio' },
    comment => {
        re => qr{^http://hackerpublicradio\.org/?},
        ok => 'http://hackerpublicradio.org'
    },
    genre => { re => qr{(?i)^Podcast$}, ok => 'Podcast' },
);

#
# The Audio::TagLib methods to call for each tag manipulated by the script.
# The number after the method name is 1 if the value being set is a string,
# and zero otherwise.
#
my %tagmethods = (
    album   => [ 'setAlbum',   1 ],
    artist  => [ 'setArtist',  1 ],
    comment => [ 'setComment', 1 ],
    genre   => [ 'setGenre',   1 ],
    title   => [ 'setTitle',   1 ],
    track   => [ 'setTrack',   0 ],
    year    => [ 'setYear',    0 ],
);

#
# Because Audio::TagLib::FileRef does not seem to commit the tag update until
# very late (during the DESTROY?) it's very difficult to update file times
# _after_ the tags have been written. The solution is to save all of the tag
# hashes (which contain the file path and times) and process then in the END{}
# block. Dirty, but very Perl-ish.
#
my @tag_stash;

#
# Script name
#
( my $PROG = $0 ) =~ s|.*/||mx;

#
# Ensure STDOUT is in UTF8 mode
#
binmode( STDOUT, ":encoding(utf8)" );

#
# Get the argument, the "master" file
#
my $masterfile = shift;
die "Usage: $PROG masterfilename\n" unless $masterfile;

#
# Check the file exists
#
die "$masterfile does not exist\n" unless ( -e $masterfile );

#
# Assume there are other versions of the same file with suffixes in the set
# { flac, mp3, ogg, spx, wav } so build these names from the master file name.
# Start by parsing the filename into its directories, filename and suffix.
# Remove the leading dot from the suffix.
#
($filename,$directories,$suffix) = fileparse($masterfile,qr/\.[^.]*/);
$suffix =~ s/^\.//;

#
# Reject the file if it doesn't have an expected suffix (we're not
# case-sensitive here)
#
die "$masterfile does not have a recognised suffix (expecting "
    . join( ", ", @exts ) . ")\n"
    unless ( grep {/$suffix/i} @exts );

#
# Use File::Find::Rule to find all the files that match a regular expression.
# This is built from the parsed file, with an optional list of variants and
# all of the extensions (case insensitively). We then remove the master file
# from the list and we're done.
#
my $re
    = "^${filename}("
    . join( '|', @variants ) . ")?\.(?i:"
    . join( '|', @exts ) . ')$';
my @files = grep { !/^$masterfile$/ }
    File::Find::Rule->file()->name(qr{$re})->in($directories);

#
# Log the file
#
print "$masterfile\n";

#
# Collect the tags from the master file (& stash them for later too)
#
( $ref, $tag, $tags ) = collectTags($masterfile);
%mastertags = %$tags;
push( @tag_stash, $tags );

#
# Report the tags found in the master file
#
reportTags( \%mastertags );

#
# Check that the master file conforms to the HPR standards, ensuring that any
# changes are returned from the routine
#
( $ref, $tag, $tags ) = checkConformity( $ref, $tag, $tags, \%tag_control );
%mastertags = %$tags;
print '=' x 80, "\n";

#
# Now process the "slave" files
#
foreach my $file (@files) {
    #
    # Check the "slave" file exists and report it if so
    #
    if ( -r $file ) {
        print "$file\n";
    }
    else {
        warn "$file is not readable\n";
        next;
    }

    #
    # Get the "slave" file's tags (& keep a copy in the stash)
    #
    ( $ref, $tag, $tags ) = collectTags($file);
    %slavetags = %$tags;
    push( @tag_stash, $tags );

    #
    # Report the tags
    #
    reportTags( \%slavetags );

    #
    # Change the tags to match the "master" file's tags
    #
    $changed = 0;
    for my $t ( sort( grep { !/^_/ } keys(%mastertags) ) ) {
        $changed += changeTag( $tag, $t, $slavetags{$t}, $mastertags{$t},
            @{ $tagmethods{$t} } );
    }
    print '-' x 80, "\n";

    #
    # Save any changes
    #
    if ($changed) {
        $ref->save();
    }
}

exit;

#-------------------------------------------------------------------------------
#  Post-processing of file times
#
#  Process all files we've visited (whether their tags were changed or not)
#  and force the 'atime' and 'mtime' back to their starting values. This will
#  be done after the rest of the script runs, when we know that all of the
#  Audio::TagLib::FileRef objects have been destroyed and have done their lazy
#  updates.
#-------------------------------------------------------------------------------
END {
    for my $t (@tag_stash) {
        warn "Time restoration failed on $t->{_path}\n"
            unless restoreTimes($t);
    }
}

#===  FUNCTION  ================================================================
#         NAME: collectTags
#      PURPOSE: Collects tags from a media file
#   PARAMETERS: $filepath               Path to the file
#      RETURNS: A list containing an Audio::TagLib::FileRef object, an
#               Audio::TagLib::Tag object (containing the actual tags) and
#               a hashref containing the converted tag values (along with
#               a few other attributes).
#  DESCRIPTION: Collects the tags and the timestamps from the file. Then the
#               various tags and other attributes are placed in a hash which
#               will be returned to the caller. The non-tag keys begin with
#               '_' to differentiate them.
#       THROWS: No exceptions
#     COMMENTS: None
#     SEE ALSO:
#===============================================================================
sub collectTags {
    my ($filepath) = @_;

    my ( $atime, $mtime ) = ( stat($filepath) )[ 8, 9 ];

    my $fileref = Audio::TagLib::FileRef->new($filepath);
    my $ftag    = $fileref->tag();

    my %tags = (
        album   => $ftag->album()->toCString(),
        artist  => $ftag->artist()->toCString(),
        comment => $ftag->comment()->toCString(),
        genre   => $ftag->genre()->toCString(),
        title   => $ftag->title()->toCString(),
        track   => $ftag->track(),
        year    => $ftag->year(),
        _path   => $filepath,
        _atime  => $atime,
        _mtime  => $mtime,
    );

    return ( $fileref, $ftag, \%tags );
}

#===  FUNCTION  ================================================================
#         NAME: reportTags
#      PURPOSE: Print the tags in a hash
#   PARAMETERS: $tags           Hashref keyed by tagname and containing tag
#                               contents from a media file
#      RETURNS: Nothing
#  DESCRIPTION: Just prints all the "proper" tags held in the hash argument in
#               alphabetical order of the keys. Note that the "secret" keys,
#               those begining with '_', are skipped. See 'collectTags' for
#               what they are.
#       THROWS: No exceptions
#     COMMENTS: None
#     SEE ALSO:
#===============================================================================
sub reportTags {
    my ($tags) = @_;

    my @keys = sort( grep { !/^_/ } keys(%$tags) );

    for my $key (@keys) {
        printf "%-10s: %s\n", $key, $tags->{$key};
    }

    return;
}

#===  FUNCTION  ================================================================
#         NAME: changeTag
#      PURPOSE: Changes a tag to a new value if appropriate
#   PARAMETERS: $tag            Audio::TagLib::Tag object
#               $tagname        Name of tag
#               $oldValue       Current value of tag
#               $newValue       New value of tag or undefined
#               $setFunc        String containing the name of the 'set'
#                               function
#               $isString       True if the value being set is a string
#      RETURNS: 1 if a change has been made, 0 otherwise
#  DESCRIPTION: Performs some argument checks, returning on a missing new
#               value, or if the old and new values are the same. The old and
#               new values may be encoded integers, so we look for this
#               eventuality. After all of this we know there's a change to be
#               made and perform the appropriate steps to make it.
#       THROWS: No exceptions
#     COMMENTS: None
#     SEE ALSO:
#===============================================================================
sub changeTag {
    my ( $tag, $tagname, $oldValue, $newValue, $setFunc, $isString ) = @_;

    return 0 unless defined($newValue);
    return 0 if $oldValue eq $newValue;

    $isString = 0 unless defined($isString);

    if ( !$isString ) {
        return 0 if int($oldValue) == int($newValue);
    }

    print ">> Changing $tagname to '$newValue'\n";
    $tag->$setFunc(
        (   $isString
            ? Audio::TagLib::String->new($newValue)
            : $newValue
        )
    );
    return 1;
}

#===  FUNCTION  ================================================================
#         NAME: restoreTimes
#      PURPOSE: Restore the original times to a file which has had its tags
#               changed
#   PARAMETERS: $tags           Hashref keyed by tagname and containing tag
#                               contents (and file attributes) from a media
#                               file. The file details have keys beginning
#                               with '_'.
#      RETURNS: Number of files changed (see 'utime')
#  DESCRIPTION: Uses the Perl 'utime' function to change the file's access
#               time and modification time to whatever is in the hash. These
#               are expected to be the times the file had when it was first
#               encountered.
#       THROWS: No exceptions
#     COMMENTS: None
#     SEE ALSO:
#===============================================================================
sub restoreTimes {
    my ($tags) = @_;

    return utime( $tags->{_atime}, $tags->{_mtime}, $tags->{_path} );
}

#===  FUNCTION  ================================================================
#         NAME: checkConformity
#      PURPOSE: Check that the master file has conforming tags, fixing them if
#               not
#   PARAMETERS: $ref            Audio::TagLib::FileRef relating to the master
#                               file
#               $tag            Audio::TagLib::Tag containing the tags of the
#                               master file
#               $tags           Hashref containing the converted tags (and
#                               a few other odds and sods)
#               $kosher         Hashref containing the checking values (see
#                               %tag_control in the main program)
#      RETURNS: A list containing $ref, $tag and $tags as described above
#  DESCRIPTION: Implements a number of complex rules. Firstly the 'genre' tag
#               is expected to contain 'Podcast'. Secondly the 'album' tag
#               must contain 'Hacker Public Radio'. If it does not then the
#               value is stored for later then replaced. Finally the
#               'comment' tag must begin with 'http://hackerpublicradio.org'.
#               If it does not its current contents are stored and replaced
#               with the required URL. However, the comment tag will also
#               contain the saved album tag (if any) and the saved comment,
#               and these will be placed at the end.
#       THROWS: No exceptions
#     COMMENTS: This code is ugly and difficult to extend and maintain.
#               TODO look into ways of improving it!
#     SEE ALSO:
#===============================================================================
sub checkConformity {
    my ( $ref, $tag, $tags, $kosher ) = @_;

    my $changed = 0;
    my %saved;
    my ( $t, $commentOK, $newval );

    #
    # The 'genre' tag
    #
    $t = 'genre';
    unless ( $tags->{$t} =~ /$kosher->{$t}->{re}/ ) {
        $changed += changeTag(
            $tag, $t, $tags->{$t},
            $kosher->{$t}->{ok},
            @{ $tagmethods{$t} }
        );
        $tags->{genre} = $tag->genre()->toCString();
    }

    #
    # The 'album' tag. We save this one for adding to the comment
    #
    $t = 'album';
    unless ( $tags->{$t} =~ /$kosher->{$t}->{re}/ ) {
        ( $saved{$t} = $tags->{$t} ) =~ s/(^\s+|\s+$)//g;
        $changed += changeTag(
            $tag, $t, $tags->{$t},
            $kosher->{$t}->{ok},
            @{ $tagmethods{$t} }
        );
        $tags->{album} = $tag->album()->toCString();
    }

    #
    # If the 'comment' is non-standard *or* if the 'album' was changed we want
    # to do stuff here. We make sure the 'comment' is good and append the
    # original 'album' and 'comment' as appropriate.
    #
    $t = 'comment';
    $commentOK = $tags->{$t} =~ /$kosher->{$t}->{re}/;
    unless ( !$changed && $commentOK ) {
        ( $saved{$t} = $tags->{$t} ) =~ s/(^\s+|\s+$)//g;

        if ($changed) {
            if ($commentOK) {
                # Album had errors, comment is OK
                $newval = concat( ", ", $saved{comment}, $saved{album} );
            }
            else {
                # Album had errors, comment also
                $newval = concat( ", ", $kosher->{$t}->{ok},
                    $saved{album}, $saved{comment} );
            }
        }
        else {
            # Comment had errors, album OK
            $newval = concat( ", ", $kosher->{$t}->{ok}, $saved{comment} );
        }

        $changed += changeTag( $tag, $t, $tags->{$t},
            $newval, @{ $tagmethods{$t} } );
        $tags->{comment} = $tag->comment()->toCString();
    }

    #
    # Save any changes
    #
    if ($changed) {
        $ref->save();
    }

    #
    # Return tag-related stuff so the caller can get the benefit
    #
    return ( $ref, $tag, $tags );

}

#===  FUNCTION  ================================================================
#         NAME: concat
#      PURPOSE: Reimplementation of join but with any undefined or empty
#               arguments removed
#   PARAMETERS: $sep            The string to be used to separate elements in
#                               the result
#               [variable args] Any number of arguments to be joined together
#                               with the separator
#      RETURNS: The concatenated arguments
#  DESCRIPTION: Giving 'join' an array that may contain undefined elements will
#               result in empty results in the output string and error
#               messages as the undefined elements are processed. Giving it
#               empty string elements will result in dangling separators in
#               the output. This routine removes the undefined and empty
#               elements before joining the rest.
#       THROWS: No exceptions
#     COMMENTS: None
#     SEE ALSO:
#===============================================================================
sub concat {
    my $sep = shift;

    my @args = grep { defined($_) && length($_) > 0 } @_;

    return join( $sep, @args );
}

__END__

#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
#  Application Documentation
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
#{{{

=head1 NAME

transfer_tags - standardise and transfer tags between HPR audio files

=head1 VERSION

This documentation refers to I<transfer_tags> version 1.4.2


=head1 USAGE

        transfer_tags masterfile


=head1 REQUIRED ARGUMENTS

=over 4

=item B<masterfile>

This is the name of the audio file, which contains the definitive tags which
are to be copied to all of the other files of the same name but different
extensions.

=back


=head1 DESCRIPTION

The script transfers ID3 (or equivalent) tags from a base file to whichever of
FLAC, MP3, OGG, SPX and WAV versions are found. The tags copied are: B<album>,
B<artist>, B<comment>, B<genre>, B<title>, B<track> and B<year>. The target
files are determined by taking the name of the B<master file> without its
extension and appending all of the remaining extensions in the list. Files
with the string "B<_mez>" between the filename and the extension are also
included.

For example: if the B<master file> is called B<hpr1234.flac> and FLAC, MP3,
OGG, SPX and WAV versions exist, the tags found in the file B<hpr1234.flac>
are copied to B<hpr1234.mp3>, B<hpr1234.ogg>, B<hpr1234.spx> and
B<hpr1234.wav>. If B<hpr1234_mez.mp3> or any other variant existed it would
also receive a copy of the tags.

A certain amount of manipulation is performed before the tags are propagated.
The changes made conform to certain rules, which are:

=over 4

=item .

The B<genre> tag must contain the string "I<Podcast>".

=item .

The B<album> tag must contain the string "I<Hacker Public Radio>". If it does
not then the existing value is stored for later and is then replaced.

=item .

The B<comment> tag must begin with the string
"I<http://hackerpublicradio.org>".  If it does not its current contents are
stored and replaced with the required URL. However, the comment tag will also
contain the saved album tag (if any) and the saved comment (if any), and these
will be placed at the end, separated by commas.

=back

The script saves the access time and modification time of all of the media
files it processes. It then restores these times at the end of its run. This
prevents any external processes which depend on these file times from being
confused by the tag changes.

=head1 DIAGNOSTICS

=over 4

=item B<Usage: transfer_tags masterfile>

This error is produced if the script is called without the mandatory argument.
The error is fatal.

=item B<... does not exist>

The master file specified as the argument does not exist. The error is fatal.

=item B<... does not have a recognised suffix (expecting ...)>

The master file specified as the argument does not have one of the expected
extensions (flac, mp3, ogg, spx, wav). The error is fatal.

=item B<... is not readable>

One of the target files was found not to be readable (probably due to file
permissions). The script will ignore this file.

=item B<Time restoration failed on ...>

The master file or one of the target files could not have its time restored.
The script will ignore this file.

=back


=head1 DEPENDENCIES

    Audio::TagLib
    File::Basename
    File::Find::Rule


=head1 BUGS AND LIMITATIONS

There are no known bugs in this module.
Please report problems to Dave Morriss (Dave.Morriss@gmail.com).
Patches are welcome.


=head1 AUTHOR

Dave Morriss (Dave.Morriss@gmail.com) 2013


=head1 LICENCE AND COPYRIGHT

Copyright (c) 2013 Dave Morriss (Dave.Morriss@gmail.com). All rights reserved.

This program is free software; you can redistribute it and/or
modify it under the same terms as Perl itself. See perldoc perlartistic.

=cut

#}}}

# [zo to open fold, zc to close]

# vim: syntax=perl:ts=8:sw=4:et:ai:tw=78:fo=tcrqn21:fdm=marker
