hpr-tools/InternetArchive/parse_ia_audio.awk

#!/usr/bin/awk -f

#-------------------------------------------------------------------------------
# Process tab-delimited data from the Internet Archive with a field name
# header, reporting particular fields. The algorithm is general though this
# instance is specific.
#
# In this case we extract only the audio files
#
# This script is meant to be used thus:
#       $ ia list -va hpr2450 | ./parse_ia_audio.awk
#       hpr2450.flac    derivative
#       hpr2450.mp3     derivative
#       hpr2450.ogg     derivative
#       hpr2450.opus    original
#       hpr2450.spx     original
#       hpr2450.wav     original
#
#-------------------------------------------------------------------------------

# The input is tab-delimited, so split each record on a single tab character.
BEGIN { FS = "\t" }

#
# Read the header line and build a lookup from field name to field number, so
# that later rules can address columns by name regardless of their order.
#
# The 'name' and 'source' columns are both needed for reporting. If either is
# missing, looking it up would give an uninitialised value, which as a field
# index means $0 (the whole record), so stop with a diagnostic instead of
# producing misleading output.
#
NR == 1 {
    for (i = 1; i <= NF; i++) {
        fld[$i] = i
    }

    if (!("name" in fld) || !("source" in fld)) {
        print "parse_ia_audio: header lacks a 'name' or 'source' field" \
            > "/dev/stderr"
        exit 1
    }
}

#
# For every line after the header, print the 'name' and 'source' fields when
# the file name ends in one of the audio extensions.
#
# The pattern is anchored with '$' so that only the final extension counts:
# names such as 'x.mp3.bak' or 'x.wavelet' are not audio files. The leading
# [^.] requires the character just before the extension's dot to be a non-dot.
#
NR > 1 && $(fld["name"]) ~ /[^.]\.(flac|mp3|ogg|opus|spx|wav)$/ {
    printf "%-15s %s\n", $(fld["name"]), $(fld["source"])
}

# vim: syntax=awk:ts=8:sw=4:ai:et:tw=78:nu:rnu: