#!/bin/bash
# Downloads new videos from the YouTube channels listed in an OPML
# subscription export.
# (c) Ken Fallon http://kenfallon.com
# Released under the CC-0
#
# Subscription list source:
#   https://www.youtube.com/subscription_manager -> /mnt/media/Videos/channels/subscription_manager.opml
# Optional pretty-printing of that export:
#   xmllint --format /mnt/media/Videos/channels/subscription_manager.opml | sponge /mnt/media/Videos/channels/subscription_manager.opml
#
# Workflow:
#   1. Read every feed URL from the OPML file and collect the video URLs
#      found in each feed into "${logfile}_getlist".
#   2. For every unique video URL not yet present in "${logfile}", fetch its
#      metadata, apply the duration/title filters, and queue it in
#      "${logfile}_todo" (its description is saved under description/).
#   3. Download the queued videos and append them to "${logfile}".
#
# Uses: xmlstarlet, wget, awk, jq and youtube-dl.

savepath="/mnt/media/Videos/channels"
subscriptions="${savepath}/subscription_manager.opml"
logfile="${savepath}/log/downloaded.log"
youtubedl="/mnt/media/Videos/youtube-dl/youtube-dl"
# NOTE(review): DRYRUN is not referenced anywhere in the visible script.
DRYRUN="echo DEBUG: "
maxlength=7200 # two hours
# Optional case-insensitive extended regex; videos whose title matches it
# are skipped. Leave unset to disable the filter.
#skipcrap="fail |react |live |Best Pets|BLOOPERS|Kids Try"

# Per-run work files; removed at start-up and again when the run finishes.
getlist="${logfile}_getlist"
todo="${logfile}_todo"

# Remove the per-run work files if they exist.
remove_worklists() {
  local worklist
  for worklist in "${getlist}" "${todo}"; do
    if [[ -e "${worklist}" ]]; then
      rm -v "${worklist}"
    fi
  done
}

if [[ ! -e "${subscriptions}" ]]; then
  echo "Cannot find subscription file \"${subscriptions}\"" >&2
  exit 1
fi

# The log and description directories are written to below; make sure they
# exist so a first run does not fail.
mkdir -p "${savepath}/log" "${savepath}/description"

remove_worklists

# Keep a timestamped copy of the download log before this run appends to it.
if [[ -e "${logfile}" ]]; then
  cp -v "${logfile}" "${logfile}.$(/bin/date +%Y%m%d%H%M%S)"
else
  touch "${logfile}"
fi

# Start with an empty list so the later sort works even when no feed
# yields any entry.
touch "${getlist}"

# Step 1: one "<feed url> <title>" line per subscription; fetch each feed and
# keep the part of every media URL before the first '?'.
xmlstarlet sel -T -t -m '/opml/body/outline/outline' \
  -v 'concat( @xmlUrl, " ", @title)' -n "${subscriptions}" |
  while read -r subscription title; do
    echo "Getting \"${title}\""
    wget -q "${subscription}" -O - |
      xmlstarlet sel -T -t -m '/_:feed/_:entry/media:group/media:content' -v '@url' -n - |
      awk -F '?' '{print $1}' >> "${getlist}"
  done

# Step 2: filter the unique URLs and queue the ones worth downloading.
count=0
total=$(sort -u "${getlist}" | wc -l)
sort -u "${getlist}" | while read -r thisvideo; do
  # Count every entry, including the ones skipped below, so the progress
  # display stays in step with ${total}.
  count=$((count + 1))
  [[ -n "${thisvideo}" ]] || continue

  # Fixed-string match: the URL must not be interpreted as a regex.
  if grep -qF -- "${thisvideo}" "${logfile}"; then
    printf '\rProcessing %s of %s' "${count}" "${total}"
    continue
  fi

  metadata="$("${youtubedl}" --dump-json "${thisvideo}")"
  # jq -r yields the raw string values; "// empty" turns a missing/null
  # field into an empty string.
  uploader="$(printf '%s' "${metadata}" | jq -r '.uploader // empty')"
  title="$(printf '%s' "${metadata}" | jq -r '.title // empty')"
  id="$(printf '%s' "${metadata}" | jq -r '.id // empty')"
  duration="$(printf '%s' "${metadata}" | jq -r '.duration // empty')"
  # Drop any fractional part so the integer comparisons below are valid.
  duration="${duration%%.*}"

  if [[ ! "${duration}" =~ ^[0-9]+$ ]] || ((10#${duration} <= 0)); then
    printf '\nError: The duration "%s" is strange. "%s".\n' "${duration}" "${thisvideo}"
    continue
  elif ((10#${duration} >= maxlength)); then
    printf '\nFilter: You told me not to download titles over %s seconds long "%s", "%s"\n' \
      "${maxlength}" "${title}" "${thisvideo}"
    continue
  fi

  if [[ -n "${skipcrap:-}" ]] && grep -qiE -- "${skipcrap}" <<< "${title}"; then
    printf '\nSkipping: You told me not to download this stuff. %s: "%s", "%s"\n' \
      "${uploader}" "${title}" "${thisvideo}"
    continue
  fi

  printf '\n%s: "%s", "%s"\n' "${uploader}" "${title}" "${thisvideo}"
  printf '%s\n' "${thisvideo}" >> "${todo}"
  printf '%s' "${metadata}" | jq -r '.description // empty' > "${savepath}/description/${id}.txt"
done
echo ""

# Step 3: download the list and record the URLs as handled.
if [[ -e "${todo}" ]]; then
  "${youtubedl}" --batch-file "${todo}" --ignore-errors --no-mtime --restrict-filenames --format mp4 -o "${savepath}"'/%(uploader)s/%(upload_date)s-%(title)s⋄%(id)s.%(ext)s'
  cat "${todo}" >> "${logfile}"
fi

remove_worklists

# The original printed "${savedir}", which is never set in this script;
# the download root is assumed to be what was meant.
echo "${savepath}"