New feeds added since 28th January 2023

This commit is contained in:
Dave Morriss 2023-02-19 19:54:25 +00:00
parent de0c54c99a
commit be7cb01720
9 changed files with 572 additions and 190 deletions

View File

@ -29,9 +29,9 @@
# BUGS: ---
# NOTES: ---
# AUTHOR: Dave Morriss (djm), Dave.Morriss@gmail.com
# VERSION: 0.1.2
# VERSION: 0.1.4
# CREATED: 2013-12-25 12:40:33
# REVISION: 2023-01-10 22:44:38
# REVISION: 2023-01-31 20:45:23
#
#-------------------------------------------------------------------------------
# Released under the terms of the GNU Affero General Public License (AGPLv3)
@ -58,6 +58,7 @@ use Getopt::Long;
use Pod::Usage;
use Config::General;
use List::MoreUtils qw{uniq};
use Set::Array;
use Log::Handler;
use Try::Tiny;
@ -93,7 +94,7 @@ use Data::Dumper;
#
# Version number (manually incremented)
#
our $VERSION = '0.1.2';
our $VERSION = '0.1.4';
#
# Script name
@ -106,6 +107,7 @@ our $VERSION = '0.1.2';
my ( $action_mode, @urls, @deletions );
my ( $rules, $robot_name ) = ( undef, "$PROG/$VERSION" );
my ( $sth1, $h1, $rv, $search_target, $rejectcount );
my ( $loadfile, $deletefile ) = ( '', '' );
my $feeds;
@ -208,12 +210,16 @@ my $cfgfile
my $dry_run = ( defined( $options{'dry-run'} ) ? $options{'dry-run'} : 0 );
my $silent = ( defined( $options{silent} ) ? $options{silent} : 0 );
my $loadfile = $options{'load'};
my $deletefile = $options{'delete'};
my $load = $options{'load'};
my $delete = $options{'delete'};
my $inputfile = $options{'input'};
my $scan = ( defined( $options{scan} ) ? $options{scan} : 0 );
my $refresh = ( defined( $options{refresh} ) ? $options{refresh} : 0 );
my $expire = ( defined( $options{expire} ) ? $options{expire} : 0 );
my $html = ( defined( $options{html} ) ? $options{html} : 0 );
my $ignore_case
= ( defined( $options{'ignore-case'} ) ? $options{'ignore-case'} : 0 );
my $check = $options{check};
my $outfile = $options{out};
@ -231,10 +237,22 @@ my $template = $options{template};
# Sanity
#
die "Choose either -load or -delete, not both\n"
if (defined($loadfile) && defined($deletefile));
if (defined($load) && defined($delete));
die "Options -load and -delete should not be combined with -scan or -refresh\n"
if ( ( defined($loadfile) || defined($deletefile) )
&& ( $scan || $refresh ) );
if ( ( defined($load) || defined($delete) ) && ( $scan || $refresh ) );
#
# Check the -input=FILE option is used with -load or -delete and confirm the
# existence and readability of the input file if specified.
#
if ( defined($inputfile) ) {
die "Option -input=FILE must be used with -load or -delete\n"
unless ($load || $delete);
die "File in '-input=$inputfile' does not exist\n"
unless -e $inputfile;
die "File in '-input=$inputfile' is not readable\n"
unless -r $inputfile;
}
#
# Check the configuration file
@ -242,28 +260,30 @@ die "Options -load and -delete should not be combined with -scan or -refresh\n"
die "Unable to find configuration file $cfgfile\n" unless ( -e $cfgfile );
#
# Process the load option and the delete option, checking any files mentioned,
# and determining the primary action we're aiming for.
# Determine the action mode, reading a file and/or using URLs on the command
# line.
#
if (optionalFile('load', $loadfile)) {
if ($load) {
$action_mode = 'load';
$loadfile = $inputfile;
_debug(
$DEBUG > 0,
"Action mode: $action_mode",
( $loadfile eq ''
? "Load from arguments"
: "File to load: $loadfile"
( defined($loadfile)
? "File to load: $loadfile"
: "Load from arguments"
)
);
}
elsif (optionalFile('delete', $deletefile)) {
elsif ($delete) {
$action_mode = 'delete';
$deletefile = $inputfile;
_debug(
$DEBUG > 0,
"Action mode: $action_mode",
( $deletefile eq ''
? "Delete from arguments"
: "File to delete from $deletefile"
( defined($deletefile)
? "File to delete from: $deletefile"
: "Delete from arguments"
)
);
}
@ -271,13 +291,15 @@ else {
$action_mode = 'none';
}
#
# The copyright checking mode defaults to 'auto' if the option has no value,
# or may be 'manual' or 'none'. If the option is not used at all it defaults
# to 'none'. It's only relevant to the 'load' action though.
#
if ( $action_mode eq 'load' ) {
if ( $action_mode eq 'load' && defined($check) ) {
# if ( $action_mode eq 'load' && defined($check) ) {
if ( defined($check) ) {
$check =~ s/(^\s+|\s+$)//g;
if ($check =~ /^$/) {
$check = "auto";
@ -295,8 +317,10 @@ if ( $action_mode eq 'load' ) {
emit($silent,"Copyright check mode = $check\n");
}
emit($silent,"Dry run mode = " . ($dry_run ? "On" : "Off") . "\n");
emit($silent,"----\n");
if ($dry_run) {
emit( $silent, "Dry run mode = On\n" );
emit( $silent, "----\n" );
}
# TODO: Does it make sense to have -load and -report, etc at the same time?
#
@ -374,16 +398,19 @@ my $dbh
$dbh->do('PRAGMA foreign_keys = ON');
#
# Check we have something to do
# Check we have something to do. NOTE: this check is a bit early because we
# haven't read the input file yet, if there is one.
#
my $rows = countRows( $dbh, 'SELECT count(*) FROM urls' );
my $work = (
( scalar(@urls) > 0 && $action_mode =~ /load|delete/ )
|| ( defined($report)
|| defined($json)
|| defined($opml)
|| defined($template) )
|| ( ( $scan || $refresh ) && $rows > 0 )
( ( scalar(@urls) > 0 || defined($inputfile) )
&& $action_mode =~ /load|delete/
)
|| ( defined($report)
|| defined($json)
|| defined($opml)
|| defined($template) )
|| ( ( $scan || $refresh ) && $rows > 0 )
);
unless ($work) {
@ -501,7 +528,7 @@ if ($action_mode eq 'load') {
@urls = loadUrls( $dbh, \@urls, $rules, \%keymap, $dry_run );
}
}
elsif ($action_mode eq 'delete') {
elsif ( $action_mode eq 'delete' ) {
#
# Process the delete file if there is one
#
@ -513,7 +540,8 @@ elsif ($action_mode eq 'delete') {
or die "$PROG : failed to open load file '$deletefile' : $!\n";
chomp( @deletions = <$del> );
close($del)
or warn "$PROG : failed to close delete file '$deletefile' : $!\n";
or warn
"$PROG : failed to close delete file '$deletefile' : $!\n";
#
# Add the loaded URLs to the array
@ -531,26 +559,70 @@ elsif ($action_mode eq 'delete') {
# TODO: check that these URLs are actually in the database! Seems
# silly to report "Failed to delete" when it's not there anyway!
#
#
# There are URLs to delete. Process them one by one.
#
if ($dry_run) {
emit( $silent,
"Would have deleted " . scalar(@urls) . " URLs\n" )
"Would have deleted "
. scalar(@urls)
. " URLs (after checking)\n" );
}
else {
$sth1 = $dbh->prepare(q{DELETE from urls WHERE url = ?});
my @missing;
#
# Check the URLs exist
#
$sth1 = $dbh->prepare(q{SELECT id from urls WHERE url = ?});
foreach my $rec (@urls) {
$rv = $sth1->execute($rec);
if ( $dbh->err ) {
warn $dbh->errstr;
}
if ( $rv != 0 ) {
emit ( $silent, "Deleted $rec ($rv rows)\n" );
$LOG->info( "Deleted URL '$rec' from the database" );
$h1 = $sth1->fetchrow_hashref;
unless ($h1) {
emit( $silent, "Could not find URL $rec in the database\n" );
$LOG->warning(
"Failed to delete '$rec'; not in the database");
push( @missing, $rec );
}
else {
emit ( $silent, "Failed to delete $rec\n" );
$LOG->warning( "Failed to delete '$rec' from the database" );
}
#
# Remove the missing URLs from @urls
#
if (@missing) {
my $sa1 = Set::Array->new(@urls);
my $sa2 = Set::Array->new(@missing);
@urls = $sa1->difference($sa2);
}
#
# If nothing is left we're done
#
unless (@urls) {
warn "No URLs left after cleaning\n";
}
else {
#
# Delete what's left after cleaning
#
$sth1 = $dbh->prepare(q{DELETE from urls WHERE url = ?});
foreach my $rec (@urls) {
$rv = $sth1->execute($rec);
if ( $dbh->err ) {
warn $dbh->errstr;
}
if ( $rv != 0 ) {
emit( $silent, "Deleted $rec ($rv rows)\n" );
$LOG->info("Deleted URL '$rec' from the database");
}
else {
emit( $silent, "Failed to delete $rec\n" );
$LOG->warning(
"Failed to delete '$rec' from the database");
}
}
}
}
@ -584,7 +656,7 @@ if ( defined($report) ) {
#
# Reporting a specific title
#
my @matches = searchTitle( $dbh, $search_target );
my @matches = searchTitle( $dbh, $search_target, $ignore_case );
if (@matches) {
#
# Too many matches!
@ -979,6 +1051,8 @@ sub loadUrls {
# PURPOSE: Search the database for a feed with a given title
# PARAMETERS: $dbh database handle
# $target search target
# $ignore_case Boolean controlling whether it's a caseless
# search
# RETURNS: A list of titles
# DESCRIPTION:
# THROWS: No exceptions
@ -986,7 +1060,7 @@ sub loadUrls {
# SEE ALSO: N/A
#===============================================================================
sub searchTitle {
my ($dbh, $target) = @_;
my ($dbh, $target, $ignore_case) = @_;
my ( $sql1, $sql2, $sth, $rv, $h );
my ( $count, @result );
@ -1006,11 +1080,16 @@ sub searchTitle {
ORDER BY title
};
#
# Handle caseless searches
#
$target = ($ignore_case ? '(?i)' : '') . $target;
#
# Count the number of matches
#
$sth = $dbh->prepare($sql1);
$rv = $sth->execute($search_target);
$rv = $sth->execute($target);
if ( $dbh->err ) {
warn $dbh->errstr;
return;
@ -1025,7 +1104,7 @@ sub searchTitle {
if ( $count >= 1 ) {
$sth = $dbh->prepare($sql2);
$rv = $sth->execute($search_target);
$rv = $sth->execute($target);
if ( $dbh->err ) {
warn $dbh->errstr;
return;
@ -1555,6 +1634,8 @@ sub reportFeed {
'urls_link' => 'Link',
'urls_modified' => 'Modified on',
'urls_reason_accepted' => 'Reason accepted',
'urls_status' => 'Status',
'urls_summary' => 'Summary',
'urls_title' => 'Title',
'urls_url' => 'Feed URL',
'urls_urltype' => 'URL type',
@ -1573,6 +1654,7 @@ sub reportFeed {
'urls_check_type',
'urls_reason_accepted',
'urls_description',
'urls_summary',
'urls_dns',
'urls_generator',
'urls_host_up',
@ -1583,6 +1665,7 @@ sub reportFeed {
'urls_modified',
'urls_parent_id',
'urls_child_count',
'urls_status',
);
@seq2 = (
@ -1604,10 +1687,10 @@ sub reportFeed {
print $fh "Channel:\n";
foreach my $key (@seq1) {
#
# Format the feed description with a left margin using textFormat.
# Everything else gets a simpler layout.
# Format the feed description, summary and copyright with a left
# margin using textFormat. Everything else gets a simpler layout.
#
if ($key eq 'urls_description') {
if ($key =~ /^urls_(description|summary|copyright)$/) {
printf $fh "%s\n",
textFormat(
coalesce( $feed->{$key}, '--' ),
@ -2638,40 +2721,6 @@ sub updateEnclosures {
$h1 = $sth1->fetchrow_hashref;
}
#=== FUNCTION  ================================================================
#         NAME: optionalFile
#      PURPOSE: Validate the value of an option of the form '-opt:s', where
#               's' is a filename which may be left out.
#   PARAMETERS: $optionName     Name of the option (only used in messages)
#               $optionValue    Value of the option: undefined, blank or
#                               a filename
#      RETURNS: A boolean: 1 (true) if the value is defined (either blank or
#               the name of a usable file), 0 (false) if it is undefined.
#  DESCRIPTION: An undefined value gives 0 and a blank value gives 1, both
#               without touching the file system. Any other value is taken
#               to be a filename; its existence is tested first, then its
#               readability, and the script dies if either test fails.
#       THROWS: Dies with a message if the file does not exist or is not
#               readable
#     COMMENTS: None
#     SEE ALSO: N/A
#===============================================================================
sub optionalFile {
    my ( $optionName, $optionValue ) = @_;

    #
    # No value at all
    #
    return 0 unless defined($optionValue);

    #
    # A blank value needs no file checks
    #
    return 1 if $optionValue =~ /^$/;

    #
    # Anything else has to be an existing, readable file
    #
    ( -e $optionValue )
        or die "File in '-$optionName=$optionValue' does not exist\n";
    ( -r $optionValue )
        or die "File in '-$optionName=$optionValue' is not readable\n";

    return 1;
}
#=== FUNCTION ================================================================
# NAME: textFormat
# PURPOSE: Formats a block of text in an indented, wrapped style with
@ -2928,11 +2977,12 @@ sub Options {
my ($optref) = @_;
my @options = (
"help", "manpage", "debug=i", "dry-run!",
"silent!", "load:s", "delete:s", "scan!",
"refresh!", "report:s", "html!", "check:s",
"json:s", "opml:s", "config=s", "out=s",
"rejects:s", "template:s",
"help", "manpage", "debug=i", "dry-run!",
"silent!", "load", "delete", "input=s",
"scan!", "refresh!", "expire!", "report:s",
"ignore-case!", "html!", "check:s", "json:s",
"opml:s", "config=s", "out=s", "rejects:s",
"template:s",
);
if ( !GetOptions( $optref, @options ) ) {
@ -2956,7 +3006,7 @@ feedWatcher - watch a collection of podcast feeds
=head1 VERSION
This documentation refers to I<feedWatcher> version 0.1.2
This documentation refers to I<feedWatcher> version 0.1.4
=head1 USAGE

Binary file not shown.

View File

@ -718,6 +718,14 @@
<dt><a href="https://parlonslinux.fr/@ParlonsLinuxFR">Parlons Linux</a> (<a href="https://parlonslinux.fr/@ParlonsLinuxFR/feed.xml">feed</a>)</dt>
<dd>&lt;p&gt;Ce Podcast est destin&eacute; a toute personne d&eacute;sirant apprendre de mani&egrave;re diff&eacute;rente sur Linux. Mais &eacute;galement une porte ouverte &agrave; tout ceux qui d&eacute;sire cr&eacute;e leur podcast la plateforme est ouvert &agrave; tous.&lt;/p&gt;</dd>
<dt><a href="https://podcastle.org/">PodCastle</a> (<a href="http://podcastle.org/feed/">feed</a>)</dt>
@ -1006,6 +1014,14 @@
<dt><a href="https://cast.postmarketos.org/">postmarketOS Podcast</a> (<a href="https://cast.postmarketos.org/feed.rss">feed</a>)</dt>
<dd>News/interviews/anecdotes around postmarketOS, straight from the source.</dd>
<dt><a href="https://urandom-podcast.info/">urandom podcast</a> (<a href="http://feeds.feedburner.com/urandom-podcast/ogg">feed</a>)</dt>

File diff suppressed because it is too large Load Diff

View File

@ -332,6 +332,11 @@ NonCommercial NoDerivs licence.
- Feed: https://opensourcesecuritypodcast.libsyn.com/rss
- Copyright: This work is licensed under the Creative Commons Attribution 4.0 International License. To view a copy of this license, visit http://creativecommons.org/licenses/by/4.0/ or send a letter to Creative Commons, PO Box 1866, Mountain View, CA 94042, USA.
- **Parlons Linux**
- Website: https://parlonslinux.fr/@ParlonsLinuxFR
- Feed: https://parlonslinux.fr/@ParlonsLinuxFR/feed.xml
- Copyright: CC BY-SA 3.0
- **PodCastle**
- Website: https://podcastle.org/
- Feed: http://podcastle.org/feed/
@ -512,6 +517,11 @@ NonCommercial NoDerivs licence.
- Feed: http://fullcirclemagazine.org/category/podcast/feed/
- Copyright:
- **postmarketOS Podcast**
- Website: https://cast.postmarketos.org/
- Feed: https://cast.postmarketos.org/feed.rss
- Copyright: Creative Commons Attribution-ShareAlike 4.0 International (CC BY-SA 4.0) License
- **urandom podcast**
- Website: https://urandom-podcast.info/
- Feed: http://feeds.feedburner.com/urandom-podcast/ogg

View File

@ -2,8 +2,8 @@
<opml version="1.1">
<head>
<title>Free Culture Podcasts</title>
<dateCreated>2023-01-28 14:09:45</dateCreated>
<dateModified>2023-01-28 14:09:45</dateModified>
<dateCreated>2023-02-19 19:51:22</dateCreated>
<dateModified>2023-02-19 19:51:22</dateModified>
<ownerName></ownerName>
<ownerEmail></ownerEmail>
<expansionState></expansionState>
@ -79,6 +79,7 @@
<outline description="A bi-weekly discussion of legal issues in the FLOSS world, including interviews, from the Software Freedom Law Center offices in New York. Presented by Karen Sandler and Bradley M. Kuhn." htmlUrl="http://www.softwarefreedom.org/podcast/http://www.softwarefreedom.org/" text="OggcastSoftware Freedom Law Center" title="OggcastSoftware Freedom Law Center" xmlUrl="http://www.softwarefreedom.org/feeds/podcast-mp3/" />
<outline description="Creative Commons presents conversations with people working to make the Internet and our global culture more open and collaborative." htmlUrl="https://anchor.fm/creativecommons" text="Open Minds … from Creative Commons" title="Open Minds … from Creative Commons" xmlUrl="https://anchor.fm/s/4d70d828/podcast/rss" />
<outline description="A security podcast geared towards those looking to better understand security topics of the day. Hosted by Kurt Seifried and Josh Bressers covering a wide range of topics including IoT, application security, operational security, cloud, devops, and security news of the day. There is a special open source twist to the discussion often giving a unique perspective on any given topic." htmlUrl="http://opensourcesecuritypodcast.com" text="Open Source Security Podcast" title="Open Source Security Podcast" xmlUrl="https://opensourcesecuritypodcast.libsyn.com/rss" />
<outline description="&lt;p>Ce Podcast est destiné a toute personne désirant apprendre de manière différente sur Linux. Mais également une porte ouverte à tout ceux qui désire crée leur podcast la plateforme est ouvert à tous.&lt;/p>" htmlUrl="https://parlonslinux.fr/@ParlonsLinuxFR" text="Parlons Linux" title="Parlons Linux" xmlUrl="https://parlonslinux.fr/@ParlonsLinuxFR/feed.xml" />
<outline description="The Fantasy Fiction Podcast" htmlUrl="https://podcastle.org/" text="PodCastle" title="PodCastle" xmlUrl="http://podcastle.org/feed/" />
<outline description="Um podcast descontraído sobre Ubuntu, a comunidade Ubuntu e tudo o que gira em volta do universo Ubuntu." htmlUrl="https://podcastubuntuportugal.org/" text="Podcast Ubuntu Portugal" title="Podcast Ubuntu Portugal" xmlUrl="https://podcastubuntuportugal.org/feed/podcast/" />
<outline description="Cory Doctorow's Literary Works" htmlUrl="https://craphound.com" text="Podcast Cory Doctorow's craphound.com" title="Podcast Cory Doctorow's craphound.com" xmlUrl="http://feeds.feedburner.com/doctorow_podcast" />
@ -115,6 +116,7 @@
<outline description="En podcast om Wikipedia på svenska" htmlUrl="http://wikipediapodden.se/prenumerera/" text="Wikipediapodden" title="Wikipediapodden" xmlUrl="http://wikipediapodden.se/feed/podcast/" />
<outline description="Talking about the BSD family of free operating systems." htmlUrl="http://bsdtalk.blogspot.com/" text="bsdtalk" title="bsdtalk" xmlUrl="http://feeds.feedburner.com/Bsdtalk" />
<outline description="The independent magazine for the Ubuntu Linux community." htmlUrl="https://fullcirclemagazine.org" text="podcast Full Circle Magazine" title="podcast Full Circle Magazine" xmlUrl="http://fullcirclemagazine.org/category/podcast/feed/" />
<outline description="News/interviews/anecdotes around postmarketOS, straight from the source." htmlUrl="https://cast.postmarketos.org/" text="postmarketOS Podcast" title="postmarketOS Podcast" xmlUrl="https://cast.postmarketos.org/feed.rss" />
<outline description="urandom: your unlimited source of medium quality randomness" htmlUrl="https://urandom-podcast.info/" text="urandom podcast" title="urandom podcast" xmlUrl="http://feeds.feedburner.com/urandom-podcast/ogg" />
</body>
</opml>

Binary file not shown.

View File

@ -1,9 +1,25 @@
--
-- feedWatcher_schema.sql - version 0.0.10
-- feedWatcher_schema.sql - version 0.0.11, 2023-02-01 22:19:15
--
-- Renamed 'feedWatcher.sql' => 'feedWatcher_schema.sql' on 2021-08-31
--
/*
* Table 'settings'
* ----------------
*
* Table of configuration settings. Not 100% comfortable with this as a way of
* doing such stuff.
*/
DROP TABLE IF EXISTS settings;
CREATE TABLE settings (
expiry_threshold varchar(20),
last_expiry timestamp
);
INSERT INTO settings (expiry_threshold) VALUES('-2 years');
/*
* Table 'urls'
* ------------

Binary file not shown.