Free_Culture_Podcasts/feedWatcher_schema.sql
Dave Morriss db39655199 Additions to the database and feedWatcher
feedWatcher: added the parsing of HTML feeds to get the title tag in the
    <head> area; new database fields relating to the copyright check
    done, and why the feed was allowed in if done so manually; added dry
    run mode; changed the way -load and -delete work so each can be
    given URLs on the command line; starting to report settings at start
    time (needs work); -load and -delete not allowed together; more
    logging; addition of a _debug function; enhancement of reportFeed to
    show one feed and a summary of relevant details (more useful than
    dumping the entire database this way); added getHTMLTitle for
    parsing out the HTML title; enhanced checkCopyright to get a reason
    if in manual mode and a feed is allowed in; needs a lot of
    clean-up!

feedWatcher.{html,json,mkd,opml,pdf}: various reports.

feedWatcher_3.tpl: For making Markdown which is turned into PDF.
    'Licence' becomes 'Copyright'

feedWatcher_5.tpl: for dumping all the URLs in the database
    & regenerating everything

feedWatcher_schema.sql: new fields added
2023-01-14 23:13:49 +00:00

160 lines
4.6 KiB
SQL

--
-- feedWatcher_schema.sql - version 0.0.10
--
-- Renamed 'feedWatcher.sql' => `feedWatcher_schema.sql' on 2021-08-31
--
/*
* Table 'urls'
* ------------
*
* The main table containing host, http and feed information
*/
DROP TABLE IF EXISTS urls;
CREATE TABLE urls (
id integer PRIMARY KEY,
url varchar(1024) NOT NULL,
dns text,
host_up boolean DEFAULT 0,
http_status varchar(80),
content_type varchar(40),
urltype varchar(40),
feedformat varchar(20),
title varchar(1024),
description text,
author varchar(80),
modified timestamp,
link varchar(1024),
image varchar(1024),
copyright varchar(80),
check_type varchar(10) DEFAULT 'none',
reason_accepted text,
generator varchar(80),
language varchar(40),
parent_id integer
REFERENCES urls(id)
ON DELETE CASCADE,
child_count integer DEFAULT 0,
last_update timestamp NOT NULL DEFAULT (CURRENT_TIMESTAMP)
);
--
-- Ensure the 'url' column is unique
--
DROP INDEX IF EXISTS url_index;
CREATE UNIQUE INDEX url_index ON urls ( url );
--
-- Trigger to update the 'last_update' field every time the row changes
--
DROP TRIGGER IF EXISTS urls_last_updated;
CREATE TRIGGER urls_last_updated AFTER UPDATE ON urls
BEGIN
UPDATE urls SET last_update = datetime('now');
END;
/*
* Table 'episodes'
* ----------------
*
* Has a foreign key relationship with table 'urls'. If the matching row in
* 'urls' is deleted the deletion cascades to this table (ON DELETE CASCADE).
* If the primary key in 'urls' is updated the relationship prevents this
* change (ON UPDATE RESTRICT).
*/
DROP TABLE IF EXISTS episodes;
CREATE TABLE episodes (
id integer PRIMARY KEY,
urls_id integer NOT NULL
REFERENCES urls (id)
ON DELETE CASCADE
ON UPDATE RESTRICT,
link varchar(1024),
enclosure varchar(256),
title varchar(256),
author varchar(256),
category text,
source varchar(128),
ep_id varchar(256),
issued timestamp,
modified timestamp,
byte_length bigint,
mime_type varchar(64),
last_update timestamp NOT NULL DEFAULT (CURRENT_TIMESTAMP)
);
--
-- Ensure the 'enclosure' column is unique (per feed)
--
DROP INDEX IF EXISTS enclosure_index;
CREATE UNIQUE INDEX enclosure_index ON episodes ( urls_id, enclosure );
--
-- Trigger to update the 'last_update' field every time the row changes
--
DROP TRIGGER IF EXISTS episodes_last_updated;
CREATE TRIGGER episodes_last_updated AFTER UPDATE ON episodes
BEGIN
UPDATE episodes SET last_update = datetime('now');
END;
--
-- View to simplify joining the two tables.
--
DROP VIEW IF EXISTS all_episodes;
CREATE VIEW all_episodes AS
SELECT
urls.id as urls_id,
urls.url as urls_url,
urls.dns as urls_dns,
urls.host_up as urls_host_up,
urls.http_status as urls_http_status,
urls.content_type as urls_content_type,
urls.urltype as urls_urltype,
urls.feedformat as urls_feedformat,
urls.title as urls_title,
urls.description as urls_description,
urls.author as urls_author,
urls.modified as urls_modified,
urls.link as urls_link,
urls.image as urls_image,
urls.copyright as urls_copyright,
urls.check_type as urls_check_type,
urls.reason_accepted as urls_reason_accepted,
urls.generator as urls_generator,
urls.language as urls_language,
urls.parent_id as urls_parent_id,
urls.child_count as urls_child_count,
urls.last_update as urls_last_update,
ep.id as ep_id,
ep.urls_id as ep_urls_id,
ep.link as ep_link,
ep.enclosure as ep_enclosure,
ep.title as ep_title,
ep.author as ep_author,
ep.category as ep_category,
ep.source as ep_source,
ep.ep_id as ep_ep_id,
ep.issued as ep_issued,
ep.modified as ep_modified,
ep.byte_length as ep_byte_length,
ep.mime_type as ep_mime_type,
ep.last_update as ep_last_update
FROM urls LEFT JOIN episodes ep ON urls.id = ep.urls_id;
-- vim: syntax=sql:ts=8:ai:tw=78:et:fo=tcrqn21:comments+=b\:--