hpr-tools/PostgreSQL_Database/hpr_schema_2.pgsql
Dave Morriss 50edeccc88 Updates from previous repo
FAQ/FAQ.mkd, FAQ/Makefile: this version of the FAQ is now out of date
    and probably should be deleted.

InternetArchive/repair_item: script to upload missing shows after tie
    out errors during the normal upload; still under development.

InternetArchive/update_state: script to update show state in the
    'reservations' table in the database. Uses the CMS interface.

Link_Checker/scan_links: under development. Not currently usable.

Miscellaneous/fix_tags: audio metadata manipulation script. Recently
    added to this repo for convenience. Updates for 'experimental::try',
    the official Perl try/catch.

PostgreSQL_Database/add_hosts_to_show, PostgreSQL_Database/hpr_schema_2.pgsql,
    PostgreSQL_Database/nuke_n_pave.sh: an old experimental Pg database
    to take over from the previous MySQL version (from before 2023).
    Kept for reference; never implemented.
2024-06-14 16:00:04 +01:00

557 lines
19 KiB
PL/PgSQL

/* =============================================================================
* PostgreSQL Schema V2 - designs for a new HPR database
*
* File: hpr_schema_2.pgsql
* Created: 2017-10-22
* Updated: 2019-04-15
* =============================================================================
*/
/* ------------------------------------------------------------------------------
* Design decisions made:
*
* - Plural names for tables ('episodes' not 'episode')
* - Table names all lower case with underscores
* - Field names all lower case with underscores
* - Primary keys named "[singularOfTableName]ID", so 'episodeID' in the
* 'episodes' table TODO: make it 'episode_ID'??
* - Foreign keys named consistently in all tables
* - Explicit sequences called "[singularOfTableName]_seq"
* - Cross reference tables shouldn't call their fields
* "[pluralTableName]_id" but "[singularOfTableName]_id" because
* 'episodes_id' seems stupid.
*
* ------------------------------------------------------------------------------
* Tables:
*
* comments
* episodes
* episodes_hosts_xref
* episodes_series_xref
* episodes_tags_xref
* hosts
* licenses
* series
* tags
* assets
*
*/
/* ------------------------------------------------------------------------------
* Drop everything to start with. The order is important because of the
* relations between tables. Also, some items are dependent and go with the
* tables.
* ------------------------------------------------------------------------------
*/
-- {{{
DROP TABLE IF EXISTS comments CASCADE;
DROP TABLE IF EXISTS episodes CASCADE;
DROP TABLE IF EXISTS episodes_hosts_xref CASCADE;
DROP TABLE IF EXISTS episodes_series_xref CASCADE;
DROP TABLE IF EXISTS episodes_tags_xref CASCADE;
DROP TABLE IF EXISTS hosts CASCADE;
DROP TABLE IF EXISTS licenses CASCADE;
DROP TABLE IF EXISTS series CASCADE;
DROP TABLE IF EXISTS tags CASCADE;
DROP TABLE IF EXISTS assets CASCADE;
-- DROP INDEX IF EXISTS episode_release_date_key;
DROP SEQUENCE IF EXISTS comment_seq;
DROP SEQUENCE IF EXISTS episode_seq;
DROP SEQUENCE IF EXISTS host_seq;
DROP SEQUENCE IF EXISTS license_seq;
DROP SEQUENCE IF EXISTS series_seq;
DROP SEQUENCE IF EXISTS tag_seq;
DROP SEQUENCE IF EXISTS assets_seq;
-- DROP VIEW IF EXISTS eht_view;
DROP FUNCTION IF EXISTS id_in_licenses(sname varchar);
DROP FUNCTION IF EXISTS id_in_episodes(ekey varchar);
DROP TRIGGER IF EXISTS comment_changed ON comments CASCADE;
DROP FUNCTION IF EXISTS comment_changed();
-- }}}
-- +----------+
-- | licenses |
-- +----------+
-- {{{
/* ------------------------------------------------------------------------------
* Table 'licenses' - licenses relating to episodes (needed because 'episodes'
* and 'hosts' reference it)
*
* license_id primary key, the licence number
* short_name brief name of CC licence
* long_name longer name of CC licence
* url link to details of licence at creativecommons.org
*
* ------------------------------------------------------------------------------
*/
CREATE SEQUENCE license_seq;
ALTER TABLE license_seq
OWNER TO hpradmin;
CREATE TABLE licenses (
license_id integer default nextval('license_seq') PRIMARY KEY,
short_name varchar(11) NOT NULL UNIQUE,
long_name varchar(40) NOT NULL,
url varchar(80) NOT NULL
);
ALTER TABLE licenses
OWNER TO hpradmin;
/*
* Load the table since it's quite short. Don't set the license_id to ensure
* the sequence is updated properly.
*/
INSERT INTO licenses (short_name, long_name, url) VALUES
('CC-0', 'Public Domain Dedication', 'http://creativecommons.org/publicdomain/zero/1.0/'),
('CC-BY', 'Attribution', 'http://creativecommons.org/licenses/by/4.0'),
('CC-BY-SA', 'Attribution-ShareAlike', 'http://creativecommons.org/licenses/by-sa/3.0'),
('CC-BY-ND', 'Attribution-NoDerivs', 'http://creativecommons.org/licenses/by-nd/4.0'),
('CC-BY-NC', 'Attribution-NonCommercial', 'http://creativecommons.org/licenses/by-nc/4.0'),
('CC-BY-NC-SA', 'Attribution-NonCommercial-ShareAlike', 'http://creativecommons.org/licenses/by-nc-sa/4.0'),
('CC-BY-NC-ND', 'Attribution-NonCommercial-NoDerivs', 'http://creativecommons.org/licenses/by-nc-nd/4.0');
/*
* Define a simple function to return the license_id number corresponding to
* a short name in the licenses table. This simplifies setting a default value
* in a foreign key definition in other tables. It's not resilient to errors
* as it stands.
*/
CREATE OR REPLACE FUNCTION id_in_licenses(sname varchar) RETURNS integer AS
$$
SELECT license_id FROM licenses WHERE short_name = sname;
$$
LANGUAGE SQL STABLE;
ALTER FUNCTION id_in_licenses(sname varchar)
OWNER TO hpradmin;
-- }}}
-- +----------+
-- | episodes |
-- +----------+
-- {{{
/* ------------------------------------------------------------------------------
* Table 'episodes' - TWAT and HPR shows
*
* episode_id primary key (NOT the episode number)
* episode_key episode designator
* release_date date the episode was released
* title title of the episode
* summary summary of the episode content
* notes the show notes (as an HTML fragment)
* explicit a Boolean; true for explicit, false for otherwise
* license the licence which the show is under (US spelling)
* duration the duration (time) of the audio
* downloads number of downloads
* archived a Boolean; true if the episode has been uploaded to the IA
* archive_date date the episode was archived
* IA_URL URL to the episode on archive.org
* journal a journal of actions performed on this episode
*
* TODO: Do we need a function to determine the next 'episode_key'?
* NOTE: The 'valid' column has been removed.
*
* ------------------------------------------------------------------------------ */
CREATE SEQUENCE episode_seq;
ALTER TABLE episode_seq
OWNER TO hpradmin;
CREATE TYPE episode_status AS ENUM ('reserved', 'processing', 'posted');
CREATE TABLE episodes (
episode_id integer default nextval('episode_seq') PRIMARY KEY,
episode_key varchar(40) NOT NULL,
release_date date NOT NULL,
title varchar(128) NOT NULL,
summary varchar(128),
notes text NOT NULL,
explicit boolean NOT NULL DEFAULT TRUE,
license integer NOT NULL DEFAULT id_in_licenses('CC-BY-SA')
REFERENCES licenses (license_id),
duration interval NOT NULL DEFAULT '00:00:00',
downloads integer NOT NULL DEFAULT 0,
archived boolean NOT NULL DEFAULT FALSE,
archive_date date,
IA_URL text,
status episode_status,
journal text
);
ALTER TABLE episodes
OWNER TO hpradmin;
CREATE INDEX episode_key_idx
ON episodes
USING btree
(episode_key);
CREATE INDEX episode_release_date_idx
ON episodes
USING btree
(release_date);
/*
* Define a simple function to return the episode_id number corresponding to
* an episode key in the 'episodes' table.
*/
CREATE OR REPLACE FUNCTION id_in_episodes(ekey varchar) RETURNS integer AS
$$
SELECT episode_id FROM episodes WHERE episode_key = ekey;
$$
LANGUAGE SQL STABLE;
ALTER FUNCTION id_in_episodes(ekey varchar)
OWNER TO hpradmin;
-- }}}
-- +-----------+
-- | episodes2 |
-- +-----------+
-- {{{
/* ------------------------------------------------------------------------------
* Table 'episodes2' - TWAT and HPR shows
*
* show_id zero for a TwaT show, 1 for an HPR show
* episode_id episode number in the range of show_id values
* release_date date the episode was released
* title title of the episode
* summary summary of the episode content
* notes the show notes (as an HTML fragment)
* explicit a Boolean; true for explicit, false for otherwise
* license the licence which the show is under (US spelling)
* duration the duration (time) of the audio
* downloads number of downloads
* archived a Boolean; true if the episode has been uploaded to the IA
* archive_date date the episode was archived
* IA_URL URL to the episode on archive.org
* journal a journal of actions performed on this episode
*
* ------------------------------------------------------------------------------ */
-- CREATE TYPE episode_status AS ENUM ('reserved', 'processing', 'posted');
DROP TABLE IF EXISTS episodes2 CASCADE;
CREATE TABLE episodes2 (
show_id smallint NOT NULL DEFAULT 1
CHECK (show_id = 0 OR show_id = 1),
episode_id smallint NOT NULL,
release_date date NOT NULL,
title varchar(128) NOT NULL,
summary varchar(128),
notes text NOT NULL,
explicit boolean NOT NULL DEFAULT TRUE,
license integer NOT NULL DEFAULT id_in_licenses('CC-BY-SA')
REFERENCES licenses (license_id),
duration interval NOT NULL DEFAULT '00:00:00',
downloads integer NOT NULL DEFAULT 0,
archived boolean NOT NULL DEFAULT FALSE,
archive_date date,
IA_URL text,
status episode_status,
journal text,
PRIMARY KEY (show_id, episode_id)
);
ALTER TABLE episodes2
OWNER TO hpradmin;
CREATE INDEX episode2_release_date_idx
ON episodes2
USING btree
(release_date);
-- }}}
-- \/\/ hosts /\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/
-- {{{
/* ------------------------------------------------------------------------------
* Table 'hosts' - hosts contributing shows
*
* host_id primary key, host number
* host host name or handle
* email host email address (cannot be unique because some hosts need
* to have their mail directed to admin@hpr)
* profile text describing the host, HTML preferred
* license the default licence chosen by the host (US spelling)
* local_image true if there's a host-supplied image for the host
* gpg the host's GPG key
* espeak_name the host name written so that 'espeak' speaks it properly
* when_added the date the host was added to the database
*
* TODO: Do we need the 'valid' field? Removed for the moment.
* NOTE: Is there any point in the added date? Most will start as the date the
* table was created in this database, and effort will be needed to set
* approximate dates in the past (from the date of their first shows).
*
* ------------------------------------------------------------------------------ */
CREATE SEQUENCE host_seq;
ALTER TABLE host_seq
OWNER TO hpradmin;
CREATE TABLE hosts (
host_id integer default nextval('host_seq') PRIMARY KEY,
host varchar(1024) UNIQUE NOT NULL,
email varchar(1024) UNIQUE NOT NULL,
profile text,
license integer NOT NULL DEFAULT id_in_licenses('CC-BY-SA')
REFERENCES licenses (license_id),
local_image boolean NOT NULL DEFAULT FALSE,
gpg text,
-- valid boolean NOT NULL DEFAULT TRUE,
espeak_name text,
when_added date NOT NULL DEFAULT now()
);
ALTER TABLE hosts
OWNER TO hpradmin;
/* ------------------------------------------------------------------------------
* Table 'episodes_hosts_xref' - joining table between 'episodes' and 'hosts'
*
* episode_id primary key of the episodes table
* host_id primary key of the hosts table
*
* The two fields jointly make the primary key of this table
*
* TODO: Check the deletion actions
*
* ------------------------------------------------------------------------------ */
CREATE TABLE episodes_hosts_xref (
episode_id integer REFERENCES episodes(episode_id)
ON DELETE RESTRICT,
host_id integer REFERENCES hosts(host_id)
ON DELETE CASCADE,
PRIMARY KEY (episode_id, host_id)
);
ALTER TABLE episodes_hosts_xref
OWNER TO hpradmin;
-- }}}
-- \/\/ tags \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/
-- {{{
/* ------------------------------------------------------------------------------
* Table 'tags' - tags relating to episodes
*
* tag_id primary key, tag number
* tag a tag
*
* TODO: Tags are case-sensitive. Should they be?
* TODO: Should tags have a unique key on them?
*
* ------------------------------------------------------------------------------ */
CREATE SEQUENCE tag_seq;
ALTER TABLE tag_seq
OWNER TO hpradmin;
CREATE TABLE tags (
tag_id integer default nextval('tag_seq') PRIMARY KEY,
tag varchar(1024) NOT NULL
);
ALTER TABLE tags
OWNER TO hpradmin;
/* ------------------------------------------------------------------------------
* Table 'episodes_tags_xref' - joining table between 'episodes' and 'tags'
*
* episode_id primary key of the episodes table
* tag_id primary key of the tags table
*
* The two fields jointly make the primary key of this table
*
* TODO: Check the deletion actions
*
* ------------------------------------------------------------------------------ */
CREATE TABLE episodes_tags_xref (
episode_id integer REFERENCES episodes(episode_id)
ON DELETE RESTRICT,
tag_id integer REFERENCES tags(tag_id)
ON DELETE CASCADE,
PRIMARY KEY (episode_id, tag_id)
);
ALTER TABLE episodes_tags_xref
OWNER TO hpradmin;
-- }}}
-- \/\/ series \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/
-- {{{
/* ------------------------------------------------------------------------------
* Table 'series' - series grouping for episodes
*
* series_id primary key, series number
* name name of series
* description description of series (HTML preferred)
* private whether others may contribute to the series
* image ??
*
* NOTE: Removed 'valid'
*
* ------------------------------------------------------------------------------ */
CREATE SEQUENCE series_seq;
ALTER TABLE series_seq
OWNER TO hpradmin;
CREATE TABLE series (
series_id integer default nextval('series_seq') PRIMARY KEY,
name varchar(100) NOT NULL,
description text NOT NULL,
private boolean NOT NULL DEFAULT FALSE,
image text
-- valid boolean NOT NULL DEFAULT TRUE
);
ALTER TABLE series
OWNER TO hpradmin;
/* ------------------------------------------------------------------------------
* Table 'episodes_series_xref' - joining table between 'episodes' and 'series'
*
* episodes_id primary key of the episodes table
* series_id primary key of the series table
*
* The two fields jointly make the primary key of this table
*
* TODO: Check the deletion actions
*
* ------------------------------------------------------------------------------ */
CREATE TABLE episodes_series_xref (
episode_id integer REFERENCES episodes(episode_id)
ON DELETE RESTRICT,
series_id integer REFERENCES series(series_id)
ON DELETE CASCADE,
PRIMARY KEY (episode_id, series_id)
);
ALTER TABLE episodes_series_xref
OWNER TO hpradmin;
-- }}}
-- \/\/ comments \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/
-- {{{
/* ------------------------------------------------------------------------------
* Table 'comments' - comments relating to episodes
*
* comment_id primary key, comment number
* episode_id primary key of the episodes table
* comment_timestamp UTC timestamp when comment was submitted
* comment_author_name name given by the comment author
* comment_title title given by the comment author
* comment_text text of the comment (NO HTML)
* last_changed UTC timestamp of the last change to the row
*
* TODO: Check the deletion FK actions
*
* ------------------------------------------------------------------------------ */
CREATE SEQUENCE comment_seq;
ALTER TABLE comment_seq
OWNER TO hpradmin;
CREATE TABLE comments (
comment_id integer default nextval('comment_seq') PRIMARY KEY,
episode_id integer REFERENCES episodes(episode_id)
ON DELETE RESTRICT,
comment_timestamp timestamp without time zone NOT NULL,
comment_author_name varchar(1024),
comment_title varchar(1024),
comment_text text,
last_changed timestamp without time zone NOT NULL
DEFAULT timezone('UTC'::text, now())
);
ALTER TABLE comments
OWNER TO hpradmin;
/* ------------------------------------------------------------------------------
* Trigger function 'comment_changed' and trigger on the 'comments' table
*
* ------------------------------------------------------------------------------ */
CREATE FUNCTION comment_changed() RETURNS trigger AS $comment_changed$
BEGIN
-- Remember when the comment row was changed
NEW.last_changed := current_timestamp;
RETURN NEW;
END;
$comment_changed$ LANGUAGE plpgsql;
ALTER FUNCTION comment_changed()
OWNER TO hpradmin;
CREATE TRIGGER comment_changed BEFORE UPDATE ON comments
FOR EACH ROW EXECUTE PROCEDURE comment_changed();
-- }}}
-- \/\/ assets \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/
-- {{{
/* ------------------------------------------------------------------------------
* Table 'assets' - assets relating to episodes
*
* asset_id primary key
* episode_id link to episodes.episode_id (show the link was found in)
* URL URL of the asset (on the HPR site)
* filename filename (or path) component (after percent decoding)
* uploaded Boolean showing if an asset has been uploaded to the IA
*
* TODO: should we consider recording the IA URL of an asset?
*
* ------------------------------------------------------------------------------ */
CREATE SEQUENCE asset_seq;
ALTER TABLE asset_seq
OWNER TO hpradmin;
CREATE TABLE assets (
asset_id integer default nextval('asset_seq') PRIMARY KEY,
episode_id integer REFERENCES episodes(episode_id)
ON DELETE RESTRICT,
URL text NOT NULL,
filename text NOT NULL,
uploaded boolean NOT NULL DEFAULT FALSE
);
ALTER TABLE assets
OWNER TO hpradmin;
-- }}}
-- Footer ---------------------------------------------------------------------
-- vim: syntax=pgsql:ts=8:sw=4:ai:tw=78:et:fo=tcrqn21:fdm=marker:nu:rnu