hpr-tools/InternetArchive/ia_db.sql
Dave Morriss 19030fee71 Updates for show "repair" processing
InternetArchive/future_upload: Added logging and debugging

InternetArchive/ia_db.sql: Added new tables

InternetArchive/recover_transcripts: New script to run on 'borg' and
    copy missing files from the backup disk to the IA

InternetArchive/repair_assets: More comments, including one about a bug in the design.

InternetArchive/repair_item: Fix relating to octal numbers (if there are
    leading zeroes in a number). '_DEBUG' is now in the function
    library. Added comments to explain obscure stuff.

InternetArchive/snapshot_metadata: New Bash script (to run on my
    desktop) which collects metadata for a show and stores in in the
    '~/HPR/IA/assets' directory. Runs 'view_derivatives' on it to find
    derivative files for deletion.

InternetArchive/tidy_uploaded: Moves files and directories containing
    uploaded files into a holding area for later backup. Added
    debugging, logging and a 'force' mode.

InternetArchive/upload_manager: Manages 'ia.db' (on my workstation).
    Needs many updates which have just started to be added.

InternetArchive/weekly_upload: Old script, now obsolete.
2024-08-22 13:13:38 +01:00

248 lines
8.0 KiB
SQL

/*
* ia_db.sql
* =========
*
* Schema for SQLite database 'ia.db' used to hold IA upload information
* Last updated: 2024-07-15
*
*/
/* ----------------------------------------------------------------------------
* Table: episodes
*
* id show number from HPR
* rdate release date from HPR
* title title from HPR
* summary summary from HPR
* uploaded Boolean (0,1) showing if an episode has been uploaded
* has_files Boolean (0,1) showing that there are assets for this
* show
* with_files Boolean (0,1) showing that the assets have been uploaded
* with_derived Boolean (0,1) true if we did our own "deriving" of audio
* (*.spx, *.opus, etc), preserving the tags, and these are
* on the IA rather than having been derived by the IA
* software (and having had their tags removed!)
* archive_date date uploaded to the IA
* item_last_updated epoch date of last update (from the IA software)
* IA_URL URL of the uploaded show
* notes any notes about the upload
*
*/
CREATE TABLE episodes (
id integer PRIMARY KEY,
rdate date NOT NULL,
title varchar(100) NOT NULL,
summary varchar(100) NOT NULL,
uploaded integer default 0,
has_files integer default 0,
with_files integer default 0,
with_derived integer default 0,
with_source integer default 0,
archive_date date,
item_last_updated integer default 0,
IA_URL text,
notes text
);
/* ----------------------------------------------------------------------------
* Table: assets
*
* id primary key
* episode_id link to episodes.id (show number the link was found in)
* URL URL of the asset
* filename filename (or path) component (after percent decoding)
* uploaded Boolean (0,1) showing if an asset has been uploaded
*
*/
CREATE TABLE assets (
id integer PRIMARY KEY,
episode_id integer REFERENCES episodes(id),
URL text NOT NULL,
filename text NOT NULL,
uploaded integer default 0
);
/* ----------------------------------------------------------------------------
* Index: assets_filename_idx
*
* Attempt to constrain duplicates in the assets table
*
*/
CREATE UNIQUE INDEX assets_filename_idx ON assets (episode_id, filename);
/* ----------------------------------------------------------------------------
* Table: dirlist
*
* id primary key
* filename filename or path under directory 'eps'
*
*/
CREATE TABLE dirlist (
id integer PRIMARY KEY,
filename text NOT NULL
);
/* ----------------------------------------------------------------------------
* Table: hpr_repairs
*
* episode_id Primary key, foreign key for 'episodes'
* repaired Boolean showing whether the show has been repaired
* repair_date Date of repair
* notes Notes about any anomalies
* asset_count Number of assets (after ignoring transcripts, etc)
*
*/
CREATE TABLE hpr_repairs (
episode_id integer PRIMARY KEY REFERENCES episodes(id),
repaired integer default 0,
repair_date integer default 0,
notes text default null,
asset_count integer default 0
);
/* ----------------------------------------------------------------------------
* Table: ia_repairs
*
* episode_id Primary key, foreign key for 'episodes'
* repaired Boolean showing whether the show has been repaired
* repair_date Date of repair
* notes Notes about any anomalies
*
*/
CREATE TABLE ia_repairs (
episode_id integer PRIMARY KEY REFERENCES episodes(id),
repaired integer default 0,
repair_date integer default 0,
notes text default null
);
/* ----------------------------------------------------------------------------
* Table: show_host_xref
*
* episode_id Foreign key for 'episodes'
* hostid Host number from MySQL database
* hostname Host name from MySQL database
*
*/
CREATE TABLE "show_host_xref" (
"episode_id" integer,
"hostid" integer,
"hostname" text DEFAULT null,
FOREIGN KEY("episode_id") REFERENCES "episodes"("id")
);
/* ----------------------------------------------------------------------------
* Index: show_host_xref_idx
*
* Attempt to constrain duplicates in the show_host_xref table
*
*/
CREATE UNIQUE INDEX "show_host_xref_idx" ON "show_host_xref" (
"episode_id" ASC
);
/*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
* View: episodes_view
*
*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/
DROP VIEW IF EXISTS episodes_view;
CREATE VIEW episodes_view AS
SELECT
e.id,
e.rdate,
e.title,
e.summary,
e.uploaded,
e.has_files,
e.with_files,
e.with_derived,
e.archive_date,
e.IA_URL,
CASE e.item_last_updated
WHEN 0 THEN null
ELSE datetime(e.item_last_updated,'unixepoch') END AS item_last_updated,
e.notes,
count(a.url) AS asset_count,
group_concat(a.filename) AS assets
FROM episodes e
LEFT JOIN assets a ON e.id = a.episode_id
GROUP BY e.id
ORDER BY e.id;
/*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
* View: episodes_this_month (based on episodes_view)
*
*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/
DROP VIEW IF EXISTS episodes_this_month;
CREATE VIEW episodes_this_month AS
SELECT
*
FROM episodes_view
WHERE rdate between date('now','start of month')
AND date('now','start of month','+1 month','-1 day');
/*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
* View: episodes_last_month
*
* Selects the rows from 'episodes_view' for the previous month
*
*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/
DROP VIEW IF EXISTS episodes_last_month;
CREATE VIEW episodes_last_month AS
SELECT
*
FROM episodes_view
WHERE rdate between date('now','start of month','-1 month')
AND date('now','start of month','-1 day');
/*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
* View: episodes_next_month
*
* Selects the rows from 'episodes_view2' for the next month
*
*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/
DROP VIEW IF EXISTS episodes_next_month;
CREATE VIEW episodes_next_month AS
SELECT
*
FROM episodes_view
WHERE rdate between date('now','start of month','+1 month')
AND date('now','start of month','+2 month','-1 day');
/*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
* View: re_upload
*
* Selects the rows from 'episodes_view' which need to be re-uploaded. Shows the
* last 5 upolads to give context.
*
*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/
DROP VIEW IF EXISTS re_upload;
CREATE VIEW re_upload AS
SELECT
*
FROM episodes_view
WHERE id
BETWEEN
(SELECT min(id)-5
FROM episodes_view
WHERE id BETWEEN 871 AND 2429
AND uploaded = 1
AND with_derived = 0
GROUP BY id)
AND 2429;
/*
* CREATE VIEW re_upload AS
* SELECT
* *
* FROM episodes_view
* WHERE id BETWEEN 871 AND 2429
* AND uploaded = 1
* AND with_derived = 0;
*
*/
-- vim: syntax=sql:ts=8:sw=4:ai:tw=80:et:fo=tcrqn21:fdm=marker:comments+=b\:--