19030fee71
InternetArchive/future_upload: Added logging and debugging InternetArchive/ia_db.sql: Added new tables InternetArchive/recover_transcripts: New script to run on 'borg' and copy missing files from the backup disk to the IA InternetArchive/repair_assets: More comments, including one about a bug in the design. InternetArchive/repair_item: Fix relating to octal numbers (if there are leading zeroes in a number). '_DEBUG' is now in the function library. Added comments to explain obscure stuff. InternetArchive/snapshot_metadata: New Bash script (to run on my desktop) which collects metadata for a show and stores in in the '~/HPR/IA/assets' directory. Runs 'view_derivatives' on it to find derivative files for deletion. InternetArchive/tidy_uploaded: Moves files and directories containing uploaded files into a holding area for later backup. Added debugging, logging and a 'force' mode. InternetArchive/upload_manager: Manages 'ia.db' (on my workstation). Needs many updates which have just started to be added. InternetArchive/weekly_upload: Old script, now obsolete.
248 lines
8.0 KiB
SQL
248 lines
8.0 KiB
SQL
/*
|
|
* ia_db.sql
|
|
* =========
|
|
*
|
|
* Schema for SQLite database 'ia.db' used to hold IA upload information
|
|
* Last updated: 2024-07-15
|
|
*
|
|
*/
|
|
|
|
/* ----------------------------------------------------------------------------
|
|
* Table: episodes
|
|
*
|
|
* id show number from HPR
|
|
* rdate release date from HPR
|
|
* title title from HPR
|
|
* summary summary from HPR
|
|
* uploaded Boolean (0,1) showing if an episode has been uploaded
|
|
* has_files Boolean (0,1) showing that there are assets for this
|
|
* show
|
|
* with_files Boolean (0,1) showing that the assets have been uploaded
|
|
* with_derived Boolean (0,1) true if we did our own "deriving" of audio
|
|
* (*.spx, *.opus, etc), preserving the tags, and these are
|
|
* on the IA rather than having been derived by the IA
|
|
* software (and having had their tags removed!)
|
|
* archive_date date uploaded to the IA
|
|
* item_last_updated epoch date of last update (from the IA software)
|
|
* IA_URL URL of the uploaded show
|
|
* notes any notes about the upload
|
|
*
|
|
*/
|
|
CREATE TABLE episodes (
|
|
id integer PRIMARY KEY,
|
|
rdate date NOT NULL,
|
|
title varchar(100) NOT NULL,
|
|
summary varchar(100) NOT NULL,
|
|
uploaded integer default 0,
|
|
has_files integer default 0,
|
|
with_files integer default 0,
|
|
with_derived integer default 0,
|
|
with_source integer default 0,
|
|
archive_date date,
|
|
item_last_updated integer default 0,
|
|
IA_URL text,
|
|
notes text
|
|
);
|
|
|
|
/* ----------------------------------------------------------------------------
|
|
* Table: assets
|
|
*
|
|
* id primary key
|
|
* episode_id link to episodes.id (show number the link was found in)
|
|
* URL URL of the asset
|
|
* filename filename (or path) component (after percent decoding)
|
|
* uploaded Boolean (0,1) showing if an asset has been uploaded
|
|
*
|
|
*/
|
|
CREATE TABLE assets (
|
|
id integer PRIMARY KEY,
|
|
episode_id integer REFERENCES episodes(id),
|
|
URL text NOT NULL,
|
|
filename text NOT NULL,
|
|
uploaded integer default 0
|
|
);
|
|
|
|
/* ----------------------------------------------------------------------------
|
|
* Index: assets_filename_idx
|
|
*
|
|
* Attempt to constrain duplicates in the assets table
|
|
*
|
|
*/
|
|
CREATE UNIQUE INDEX assets_filename_idx ON assets (episode_id, filename);
|
|
|
|
/* ----------------------------------------------------------------------------
|
|
* Table: dirlist
|
|
*
|
|
* id primary key
|
|
* filename filename or path under directory 'eps'
|
|
*
|
|
*/
|
|
CREATE TABLE dirlist (
|
|
id integer PRIMARY KEY,
|
|
filename text NOT NULL
|
|
);
|
|
|
|
/* ----------------------------------------------------------------------------
|
|
* Table: hpr_repairs
|
|
*
|
|
* episode_id Primary key, foreign key for 'episodes'
|
|
* repaired Boolean showing whether the show has been repaired
|
|
* repair_date Date of repair
|
|
* notes Notes about any anomalies
|
|
* asset_count Number of assets (after ignoring transcripts, etc)
|
|
*
|
|
*/
|
|
CREATE TABLE hpr_repairs (
|
|
episode_id integer PRIMARY KEY REFERENCES episodes(id),
|
|
repaired integer default 0,
|
|
repair_date integer default 0,
|
|
notes text default null,
|
|
asset_count integer default 0
|
|
|
|
);
|
|
|
|
/* ----------------------------------------------------------------------------
|
|
* Table: ia_repairs
|
|
*
|
|
* episode_id Primary key, foreign key for 'episodes'
|
|
* repaired Boolean showing whether the show has been repaired
|
|
* repair_date Date of repair
|
|
* notes Notes about any anomalies
|
|
*
|
|
*/
|
|
CREATE TABLE ia_repairs (
|
|
episode_id integer PRIMARY KEY REFERENCES episodes(id),
|
|
repaired integer default 0,
|
|
repair_date integer default 0,
|
|
notes text default null
|
|
);
|
|
|
|
/* ----------------------------------------------------------------------------
|
|
* Table: show_host_xref
|
|
*
|
|
* episode_id Foreign key for 'episodes'
|
|
* hostid Host number from MySQL database
|
|
* hostname Host name from MySQL database
|
|
*
|
|
*/
|
|
CREATE TABLE "show_host_xref" (
|
|
"episode_id" integer,
|
|
"hostid" integer,
|
|
"hostname" text DEFAULT null,
|
|
FOREIGN KEY("episode_id") REFERENCES "episodes"("id")
|
|
);
|
|
|
|
/* ----------------------------------------------------------------------------
|
|
* Index: show_host_xref_idx
|
|
*
|
|
* Attempt to constrain duplicates in the show_host_xref table
|
|
*
|
|
*/
|
|
CREATE UNIQUE INDEX "show_host_xref_idx" ON "show_host_xref" (
|
|
"episode_id" ASC
|
|
);
|
|
|
|
/*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
* View: episodes_view
|
|
*
|
|
*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/
|
|
DROP VIEW IF EXISTS episodes_view;
|
|
CREATE VIEW episodes_view AS
|
|
SELECT
|
|
e.id,
|
|
e.rdate,
|
|
e.title,
|
|
e.summary,
|
|
e.uploaded,
|
|
e.has_files,
|
|
e.with_files,
|
|
e.with_derived,
|
|
e.archive_date,
|
|
e.IA_URL,
|
|
CASE e.item_last_updated
|
|
WHEN 0 THEN null
|
|
ELSE datetime(e.item_last_updated,'unixepoch') END AS item_last_updated,
|
|
e.notes,
|
|
count(a.url) AS asset_count,
|
|
group_concat(a.filename) AS assets
|
|
FROM episodes e
|
|
LEFT JOIN assets a ON e.id = a.episode_id
|
|
GROUP BY e.id
|
|
ORDER BY e.id;
|
|
|
|
/*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
* View: episodes_this_month (based on episodes_view)
|
|
*
|
|
*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/
|
|
DROP VIEW IF EXISTS episodes_this_month;
|
|
CREATE VIEW episodes_this_month AS
|
|
SELECT
|
|
*
|
|
FROM episodes_view
|
|
WHERE rdate between date('now','start of month')
|
|
AND date('now','start of month','+1 month','-1 day');
|
|
|
|
/*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
* View: episodes_last_month
|
|
*
|
|
* Selects the rows from 'episodes_view' for the previous month
|
|
*
|
|
*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/
|
|
DROP VIEW IF EXISTS episodes_last_month;
|
|
CREATE VIEW episodes_last_month AS
|
|
SELECT
|
|
*
|
|
FROM episodes_view
|
|
WHERE rdate between date('now','start of month','-1 month')
|
|
AND date('now','start of month','-1 day');
|
|
|
|
/*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
* View: episodes_next_month
|
|
*
|
|
* Selects the rows from 'episodes_view2' for the next month
|
|
*
|
|
*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/
|
|
DROP VIEW IF EXISTS episodes_next_month;
|
|
CREATE VIEW episodes_next_month AS
|
|
SELECT
|
|
*
|
|
FROM episodes_view
|
|
WHERE rdate between date('now','start of month','+1 month')
|
|
AND date('now','start of month','+2 month','-1 day');
|
|
|
|
/*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
* View: re_upload
|
|
*
|
|
* Selects the rows from 'episodes_view' which need to be re-uploaded. Shows the
|
|
* last 5 upolads to give context.
|
|
*
|
|
*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/
|
|
DROP VIEW IF EXISTS re_upload;
|
|
CREATE VIEW re_upload AS
|
|
SELECT
|
|
*
|
|
FROM episodes_view
|
|
WHERE id
|
|
BETWEEN
|
|
(SELECT min(id)-5
|
|
FROM episodes_view
|
|
WHERE id BETWEEN 871 AND 2429
|
|
AND uploaded = 1
|
|
AND with_derived = 0
|
|
GROUP BY id)
|
|
AND 2429;
|
|
|
|
/*
|
|
* CREATE VIEW re_upload AS
|
|
* SELECT
|
|
* *
|
|
* FROM episodes_view
|
|
* WHERE id BETWEEN 871 AND 2429
|
|
* AND uploaded = 1
|
|
* AND with_derived = 0;
|
|
*
|
|
*/
|
|
|
|
-- vim: syntax=sql:ts=8:sw=4:ai:tw=80:et:fo=tcrqn21:fdm=marker:comments+=b\:--
|
|
|