- MCP server with stdio transport for local use - Search episodes, transcripts, hosts, and series - 4,511 episodes with metadata and transcripts - Data loader with in-memory JSON storage 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
251 lines
6.6 KiB
JavaScript
251 lines
6.6 KiB
JavaScript
import { readFileSync, readdirSync } from 'fs';
|
|
import { join, dirname } from 'path';
|
|
import { fileURLToPath } from 'url';
|
|
|
|
const __filename = fileURLToPath(import.meta.url);
|
|
const __dirname = dirname(__filename);
|
|
|
|
class HPRDataLoader {
|
|
constructor() {
|
|
this.episodes = [];
|
|
this.hosts = [];
|
|
this.comments = [];
|
|
this.series = [];
|
|
this.transcripts = new Map(); // Map of episode id to transcript text
|
|
}
|
|
|
|
/**
|
|
* Load all data from JSON files and transcripts
|
|
*/
|
|
async load() {
|
|
console.error('Loading HPR data...');
|
|
|
|
// Load JSON files
|
|
this.episodes = this.loadJSON('hpr_metadata/episodes.json');
|
|
this.hosts = this.loadJSON('hpr_metadata/hosts.json');
|
|
this.comments = this.loadJSON('hpr_metadata/comments.json');
|
|
this.series = this.loadJSON('hpr_metadata/series.json');
|
|
|
|
console.error(`Loaded ${this.episodes.length} episodes`);
|
|
console.error(`Loaded ${this.hosts.length} hosts`);
|
|
console.error(`Loaded ${this.comments.length} comments`);
|
|
console.error(`Loaded ${this.series.length} series`);
|
|
|
|
// Load transcripts
|
|
this.loadTranscripts();
|
|
|
|
console.error(`Loaded ${this.transcripts.size} transcripts`);
|
|
console.error('HPR data loading complete!');
|
|
}
|
|
|
|
/**
|
|
* Load a JSON file
|
|
*/
|
|
loadJSON(relativePath) {
|
|
const filePath = join(__dirname, relativePath);
|
|
try {
|
|
const data = readFileSync(filePath, 'utf-8');
|
|
return JSON.parse(data);
|
|
} catch (error) {
|
|
console.error(`Error loading ${relativePath}:`, error.message);
|
|
return [];
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Load all transcript files
|
|
*/
|
|
loadTranscripts() {
|
|
const transcriptsDir = join(__dirname, 'hpr_transcripts');
|
|
try {
|
|
const files = readdirSync(transcriptsDir);
|
|
|
|
for (const file of files) {
|
|
if (file.endsWith('.txt')) {
|
|
// Extract episode ID from filename (e.g., hpr0016.txt -> 16)
|
|
const match = file.match(/hpr(\d+)\.txt/);
|
|
if (match) {
|
|
const episodeId = parseInt(match[1], 10);
|
|
const filePath = join(transcriptsDir, file);
|
|
try {
|
|
const content = readFileSync(filePath, 'utf-8');
|
|
this.transcripts.set(episodeId, content);
|
|
} catch (error) {
|
|
console.error(`Error loading transcript ${file}:`, error.message);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
} catch (error) {
|
|
console.error('Error loading transcripts directory:', error.message);
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Get episode by ID
|
|
*/
|
|
getEpisode(id) {
|
|
return this.episodes.find(ep => ep.id === id);
|
|
}
|
|
|
|
/**
|
|
* Get host by ID
|
|
*/
|
|
getHost(id) {
|
|
return this.hosts.find(host => host.hostid === id);
|
|
}
|
|
|
|
/**
|
|
* Get series by ID
|
|
*/
|
|
getSeries(id) {
|
|
return this.series.find(s => s.id === id);
|
|
}
|
|
|
|
/**
|
|
* Get transcript for episode
|
|
*/
|
|
getTranscript(episodeId) {
|
|
return this.transcripts.get(episodeId);
|
|
}
|
|
|
|
/**
|
|
* Get comments for episode
|
|
*/
|
|
getCommentsForEpisode(episodeId) {
|
|
return this.comments.filter(c => c.eps_id === episodeId);
|
|
}
|
|
|
|
/**
|
|
* Get episodes by host
|
|
*/
|
|
getEpisodesByHost(hostId) {
|
|
return this.episodes.filter(ep => ep.hostid === hostId);
|
|
}
|
|
|
|
/**
|
|
* Get episodes in a series
|
|
*/
|
|
getEpisodesInSeries(seriesId) {
|
|
return this.episodes.filter(ep => ep.series === seriesId);
|
|
}
|
|
|
|
/**
|
|
* Search episodes by keyword in title, summary, or tags
|
|
*/
|
|
searchEpisodes(query, options = {}) {
|
|
const {
|
|
limit = 20,
|
|
hostId = null,
|
|
seriesId = null,
|
|
tag = null,
|
|
fromDate = null,
|
|
toDate = null
|
|
} = options;
|
|
|
|
const queryLower = query.toLowerCase();
|
|
let results = this.episodes.filter(ep => {
|
|
// Basic text search
|
|
const matchesQuery = !query ||
|
|
ep.title.toLowerCase().includes(queryLower) ||
|
|
ep.summary.toLowerCase().includes(queryLower) ||
|
|
ep.tags.toLowerCase().includes(queryLower) ||
|
|
ep.notes.toLowerCase().includes(queryLower);
|
|
|
|
// Filter by host
|
|
const matchesHost = !hostId || ep.hostid === hostId;
|
|
|
|
// Filter by series
|
|
const matchesSeries = seriesId === null || ep.series === seriesId;
|
|
|
|
// Filter by tag
|
|
const matchesTag = !tag || ep.tags.toLowerCase().includes(tag.toLowerCase());
|
|
|
|
// Filter by date range
|
|
const matchesDateRange = (!fromDate || ep.date >= fromDate) &&
|
|
(!toDate || ep.date <= toDate);
|
|
|
|
return matchesQuery && matchesHost && matchesSeries && matchesTag && matchesDateRange;
|
|
});
|
|
|
|
// Sort by date (newest first)
|
|
results.sort((a, b) => b.date.localeCompare(a.date));
|
|
|
|
return results.slice(0, limit);
|
|
}
|
|
|
|
/**
|
|
* Search transcripts by keyword
|
|
*/
|
|
searchTranscripts(query, options = {}) {
|
|
const { limit = 20, contextLines = 3 } = options;
|
|
const queryLower = query.toLowerCase();
|
|
const results = [];
|
|
|
|
for (const [episodeId, transcript] of this.transcripts) {
|
|
const lines = transcript.split('\n');
|
|
const matches = [];
|
|
|
|
// Find all matching lines
|
|
lines.forEach((line, index) => {
|
|
if (line.toLowerCase().includes(queryLower)) {
|
|
// Get context around the match
|
|
const start = Math.max(0, index - contextLines);
|
|
const end = Math.min(lines.length, index + contextLines + 1);
|
|
const context = lines.slice(start, end).join('\n');
|
|
|
|
matches.push({
|
|
lineNumber: index + 1,
|
|
line: line.trim(),
|
|
context: context
|
|
});
|
|
}
|
|
});
|
|
|
|
if (matches.length > 0) {
|
|
const episode = this.getEpisode(episodeId);
|
|
if (episode) {
|
|
results.push({
|
|
episode,
|
|
matches: matches.slice(0, 5) // Limit matches per episode
|
|
});
|
|
}
|
|
}
|
|
|
|
if (results.length >= limit) break;
|
|
}
|
|
|
|
return results;
|
|
}
|
|
|
|
/**
|
|
* Search hosts by name or email
|
|
*/
|
|
searchHosts(query) {
|
|
const queryLower = query.toLowerCase();
|
|
return this.hosts.filter(host =>
|
|
host.host.toLowerCase().includes(queryLower) ||
|
|
host.email.toLowerCase().includes(queryLower)
|
|
);
|
|
}
|
|
|
|
/**
|
|
* Get statistics
|
|
*/
|
|
getStats() {
|
|
return {
|
|
totalEpisodes: this.episodes.length,
|
|
totalHosts: this.hosts.length,
|
|
totalComments: this.comments.length,
|
|
totalSeries: this.series.length,
|
|
totalTranscripts: this.transcripts.size,
|
|
dateRange: {
|
|
earliest: this.episodes.reduce((min, ep) => ep.date < min ? ep.date : min, this.episodes[0]?.date || ''),
|
|
latest: this.episodes.reduce((max, ep) => ep.date > max ? ep.date : max, this.episodes[0]?.date || '')
|
|
}
|
|
};
|
|
}
|
|
}
|
|
|
|
export default HPRDataLoader;
|