Files
hpr-knowledge-base/index.js
Lee Hanken 7c8efd2228 Initial commit: HPR Knowledge Base MCP Server
- MCP server with stdio transport for local use
- Search episodes, transcripts, hosts, and series
- 4,511 episodes with metadata and transcripts
- Data loader with in-memory JSON storage

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-10-26 10:54:13 +00:00

567 lines
15 KiB
JavaScript
Executable File

#!/usr/bin/env node
import { Server } from '@modelcontextprotocol/sdk/server/index.js';
import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
import {
CallToolRequestSchema,
ListToolsRequestSchema,
ListResourcesRequestSchema,
ReadResourceRequestSchema,
} from '@modelcontextprotocol/sdk/types.js';
import HPRDataLoader from './data-loader.js';
// Load the HPR data set before anything else; the top-level await pauses
// module evaluation until dataLoader.load() has settled.
const dataLoader = new HPRDataLoader();
await dataLoader.load();

// Name and version handed to the MCP Server constructor.
const serverInfo = { name: 'hpr-knowledge-base', version: '1.0.0' };

// Capabilities declared to the Server: tools and resources, each with no extra options.
const serverOptions = { capabilities: { tools: {}, resources: {} } };

// Create MCP server
const server = new Server(serverInfo, serverOptions);
// Replacement text for the named HTML entities that stripHtml decodes.
const HTML_ENTITY_TEXT = Object.freeze({
  nbsp: ' ',
  lt: '<',
  gt: '>',
  amp: '&',
  quot: '"',
});

/**
 * Helper function to strip HTML tags and decode a small set of entities.
 *
 * Tags (anything matching `<...>`) are removed first, then `&nbsp;`, `&lt;`,
 * `&gt;`, `&amp;` and `&quot;` are decoded, and finally surrounding
 * whitespace is trimmed.
 *
 * Entities are decoded in a single pass so that already-escaped text is only
 * unescaped once: `&amp;quot;` becomes the literal text `&quot;`, not `"`.
 *
 * @param {string|null|undefined} html - HTML fragment; nullish input yields ''.
 * @returns {string} Plain-text rendering of the fragment.
 */
function stripHtml(html) {
  // Records may lack the field entirely; treat that as empty text instead of throwing.
  if (html == null) {
    return '';
  }
  return String(html)
    .replace(/<[^>]*>/g, '')
    .replace(/&(nbsp|lt|gt|amp|quot);/g, (_match, entity) => HTML_ENTITY_TEXT[entity])
    .trim();
}
/**
 * Helper to format an episode for display as a Markdown-style text block.
 *
 * Looks up the host via dataLoader.getHost and, when episode.series is not 0,
 * the series via dataLoader.getSeries. The result always contains the header
 * and summary; a "Series" section is appended when a series record was found,
 * and a "Host Notes" section when includeNotes is set and the episode has notes.
 *
 * @param {object} episode - Episode record (id, title, date, hostid, duration,
 *   tags, license, downloads, summary, series, notes are read).
 * @param {boolean} [includeNotes=false] - Append the HTML-stripped host notes.
 * @returns {string} The formatted episode text.
 */
function formatEpisode(episode, includeNotes = false) {
  const hostRecord = dataLoader.getHost(episode.hostid);

  let seriesRecord = null;
  if (episode.series !== 0) {
    seriesRecord = dataLoader.getSeries(episode.series);
  }

  const episodeCode = String(episode.id).padStart(4, '0');
  // duration is split into whole minutes and zero-padded remaining seconds
  const minutes = Math.floor(episode.duration / 60);
  const seconds = String(episode.duration % 60).padStart(2, '0');

  const header = [
    `# HPR${episodeCode}: ${episode.title}`,
    `**Date:** ${episode.date}`,
    `**Host:** ${hostRecord?.host || 'Unknown'} (ID: ${episode.hostid})`,
    `**Duration:** ${minutes}:${seconds}`,
    `**Tags:** ${episode.tags}`,
    `**License:** ${episode.license}`,
    `**Downloads:** ${episode.downloads}`,
    '## Summary',
    `${episode.summary}`,
  ].join('\n');

  // Sections are separated from each other by one blank line.
  const sections = [header];

  if (seriesRecord) {
    sections.push(`## Series\n**${seriesRecord.name}**: ${stripHtml(seriesRecord.description)}`);
  }

  if (includeNotes && episode.notes) {
    sections.push(`## Host Notes\n${stripHtml(episode.notes)}`);
  }

  return sections.join('\n\n');
}
// List available resources
//
// Answers ListResources requests with the four fixed plain-text resources
// this server exposes. The list is static, so nothing is read from the data
// loader here (the previous unused dataLoader.getStats() call was removed).
server.setRequestHandler(ListResourcesRequestSchema, async () => {
  // Every resource is served as text/plain; only uri, name and description vary.
  const textResource = (uri, name, description) => ({
    uri,
    mimeType: 'text/plain',
    name,
    description,
  });

  return {
    resources: [
      textResource(
        'hpr://stats',
        'HPR Statistics',
        'Overall statistics about the HPR knowledge base',
      ),
      textResource(
        'hpr://episodes/recent',
        'Recent Episodes',
        'List of 50 most recent HPR episodes',
      ),
      textResource('hpr://hosts/all', 'All Hosts', 'List of all HPR hosts'),
      textResource('hpr://series/all', 'All Series', 'List of all HPR series'),
    ],
  };
});
// Read a resource
//
// Resolves one of the hpr:// URIs to a plain-text document:
//   hpr://stats            - counters and date range from dataLoader.getStats()
//   hpr://episodes/recent  - up to 50 episodes from an empty-query search
//   hpr://hosts/all        - hosts with valid === 1 and their episode counts
//   hpr://series/all       - series with valid === 1 and private === 0
// Any other URI raises Error('Unknown resource: <uri>').
server.setRequestHandler(ReadResourceRequestSchema, async (request) => {
  const uri = request.params.uri;
  let text;

  switch (uri) {
    case 'hpr://stats': {
      const stats = dataLoader.getStats();
      text = [
        '# Hacker Public Radio Statistics',
        `**Total Episodes:** ${stats.totalEpisodes}`,
        `**Total Hosts:** ${stats.totalHosts}`,
        `**Total Comments:** ${stats.totalComments}`,
        `**Total Series:** ${stats.totalSeries}`,
        `**Transcripts Available:** ${stats.totalTranscripts}`,
        `**Date Range:** ${stats.dateRange.earliest} to ${stats.dateRange.latest}`,
        'Hacker Public Radio is a community-driven podcast released under Creative Commons licenses.',
        'All content is contributed by the community, for the community.',
      ].join('\n');
      break;
    }

    case 'hpr://episodes/recent': {
      const recentEpisodes = dataLoader.searchEpisodes('', { limit: 50 });
      const lines = recentEpisodes.map((ep) => {
        const hostRecord = dataLoader.getHost(ep.hostid);
        const code = String(ep.id).padStart(4, '0');
        return `**HPR${code}** (${ep.date}) - ${ep.title} by ${hostRecord?.host || 'Unknown'}`;
      });
      text = `# Recent Episodes\n\n${lines.join('\n')}`;
      break;
    }

    case 'hpr://hosts/all': {
      const validHosts = dataLoader.hosts.filter((h) => h.valid === 1);
      const lines = validHosts.map((h) => {
        const episodeTotal = dataLoader.getEpisodesByHost(h.hostid).length;
        return `**${h.host}** (ID: ${h.hostid}) - ${episodeTotal} episodes`;
      });
      text = `# All HPR Hosts\n\n${lines.join('\n')}`;
      break;
    }

    case 'hpr://series/all': {
      const publicSeries = dataLoader.series.filter((s) => s.valid === 1 && s.private === 0);
      const entries = publicSeries.map((s) => {
        const episodeTotal = dataLoader.getEpisodesInSeries(s.id).length;
        return `**${s.name}** (ID: ${s.id}) - ${episodeTotal} episodes\n ${stripHtml(s.description)}`;
      });
      text = `# All HPR Series\n\n${entries.join('\n\n')}`;
      break;
    }

    default:
      throw new Error(`Unknown resource: ${uri}`);
  }

  // All resources share the same single-item text/plain envelope.
  return {
    contents: [
      {
        uri,
        mimeType: 'text/plain',
        text,
      },
    ],
  };
});
// List available tools
//
// Answers ListTools requests with the definitions of the five tools handled
// by this server: search_episodes, get_episode, search_transcripts,
// get_host_info and get_series_info. The definitions are rebuilt on every
// request, so each response gets its own fresh objects.

// One JSON-schema property: its type plus a human-readable description.
const schemaField = (type, description) => ({ type, description });

// One tool entry whose input is an object with the given properties.
const toolDefinition = (name, description, properties, required) => ({
  name,
  description,
  inputSchema: {
    type: 'object',
    properties,
    required,
  },
});

server.setRequestHandler(ListToolsRequestSchema, async () => ({
  tools: [
    toolDefinition(
      'search_episodes',
      'Search HPR episodes by keywords in title, summary, tags, or host notes. Can filter by host, series, tags, and date range.',
      {
        query: schemaField('string', 'Search query (searches title, summary, tags, and notes)'),
        limit: schemaField('number', 'Maximum number of results to return (default: 20)'),
        hostId: schemaField('number', 'Filter by host ID'),
        seriesId: schemaField('number', 'Filter by series ID'),
        tag: schemaField('string', 'Filter by tag'),
        fromDate: schemaField('string', 'Filter episodes from this date (YYYY-MM-DD)'),
        toDate: schemaField('string', 'Filter episodes to this date (YYYY-MM-DD)'),
      },
      [],
    ),
    toolDefinition(
      'get_episode',
      'Get detailed information about a specific HPR episode including transcript if available',
      {
        episodeId: schemaField('number', 'Episode ID number'),
        includeTranscript: schemaField('boolean', 'Include full transcript if available (default: true)'),
        includeComments: schemaField('boolean', 'Include community comments (default: true)'),
      },
      ['episodeId'],
    ),
    toolDefinition(
      'search_transcripts',
      'Search through episode transcripts for specific keywords or phrases',
      {
        query: schemaField('string', 'Search query to find in transcripts'),
        limit: schemaField('number', 'Maximum number of episodes to return (default: 20)'),
        contextLines: schemaField('number', 'Number of lines of context around matches (default: 3)'),
      },
      ['query'],
    ),
    toolDefinition(
      'get_host_info',
      'Get information about an HPR host including all their episodes',
      {
        hostId: schemaField('number', 'Host ID number'),
        hostName: schemaField('string', 'Host name (will search if hostId not provided)'),
        includeEpisodes: schemaField('boolean', 'Include list of all episodes by this host (default: true)'),
      },
      [],
    ),
    toolDefinition(
      'get_series_info',
      'Get information about an HPR series including all episodes in the series',
      {
        seriesId: schemaField('number', 'Series ID number'),
      },
      ['seriesId'],
    ),
  ],
}));
// Handle tool calls
//
// Dispatches a CallTool request by tool name to one of: search_episodes,
// get_episode, search_transcripts, get_host_info, get_series_info.
// Every outcome is returned as a single text content item. Anything thrown
// while handling the call (including an unknown tool name) is converted into
// a text reply flagged with isError: true instead of propagating.
server.setRequestHandler(CallToolRequestSchema, async (request) => {
  const { name } = request.params;
  // `arguments` is optional in a tool call; default to an empty object so
  // tools without required parameters do not fail on property access.
  const args = request.params.arguments ?? {};

  // Wrap plain text in the tool-result envelope.
  const textReply = (text) => ({ content: [{ type: 'text', text }] });
  // Episode label such as HPR0042 (id zero-padded to at least four digits).
  const episodeCode = (id) => `HPR${String(id).padStart(4, '0')}`;
  // Display name of a host, or 'Unknown' when the lookup yields nothing.
  const hostNameOf = (hostid) => dataLoader.getHost(hostid)?.host || 'Unknown';
  // Strip HTML, treating a missing field as empty text rather than throwing.
  const plainText = (html) => stripHtml(html ?? '');

  try {
    if (name === 'search_episodes') {
      const results = dataLoader.searchEpisodes(args.query || '', {
        limit: args.limit || 20,
        hostId: args.hostId,
        seriesId: args.seriesId,
        tag: args.tag,
        fromDate: args.fromDate,
        toDate: args.toDate,
      });
      const body = results.length > 0
        ? results.map((ep) => formatEpisode(ep, false)).join('\n\n---\n\n')
        : 'No episodes found matching your search criteria.';
      return textReply(`# Search Results (${results.length} episodes found)\n\n${body}`);
    }

    if (name === 'get_episode') {
      const episode = dataLoader.getEpisode(args.episodeId);
      if (!episode) {
        return textReply(`Episode ${args.episodeId} not found.`);
      }

      let text = formatEpisode(episode, true);

      // Transcript is included unless the caller explicitly passes false.
      if (args.includeTranscript !== false) {
        const transcript = dataLoader.getTranscript(args.episodeId);
        text += transcript
          ? `\n\n## Transcript\n\n${transcript}`
          : `\n\n## Transcript\n\n*No transcript available for this episode.*`;
      }

      // Comments are included unless the caller explicitly passes false.
      if (args.includeComments !== false) {
        const comments = dataLoader.getCommentsForEpisode(args.episodeId);
        if (comments.length > 0) {
          text += `\n\n## Comments (${comments.length})\n\n`;
          text += comments
            .map((c) =>
              `**${c.comment_author_name}** (${c.comment_timestamp})${c.comment_title ? ` - ${c.comment_title}` : ''}\n${c.comment_text}`)
            .join('\n\n---\n\n');
        }
      }

      return textReply(text);
    }

    if (name === 'search_transcripts') {
      const results = dataLoader.searchTranscripts(args.query, {
        limit: args.limit || 20,
        // ?? rather than || so an explicit 0 (no context lines) is passed through.
        contextLines: args.contextLines ?? 3,
      });
      if (results.length === 0) {
        return textReply(`No transcripts found containing "${args.query}".`);
      }

      const body = results
        .map(({ episode, matches }) => {
          const header = [
            `# ${episodeCode(episode.id)}: ${episode.title}`,
            `**Host:** ${hostNameOf(episode.hostid)} | **Date:** ${episode.date}`,
            `**Matches found:** ${matches.length}`,
            '',
          ].join('\n');
          // Each match is a "### Line N" heading followed by a fenced block.
          const matchBlocks = matches
            .map((match) =>
              [`### Line ${match.lineNumber}`, '```', `${match.context}`, '```', ''].join('\n'))
            .join('');
          return header + matchBlocks;
        })
        .join('\n---\n\n');

      return textReply(
        `# Transcript Search Results (${results.length} episodes)\n\nSearching for: "${args.query}"\n\n${body}`,
      );
    }

    if (name === 'get_host_info') {
      let host;
      if (args.hostId) {
        host = dataLoader.getHost(args.hostId);
      } else if (args.hostName) {
        // Name lookup takes the first search hit.
        host = dataLoader.searchHosts(args.hostName)[0];
      }
      if (!host) {
        return textReply('Host not found.');
      }

      let text = [
        `# ${host.host}`,
        `**Host ID:** ${host.hostid}`,
        `**Email:** ${host.email}`,
        `**License:** ${host.license}`,
        `**Profile:** ${plainText(host.profile)}`,
        '',
      ].join('\n');

      if (args.includeEpisodes !== false) {
        const episodes = dataLoader.getEpisodesByHost(host.hostid);
        text += `\n**Total Episodes:** ${episodes.length}\n\n## Episodes\n\n`;
        // Sort by date (newest first) on a copy: Array.prototype.sort works in
        // place and the array may be owned by the data loader.
        const newestFirst = [...episodes].sort((a, b) => b.date.localeCompare(a.date));
        text += newestFirst
          .map((ep) => `**${episodeCode(ep.id)}** (${ep.date}) - ${ep.title}\n ${ep.summary}`)
          .join('\n\n');
      }

      return textReply(text);
    }

    if (name === 'get_series_info') {
      const series = dataLoader.getSeries(args.seriesId);
      if (!series) {
        return textReply(`Series ${args.seriesId} not found.`);
      }

      const episodes = dataLoader.getEpisodesInSeries(args.seriesId);
      let text = [
        `# ${series.name}`,
        `**Series ID:** ${series.id}`,
        `**Description:** ${plainText(series.description)}`,
        `**Total Episodes:** ${episodes.length}`,
        '## Episodes in Series',
        '',
      ].join('\n');

      // Sort by date (oldest first) on a copy, for the same reason as above.
      const oldestFirst = [...episodes].sort((a, b) => a.date.localeCompare(b.date));
      text += oldestFirst
        .map((ep, index) =>
          `${index + 1}. **${episodeCode(ep.id)}** (${ep.date}) - ${ep.title} by ${hostNameOf(ep.hostid)}\n ${ep.summary}`)
        .join('\n\n');

      return textReply(text);
    }

    throw new Error(`Unknown tool: ${name}`);
  } catch (error) {
    return {
      content: [
        {
          type: 'text',
          text: `Error: ${error.message}`,
        },
      ],
      isError: true,
    };
  }
});
// Start the server
//
// Connects the MCP server to a stdio transport and, once connected, writes a
// status line with console.error.
async function main() {
  await server.connect(new StdioServerTransport());
  console.error('HPR Knowledge Base MCP server running on stdio');
}

// A rejected startup is logged and ends the process with exit code 1.
const exitOnFatalError = (error) => {
  console.error('Fatal error:', error);
  process.exit(1);
};

main().catch(exitOnFatalError);