Files
hpr-knowledge-base/index.js

688 lines
20 KiB
JavaScript
Raw Normal View History

#!/usr/bin/env node
import { Server } from '@modelcontextprotocol/sdk/server/index.js';
import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
import {
CallToolRequestSchema,
ListToolsRequestSchema,
ListResourcesRequestSchema,
ReadResourceRequestSchema,
} from '@modelcontextprotocol/sdk/types.js';
import HPRDataLoader from './data-loader.js';
// Initialize data loader. The top-level await holds back the rest of this
// module until load() has settled.
const dataLoader = new HPRDataLoader();
await dataLoader.load();

// Create MCP server: identity first, then the capabilities it advertises.
const serverInfo = { name: 'hpr-knowledge-base', version: '1.0.0' };
const serverOptions = {
  capabilities: { tools: {}, resources: {} },
};
const server = new Server(serverInfo, serverOptions);
/**
 * Convert an HTML fragment to plain text.
 *
 * Removes everything that looks like a tag, decodes a small set of common
 * entities, and trims surrounding whitespace.
 *
 * @param {string|null|undefined} html - HTML fragment. `null`/`undefined`
 *   yield an empty string; other non-string values are coerced with String().
 * @returns {string} Plain-text rendering of the fragment.
 */
function stripHtml(html) {
  if (html == null) {
    return '';
  }
  return String(html)
    .replace(/<[^>]*>/g, '')
    .replace(/&nbsp;/g, ' ')
    .replace(/&lt;/g, '<')
    .replace(/&gt;/g, '>')
    .replace(/&quot;/g, '"')
    .replace(/&#39;/g, "'")
    // Decode &amp; last so an escaped entity such as "&amp;quot;" becomes the
    // literal text "&quot;" instead of being decoded a second time.
    .replace(/&amp;/g, '&')
    .trim();
}
/**
 * Render one episode as a Markdown block for display.
 *
 * @param {object} episode - Episode record; reads id, title, date, hostid,
 *   duration, tags, license, downloads, summary, series and notes.
 * @param {boolean} [includeNotes=false] - When true and the episode has
 *   notes, append them (with HTML stripped) as a "Host Notes" section.
 * @returns {string} Markdown text describing the episode.
 */
function formatEpisode(episode, includeNotes = false) {
  const hostRecord = dataLoader.getHost(episode.hostid);

  // A series value of 0 means no lookup is performed at all.
  let seriesRecord = null;
  if (episode.series !== 0) {
    seriesRecord = dataLoader.getSeries(episode.series);
  }

  // Duration is shown as <value / 60>:<value % 60, zero-padded to 2 digits>.
  const minutes = Math.floor(episode.duration / 60);
  const seconds = String(episode.duration % 60).padStart(2, '0');

  const headerLines = [
    `# HPR${String(episode.id).padStart(4, '0')}: ${episode.title}`,
    `**Date:** ${episode.date}`,
    `**Host:** ${hostRecord?.host || 'Unknown'} (ID: ${episode.hostid})`,
    `**Duration:** ${minutes}:${seconds}`,
    `**Tags:** ${episode.tags}`,
    `**License:** ${episode.license}`,
    `**Downloads:** ${episode.downloads}`,
    '## Summary',
    `${episode.summary}`,
  ];

  // Sections are separated from one another by a blank line.
  const sections = [headerLines.join('\n')];
  if (seriesRecord) {
    sections.push(`## Series\n**${seriesRecord.name}**: ${stripHtml(seriesRecord.description)}`);
  }
  if (includeNotes && episode.notes) {
    sections.push(`## Host Notes\n${stripHtml(episode.notes)}`);
  }
  return sections.join('\n\n');
}
/**
 * Build the display strings describing which terms matched in one episode.
 *
 * @param {object} matchSummary - Per-episode summary; reads `matchedTerms`
 *   (array of strings) and `termHitCounts` (term -> count map). Either may
 *   be absent.
 * @returns {{matchedTerms: string, termCountText: (string|null)}} Comma-joined
 *   matched terms ('N/A' when there are none) and a "term: count" list, or
 *   null when no counts are available.
 */
function describeTermMatches(matchSummary) {
  const terms = Array.isArray(matchSummary.matchedTerms) ? matchSummary.matchedTerms : [];
  const matchedTerms = terms.length > 0 ? terms.join(', ') : 'N/A';
  const termCounts = Object.entries(matchSummary.termHitCounts || {});
  const termCountText = termCounts.length > 0
    ? termCounts.map(([term, count]) => `${term}: ${count}`).join(', ')
    : null;
  return { matchedTerms, termCountText };
}

/**
 * Render transcript search results as Markdown: a header echoing the search
 * settings, a one-line-per-episode summary list, then a detail section per
 * episode containing each captured match in a fenced block.
 *
 * @param {Array<object>} results - Search results; each entry provides
 *   `episode` (id, title, date, hostid), `matchSummary` and `matches`
 *   (entries with lineNumber, context and optional terms).
 * @param {object} [args={}] - The search arguments to echo back (query,
 *   terms, contextLines, caseSensitive, wholeWord, maxMatchesPerEpisode,
 *   hostId, hostName).
 * @returns {string} Markdown text, or '' when `results` is empty.
 */
function formatTranscriptSearchResults(results, args = {}) {
  if (results.length === 0) {
    return '';
  }

  const descriptorParts = [];
  if (args.query) {
    descriptorParts.push(`phrase="${args.query}"`);
  }
  if (Array.isArray(args.terms) && args.terms.length > 0) {
    descriptorParts.push(`terms=[${args.terms.join(', ')}]`);
  }
  if (descriptorParts.length === 0) {
    descriptorParts.push('"no explicit query provided"');
  }

  // The match mode is reported from the first result's summary, not from args.
  const firstSummary = results[0]?.matchSummary || {};
  const matchMode = firstSummary.matchMode || 'phrase';
  const contextLines = args.contextLines ?? 3;
  const caseSensitive = args.caseSensitive ? 'yes' : 'no';
  const wholeWord = args.wholeWord ? 'yes' : 'no';
  const maxMatches = args.maxMatchesPerEpisode ?? 5;

  const hostFilters = [];
  if (args.hostId) {
    hostFilters.push(`ID ${args.hostId}`);
  }
  if (args.hostName) {
    hostFilters.push(`name "${args.hostName}"`);
  }

  let text = `# Transcript Search Results (${results.length} episodes)\n\n`;
  text += `Searching for: ${descriptorParts.join(' | ')}\n`;
  text += `Match mode: ${matchMode} | Context lines: ${contextLines} | Case sensitive: ${caseSensitive} | Whole word: ${wholeWord}\n`;
  text += `Maximum matches per episode: ${maxMatches}\n`;
  if (hostFilters.length > 0) {
    text += `Host filter: ${hostFilters.join(' & ')}\n`;
  }

  // Single pass: each result contributes one summary line and one detail block.
  const summaryLines = [];
  const detailBlocks = [];
  for (const result of results) {
    const summary = result.matchSummary || {};
    const host = dataLoader.getHost(result.episode.hostid);
    const hostName = host?.host || 'Unknown';
    const { matchedTerms, termCountText } = describeTermMatches(summary);
    const totalMatches = summary.totalMatches ?? 0;
    const heading = `HPR${String(result.episode.id).padStart(4, '0')}: ${result.episode.title}`;

    // Title and match count need an explicit separator; without one the
    // count runs straight into the end of the title.
    let line = `- ${heading} - ${totalMatches} match${totalMatches === 1 ? '' : 'es'}${summary.truncated ? ' (truncated)' : ''}; terms: ${matchedTerms}`;
    if (termCountText) {
      line += ` (${termCountText})`;
    }
    line += ` | Host: ${hostName} (${result.episode.date})`;
    summaryLines.push(line);

    let detail = `## ${heading}\n`;
    detail += `**Host:** ${hostName} | **Date:** ${result.episode.date}\n`;
    detail += `**Matched terms:** ${matchedTerms}\n`;
    detail += `**Matches captured:** ${totalMatches}${summary.truncated ? ' (additional matches omitted after reaching limit)' : ''}\n`;
    if (termCountText) {
      detail += `**Term counts:** ${termCountText}\n`;
    }
    detail += '\n';
    (result.matches || []).forEach((match, index) => {
      const termInfo = match.terms && match.terms.length > 0
        ? ` | terms: ${match.terms.join(', ')}`
        : '';
      detail += `### Match ${index + 1} (line ${match.lineNumber}${termInfo})\n\`\`\`\n${match.context}\n\`\`\`\n`;
    });
    detailBlocks.push(detail);
  }

  text += '\n## Summary\n';
  text += summaryLines.join('\n');
  text += '\n\n';
  text += detailBlocks.join('');
  return text;
}
// List available resources
// The catalogue is static, so nothing is read from the data loader here; the
// text for each URI is produced by the ReadResource handler.
server.setRequestHandler(ListResourcesRequestSchema, async () => {
  return {
    resources: [
      {
        uri: 'hpr://stats',
        mimeType: 'text/plain',
        name: 'HPR Statistics',
        description: 'Overall statistics about the HPR knowledge base',
      },
      {
        uri: 'hpr://episodes/recent',
        mimeType: 'text/plain',
        name: 'Recent Episodes',
        description: 'List of 50 most recent HPR episodes',
      },
      {
        uri: 'hpr://hosts/all',
        mimeType: 'text/plain',
        name: 'All Hosts',
        description: 'List of all HPR hosts',
      },
      {
        uri: 'hpr://series/all',
        mimeType: 'text/plain',
        name: 'All Series',
        description: 'List of all HPR series',
      },
    ],
  };
});
// Read a resource
// Dispatches on the requested URI and returns its Markdown-formatted text;
// any URI outside the four known ones raises an Error.
server.setRequestHandler(ReadResourceRequestSchema, async (request) => {
  const { uri } = request.params;

  // Every resource is answered with a single text/plain entry echoing the URI.
  const asTextResource = (text) => ({
    contents: [{ uri, mimeType: 'text/plain', text }],
  });
  const episodeCode = (id) => String(id).padStart(4, '0');

  switch (uri) {
    case 'hpr://stats': {
      const stats = dataLoader.getStats();
      const lines = [
        '# Hacker Public Radio Statistics',
        `**Total Episodes:** ${stats.totalEpisodes}`,
        `**Total Hosts:** ${stats.totalHosts}`,
        `**Total Comments:** ${stats.totalComments}`,
        `**Total Series:** ${stats.totalSeries}`,
        `**Transcripts Available:** ${stats.totalTranscripts}`,
        `**Date Range:** ${stats.dateRange.earliest} to ${stats.dateRange.latest}`,
        'Hacker Public Radio is a community-driven podcast released under Creative Commons licenses.',
        'All content is contributed by the community, for the community.',
      ];
      return asTextResource(lines.join('\n'));
    }

    case 'hpr://episodes/recent': {
      // Empty query capped at 50; the ordering is whatever searchEpisodes returns.
      const recentEpisodes = dataLoader.searchEpisodes('', { limit: 50 });
      const listing = recentEpisodes.map((ep) => {
        const host = dataLoader.getHost(ep.hostid);
        return `**HPR${episodeCode(ep.id)}** (${ep.date}) - ${ep.title} by ${host?.host || 'Unknown'}`;
      });
      return asTextResource(`# Recent Episodes\n\n${listing.join('\n')}`);
    }

    case 'hpr://hosts/all': {
      // Only hosts flagged valid === 1 are listed.
      const validHosts = dataLoader.hosts.filter((h) => h.valid === 1);
      const listing = validHosts.map((h) => {
        const episodeCount = dataLoader.getEpisodesByHost(h.hostid).length;
        return `**${h.host}** (ID: ${h.hostid}) - ${episodeCount} episodes`;
      });
      return asTextResource(`# All HPR Hosts\n\n${listing.join('\n')}`);
    }

    case 'hpr://series/all': {
      // Only series flagged valid === 1 and private === 0 are listed.
      const publicSeries = dataLoader.series.filter((s) => s.valid === 1 && s.private === 0);
      const listing = publicSeries.map((s) => {
        const episodeCount = dataLoader.getEpisodesInSeries(s.id).length;
        return `**${s.name}** (ID: ${s.id}) - ${episodeCount} episodes\n ${stripHtml(s.description)}`;
      });
      return asTextResource(`# All HPR Series\n\n${listing.join('\n\n')}`);
    }

    default:
      throw new Error(`Unknown resource: ${uri}`);
  }
});
// List available tools
// Advertises the five tools together with the JSON Schema of their inputs.
// The tool names must match the ones the CallTool handler dispatches on.
server.setRequestHandler(ListToolsRequestSchema, async () => {
  // Schema fragment for a simple typed property.
  const field = (type, description) => ({ type, description });
  // Tool entry whose input is a plain object with the given properties.
  const tool = (name, description, properties, required = []) => ({
    name,
    description,
    inputSchema: { type: 'object', properties, required },
  });

  const searchEpisodes = tool(
    'search_episodes',
    'Search HPR episodes by keywords in title, summary, tags, or host notes. Can filter by host, series, tags, and date range.',
    {
      query: field('string', 'Search query (searches title, summary, tags, and notes)'),
      limit: field('number', 'Maximum number of results to return (default: 20)'),
      hostId: field('number', 'Filter by host ID'),
      seriesId: field('number', 'Filter by series ID'),
      tag: field('string', 'Filter by tag'),
      fromDate: field('string', 'Filter episodes from this date (YYYY-MM-DD)'),
      toDate: field('string', 'Filter episodes to this date (YYYY-MM-DD)'),
    },
  );

  const getEpisode = tool(
    'get_episode',
    'Get detailed information about a specific HPR episode including transcript if available',
    {
      episodeId: field('number', 'Episode ID number'),
      includeTranscript: field('boolean', 'Include full transcript if available (default: true)'),
      includeComments: field('boolean', 'Include community comments (default: true)'),
    },
    ['episodeId'],
  );

  const searchTranscripts = tool(
    'search_transcripts',
    'Search through episode transcripts using phrases or multiple terms with AND/OR matching and optional host filters',
    {
      query: field('string', 'Search phrase to find in transcripts. Combine with terms/matchMode for advanced searches.'),
      terms: {
        type: 'array',
        items: { type: 'string' },
        description: 'Explicit list of terms to search for; useful when pairing with matchMode "any" or "all".',
      },
      matchMode: {
        type: 'string',
        enum: ['any', 'all', 'phrase'],
        description: 'How to interpret the query/terms. "phrase" (default) matches the phrase exactly, "any" matches if any term is present, "all" requires every term.',
      },
      limit: field('number', 'Maximum number of episodes to return (default: 20)'),
      contextLines: field('number', 'Number of lines of context around matches (default: 3)'),
      hostId: field('number', 'Restrict matches to a given host ID.'),
      hostName: field('string', 'Restrict matches to hosts whose name contains this value.'),
      caseSensitive: field('boolean', 'Perform a case-sensitive search (default: false).'),
      wholeWord: field('boolean', 'Match whole words only (default: false).'),
      maxMatchesPerEpisode: field('number', 'Maximum number of excerpt matches to include per episode (default: 5).'),
    },
  );

  const getHostInfo = tool(
    'get_host_info',
    'Get information about an HPR host including all their episodes',
    {
      hostId: field('number', 'Host ID number'),
      hostName: field('string', 'Host name (will search if hostId not provided)'),
      includeEpisodes: field('boolean', 'Include list of all episodes by this host (default: true)'),
    },
  );

  const getSeriesInfo = tool(
    'get_series_info',
    'Get information about an HPR series including all episodes in the series',
    {
      seriesId: field('number', 'Series ID number'),
    },
    ['seriesId'],
  );

  return {
    tools: [searchEpisodes, getEpisode, searchTranscripts, getHostInfo, getSeriesInfo],
  };
});
// Handle tool calls
// Dispatches on the tool name and returns a single text content item.
// Anything thrown while running a tool (including an unknown tool name) is
// reported back as a text result flagged with isError instead of propagating.
server.setRequestHandler(CallToolRequestSchema, async (request) => {
  const { name } = request.params;
  // `arguments` may be omitted from a tool call; fall back to an empty object
  // so tools with no required parameters still work when called bare.
  const args = request.params.arguments ?? {};

  const textResult = (text) => ({ content: [{ type: 'text', text }] });
  const episodeCode = (id) => String(id).padStart(4, '0');

  try {
    if (name === 'search_episodes') {
      const results = dataLoader.searchEpisodes(args.query || '', {
        limit: args.limit || 20,
        hostId: args.hostId,
        seriesId: args.seriesId,
        tag: args.tag,
        fromDate: args.fromDate,
        toDate: args.toDate,
      });
      const text = results.length > 0
        ? results.map((ep) => formatEpisode(ep, false)).join('\n\n---\n\n')
        : 'No episodes found matching your search criteria.';
      return textResult(`# Search Results (${results.length} episodes found)\n\n${text}`);
    }

    if (name === 'get_episode') {
      const episode = dataLoader.getEpisode(args.episodeId);
      if (!episode) {
        return textResult(`Episode ${args.episodeId} not found.`);
      }

      let text = formatEpisode(episode, true);

      // Add transcript if requested and available (included unless explicitly false)
      if (args.includeTranscript !== false) {
        const transcript = dataLoader.getTranscript(args.episodeId);
        text += transcript
          ? `\n\n## Transcript\n\n${transcript}`
          : `\n\n## Transcript\n\n*No transcript available for this episode.*`;
      }

      // Add comments if requested (included unless explicitly false)
      if (args.includeComments !== false) {
        const comments = dataLoader.getCommentsForEpisode(args.episodeId);
        if (comments.length > 0) {
          text += `\n\n## Comments (${comments.length})\n\n`;
          text += comments.map((c) =>
            `**${c.comment_author_name}** (${c.comment_timestamp})${c.comment_title ? ` - ${c.comment_title}` : ''}\n${c.comment_text}`
          ).join('\n\n---\n\n');
        }
      }
      return textResult(text);
    }

    if (name === 'search_transcripts') {
      const searchOptions = {
        limit: args.limit || 20,
        contextLines: args.contextLines ?? 3,
        terms: args.terms,
        matchMode: args.matchMode,
        hostId: args.hostId,
        hostName: args.hostName,
        caseSensitive: args.caseSensitive,
        wholeWord: args.wholeWord,
        maxMatchesPerEpisode: args.maxMatchesPerEpisode ?? 5,
      };
      const results = dataLoader.searchTranscripts(args.query || '', searchOptions);

      if (results.length === 0) {
        const descriptorParts = [];
        if (args.query) {
          descriptorParts.push(`phrase "${args.query}"`);
        }
        if (Array.isArray(args.terms) && args.terms.length > 0) {
          descriptorParts.push(`terms [${args.terms.join(', ')}]`);
        }
        if (args.hostId || args.hostName) {
          descriptorParts.push('host filter applied');
        }
        const description = descriptorParts.length > 0
          ? descriptorParts.join(', ')
          : 'the provided criteria';
        return textResult(`No transcripts found matching ${description}.`);
      }

      // Pass the effective (defaulted) values so the report matches the search.
      const formatArgs = {
        ...args,
        contextLines: searchOptions.contextLines,
        maxMatchesPerEpisode: searchOptions.maxMatchesPerEpisode,
      };
      return textResult(formatTranscriptSearchResults(results, formatArgs));
    }

    if (name === 'get_host_info') {
      let host;
      if (args.hostId) {
        host = dataLoader.getHost(args.hostId);
      } else if (args.hostName) {
        // Name lookup takes the first entry returned by searchHosts.
        const hosts = dataLoader.searchHosts(args.hostName);
        host = hosts[0];
      }
      if (!host) {
        return textResult('Host not found.');
      }

      let text = [
        `# ${host.host}`,
        `**Host ID:** ${host.hostid}`,
        `**Email:** ${host.email}`,
        `**License:** ${host.license}`,
        // A missing profile is rendered as empty text rather than throwing.
        `**Profile:** ${stripHtml(host.profile ?? '')}`,
        '',
      ].join('\n');

      if (args.includeEpisodes !== false) {
        const episodes = dataLoader.getEpisodesByHost(host.hostid);
        text += `\n**Total Episodes:** ${episodes.length}\n\n## Episodes\n\n`;
        // Sort by date (newest first). Sort a copy: Array.prototype.sort works
        // in place and would otherwise reorder the array handed out by the
        // data loader, which may be shared with other callers.
        const newestFirst = [...episodes].sort((a, b) => b.date.localeCompare(a.date));
        text += newestFirst.map((ep) =>
          `**HPR${episodeCode(ep.id)}** (${ep.date}) - ${ep.title}\n ${ep.summary}`
        ).join('\n\n');
      }
      return textResult(text);
    }

    if (name === 'get_series_info') {
      const series = dataLoader.getSeries(args.seriesId);
      if (!series) {
        return textResult(`Series ${args.seriesId} not found.`);
      }

      const episodes = dataLoader.getEpisodesInSeries(args.seriesId);
      let text = [
        `# ${series.name}`,
        `**Series ID:** ${series.id}`,
        // A missing description is rendered as empty text rather than throwing.
        `**Description:** ${stripHtml(series.description ?? '')}`,
        `**Total Episodes:** ${episodes.length}`,
        '## Episodes in Series',
        '',
      ].join('\n');

      // Sort by date (oldest first), again on a copy to leave the loader's
      // array untouched.
      const oldestFirst = [...episodes].sort((a, b) => a.date.localeCompare(b.date));
      text += oldestFirst.map((ep, index) => {
        const host = dataLoader.getHost(ep.hostid);
        return `${index + 1}. **HPR${episodeCode(ep.id)}** (${ep.date}) - ${ep.title} by ${host?.host || 'Unknown'}\n ${ep.summary}`;
      }).join('\n\n');
      return textResult(text);
    }

    throw new Error(`Unknown tool: ${name}`);
  } catch (error) {
    return {
      content: [
        {
          type: 'text',
          text: `Error: ${error.message}`,
        },
      ],
      isError: true,
    };
  }
});
// Start the server
/**
 * Connect the MCP server to a freshly created stdio transport and log a
 * readiness message once the connection has been set up.
 */
async function main() {
  await server.connect(new StdioServerTransport());
  // Logged with console.error (stderr) - presumably to keep stdout free for
  // the transport's own traffic.
  console.error('HPR Knowledge Base MCP server running on stdio');
}

// Any startup failure is logged and ends the process with exit code 1.
const exitOnFatalError = (err) => {
  console.error('Fatal error:', err);
  process.exit(1);
};

main().catch(exitOnFatalError);