use fuzzy matching for host and episode searches
This commit is contained in:
45
README.md
45
README.md
@@ -11,9 +11,11 @@ Hacker Public Radio is a community-driven podcast where hosts contribute content
|
|||||||
This MCP server provides:
|
This MCP server provides:
|
||||||
|
|
||||||
- **Episode Search**: Search through thousands of HPR episodes by title, summary, tags, or host notes
|
- **Episode Search**: Search through thousands of HPR episodes by title, summary, tags, or host notes
|
||||||
- **Transcript Search**: Full-text search across all episode transcripts
|
- **Fuzzy Matching**: Automatically handles typos and misspellings (e.g., "linx" finds "linux", "pythoon" finds "python")
|
||||||
|
- **Transcript Search**: Full-text search across all episode transcripts with flexible matching modes
|
||||||
- **Episode Details**: Get complete information about any episode including transcript and comments
|
- **Episode Details**: Get complete information about any episode including transcript and comments
|
||||||
- **Host Information**: Look up hosts and see all their contributions
|
- **Host Information**: Look up hosts and see all their contributions
|
||||||
|
- **Fuzzy Matching**: Handles name variations and typos (e.g., "klattu" finds "Klaatu")
|
||||||
- **Series Browsing**: Explore mini-series of related episodes
|
- **Series Browsing**: Explore mini-series of related episodes
|
||||||
- **Statistics**: View overall HPR statistics and recent episodes
|
- **Statistics**: View overall HPR statistics and recent episodes
|
||||||
|
|
||||||
@@ -189,6 +191,45 @@ Get information about a series and all its episodes.
|
|||||||
Get information about series 4 (Databases series)
|
Get information about series 4 (Databases series)
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## Fuzzy Matching
|
||||||
|
|
||||||
|
The server includes intelligent fuzzy matching for episode and host searches to handle typos and misspellings.
|
||||||
|
|
||||||
|
### How It Works
|
||||||
|
|
||||||
|
1. **Exact Match First**: The server always tries exact substring matching first for speed
|
||||||
|
2. **Fuzzy Fallback**: If no exact matches are found, it falls back to fuzzy matching using Levenshtein distance
|
||||||
|
3. **Match Indicators**: Results include indicators showing whether they're exact or fuzzy matches
|
||||||
|
|
||||||
|
### Examples
|
||||||
|
|
||||||
|
**Host Search:**
|
||||||
|
- Query: `"klattu"` → Finds: **Klaatu** *(fuzzy match, distance: 1)*
|
||||||
|
- Query: `"ken"` → Finds: **Ken Fallon** *(exact match)*
|
||||||
|
|
||||||
|
**Episode Search:**
|
||||||
|
- Query: `"pythoon"` → Finds episodes with **python** in the title *(fuzzy match, distance: 1)*
|
||||||
|
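- Query: `"linx"` → Finds episodes with **linux** in the title *(fuzzy match, distance: 1)*, but only when no episode contains the literal text "linx" in its title, summary, tags, or notes; if any does, just those exact matches are returned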
|
||||||
|
|
||||||
|
### Distance Thresholds
|
||||||
|
|
||||||
|
- **Hosts**: Maximum distance of 2 characters (handles 1-2 typos)
|
||||||
|
- **Episodes**: Maximum distance of 3 edits, measured between the query and each individual word of the episode title (more lenient than the host threshold)
|
||||||
|
|
||||||
|
### What the AI Agent Sees
|
||||||
|
|
||||||
|
When fuzzy matching is used, results include:
|
||||||
|
- `matchType: 'exact'` or `matchType: 'fuzzy'`
|
||||||
|
- `matchDistance: N` (for fuzzy matches, indicating how many character edits were needed)
|
||||||
|
|
||||||
|
This allows AI agents to provide context to users, such as: *"I found results for 'klaatu' (you typed 'klattu')"*
|
||||||
|
|
||||||
|
### Technical Details
|
||||||
|
|
||||||
|
The fuzzy matching uses the **Levenshtein distance algorithm**, which counts the minimum number of single-character edits (insertions, deletions, substitutions) needed to change one string into another.
|
||||||
|
|
||||||
|
**Note**: Transcript search uses regex-based matching and does not use fuzzy matching, as the flexible regex patterns already handle many variations.
|
||||||
|
|
||||||
## Available Resources
|
## Available Resources
|
||||||
|
|
||||||
### `hpr://stats`
|
### `hpr://stats`
|
||||||
@@ -314,7 +355,7 @@ The Hacker Public Radio content itself is released under various Creative Common
|
|||||||
|
|
||||||
Contributions are welcome! This server can be extended with:
|
Contributions are welcome! This server can be extended with:
|
||||||
|
|
||||||
- Advanced search features (fuzzy matching, relevance ranking)
|
- Advanced search features (relevance ranking, semantic search)
|
||||||
- Tag cloud generation
|
- Tag cloud generation
|
||||||
- Episode recommendations
|
- Episode recommendations
|
||||||
- Audio file access
|
- Audio file access
|
||||||
|
|||||||
156
data-loader.js
156
data-loader.js
@@ -9,6 +9,45 @@ function escapeRegExp(string) {
|
|||||||
return string.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
|
return string.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Calculate the Levenshtein distance between two strings.
 *
 * Returns the minimum number of single-character edits (insertions,
 * deletions, substitutions) needed to change one string into the other.
 * Strings are compared by Unicode code point, so a character outside the
 * Basic Multilingual Plane (e.g. an emoji) counts as one character rather
 * than as two UTF-16 code units.
 *
 * @param {string} a - First string.
 * @param {string} b - Second string.
 * @returns {number} The edit distance between `a` and `b`.
 */
function levenshteinDistance(a, b) {
  // Array.from iterates a string by code point, keeping surrogate pairs whole.
  const source = Array.from(a);
  const target = Array.from(b);

  if (source.length === 0) return target.length;
  if (target.length === 0) return source.length;

  // Each cell depends only on the current and previous row, so keep just
  // those two rows instead of the full (m + 1) x (n + 1) matrix.
  let previousRow = Array.from({ length: source.length + 1 }, (_, j) => j);

  for (let i = 1; i <= target.length; i++) {
    const currentRow = [i];

    for (let j = 1; j <= source.length; j++) {
      const substitutionCost = target[i - 1] === source[j - 1] ? 0 : 1;

      currentRow[j] = Math.min(
        previousRow[j - 1] + substitutionCost, // match or substitution
        currentRow[j - 1] + 1, // insertion
        previousRow[j] + 1, // deletion
      );
    }

    previousRow = currentRow;
  }

  return previousRow[source.length];
}
|
||||||
|
|
||||||
class HPRDataLoader {
|
class HPRDataLoader {
|
||||||
constructor() {
|
constructor() {
|
||||||
this.episodes = [];
|
this.episodes = [];
|
||||||
@@ -135,7 +174,8 @@ class HPRDataLoader {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Search episodes by keyword in title, summary, or tags
|
* Search episodes by keyword in title, summary, or tags with fuzzy matching fallback
|
||||||
|
* Returns episodes with matchType indicator ('exact' or 'fuzzy')
|
||||||
*/
|
*/
|
||||||
searchEpisodes(query, options = {}) {
|
searchEpisodes(query, options = {}) {
|
||||||
const {
|
const {
|
||||||
@@ -144,37 +184,78 @@ class HPRDataLoader {
|
|||||||
seriesId = null,
|
seriesId = null,
|
||||||
tag = null,
|
tag = null,
|
||||||
fromDate = null,
|
fromDate = null,
|
||||||
toDate = null
|
toDate = null,
|
||||||
|
maxDistance = 3 // More lenient for longer episode titles
|
||||||
} = options;
|
} = options;
|
||||||
|
|
||||||
const queryLower = query.toLowerCase();
|
const queryLower = query.toLowerCase();
|
||||||
|
|
||||||
|
// Helper to check if episode matches filters (excluding query)
|
||||||
|
const matchesFilters = (ep) => {
|
||||||
|
const matchesHost = !hostId || ep.hostid === hostId;
|
||||||
|
const matchesSeries = seriesId === null || ep.series === seriesId;
|
||||||
|
const matchesTag = !tag || ep.tags.toLowerCase().includes(tag.toLowerCase());
|
||||||
|
const matchesDateRange = (!fromDate || ep.date >= fromDate) &&
|
||||||
|
(!toDate || ep.date <= toDate);
|
||||||
|
return matchesHost && matchesSeries && matchesTag && matchesDateRange;
|
||||||
|
};
|
||||||
|
|
||||||
|
// Try exact substring match first (fast path)
|
||||||
let results = this.episodes.filter(ep => {
|
let results = this.episodes.filter(ep => {
|
||||||
// Basic text search
|
|
||||||
const matchesQuery = !query ||
|
const matchesQuery = !query ||
|
||||||
ep.title.toLowerCase().includes(queryLower) ||
|
ep.title.toLowerCase().includes(queryLower) ||
|
||||||
ep.summary.toLowerCase().includes(queryLower) ||
|
ep.summary.toLowerCase().includes(queryLower) ||
|
||||||
ep.tags.toLowerCase().includes(queryLower) ||
|
ep.tags.toLowerCase().includes(queryLower) ||
|
||||||
ep.notes.toLowerCase().includes(queryLower);
|
ep.notes.toLowerCase().includes(queryLower);
|
||||||
|
|
||||||
// Filter by host
|
return matchesQuery && matchesFilters(ep);
|
||||||
const matchesHost = !hostId || ep.hostid === hostId;
|
}).map(ep => ({
|
||||||
|
...ep,
|
||||||
|
matchType: 'exact'
|
||||||
|
}));
|
||||||
|
|
||||||
// Filter by series
|
// If no exact matches and we have a query, try fuzzy match on title
|
||||||
const matchesSeries = seriesId === null || ep.series === seriesId;
|
if (results.length === 0 && query && query.trim().length > 0) {
|
||||||
|
const fuzzyResults = this.episodes
|
||||||
|
.filter(matchesFilters)
|
||||||
|
.map(ep => {
|
||||||
|
// Check if any word in the title is close to the query
|
||||||
|
const titleWords = ep.title.toLowerCase().split(/\s+/);
|
||||||
|
let minDistance = Infinity;
|
||||||
|
|
||||||
// Filter by tag
|
for (const word of titleWords) {
|
||||||
const matchesTag = !tag || ep.tags.toLowerCase().includes(tag.toLowerCase());
|
const distance = levenshteinDistance(queryLower, word);
|
||||||
|
if (distance < minDistance) {
|
||||||
|
minDistance = distance;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Filter by date range
|
return {
|
||||||
const matchesDateRange = (!fromDate || ep.date >= fromDate) &&
|
episode: ep,
|
||||||
(!toDate || ep.date <= toDate);
|
distance: minDistance
|
||||||
|
};
|
||||||
|
})
|
||||||
|
.filter(result => result.distance <= maxDistance)
|
||||||
|
.sort((a, b) => a.distance - b.distance)
|
||||||
|
.map(result => ({
|
||||||
|
...result.episode,
|
||||||
|
matchType: 'fuzzy',
|
||||||
|
matchDistance: result.distance
|
||||||
|
}));
|
||||||
|
|
||||||
return matchesQuery && matchesHost && matchesSeries && matchesTag && matchesDateRange;
|
results = fuzzyResults;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Sort by date (newest first), maintaining match quality
|
||||||
|
results.sort((a, b) => {
|
||||||
|
// If both are fuzzy matches, sort by distance first, then date
|
||||||
|
if (a.matchType === 'fuzzy' && b.matchType === 'fuzzy') {
|
||||||
|
const distDiff = (a.matchDistance || 0) - (b.matchDistance || 0);
|
||||||
|
if (distDiff !== 0) return distDiff;
|
||||||
|
}
|
||||||
|
return b.date.localeCompare(a.date);
|
||||||
});
|
});
|
||||||
|
|
||||||
// Sort by date (newest first)
|
|
||||||
results.sort((a, b) => b.date.localeCompare(a.date));
|
|
||||||
|
|
||||||
return results.slice(0, limit);
|
return results.slice(0, limit);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -329,14 +410,49 @@ class HPRDataLoader {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Search hosts by name or email
|
* Search hosts by name or email with fuzzy matching fallback
|
||||||
|
* Returns hosts with matchType indicator ('exact' or 'fuzzy')
|
||||||
*/
|
*/
|
||||||
searchHosts(query) {
|
searchHosts(query, options = {}) {
|
||||||
|
const { maxDistance = 2 } = options;
|
||||||
const queryLower = query.toLowerCase();
|
const queryLower = query.toLowerCase();
|
||||||
return this.hosts.filter(host =>
|
|
||||||
|
// Try exact substring match first (fast path)
|
||||||
|
const exactMatches = this.hosts.filter(host =>
|
||||||
host.host.toLowerCase().includes(queryLower) ||
|
host.host.toLowerCase().includes(queryLower) ||
|
||||||
host.email.toLowerCase().includes(queryLower)
|
host.email.toLowerCase().includes(queryLower)
|
||||||
);
|
).map(host => ({
|
||||||
|
...host,
|
||||||
|
matchType: 'exact'
|
||||||
|
}));
|
||||||
|
|
||||||
|
if (exactMatches.length > 0) {
|
||||||
|
return exactMatches;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fall back to fuzzy match if no exact matches
|
||||||
|
const fuzzyMatches = this.hosts
|
||||||
|
.map(host => {
|
||||||
|
const hostLower = host.host.toLowerCase();
|
||||||
|
const emailLower = host.email.toLowerCase();
|
||||||
|
const hostDistance = levenshteinDistance(queryLower, hostLower);
|
||||||
|
const emailDistance = levenshteinDistance(queryLower, emailLower);
|
||||||
|
const minDistance = Math.min(hostDistance, emailDistance);
|
||||||
|
|
||||||
|
return {
|
||||||
|
host,
|
||||||
|
distance: minDistance
|
||||||
|
};
|
||||||
|
})
|
||||||
|
.filter(result => result.distance <= maxDistance)
|
||||||
|
.sort((a, b) => a.distance - b.distance)
|
||||||
|
.map(result => ({
|
||||||
|
...result.host,
|
||||||
|
matchType: 'fuzzy',
|
||||||
|
matchDistance: result.distance
|
||||||
|
}));
|
||||||
|
|
||||||
|
return fuzzyMatches;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|||||||
18
index.js
18
index.js
@@ -45,7 +45,14 @@ function formatEpisode(episode, includeNotes = false) {
|
|||||||
const host = dataLoader.getHost(episode.hostid);
|
const host = dataLoader.getHost(episode.hostid);
|
||||||
const seriesInfo = episode.series !== 0 ? dataLoader.getSeries(episode.series) : null;
|
const seriesInfo = episode.series !== 0 ? dataLoader.getSeries(episode.series) : null;
|
||||||
|
|
||||||
let result = `# HPR${String(episode.id).padStart(4, '0')}: ${episode.title}
|
let result = `# HPR${String(episode.id).padStart(4, '0')}: ${episode.title}`;
|
||||||
|
|
||||||
|
// Add match type indicator for fuzzy matches
|
||||||
|
if (episode.matchType === 'fuzzy') {
|
||||||
|
result += ` *(fuzzy match, distance: ${episode.matchDistance})*`;
|
||||||
|
}
|
||||||
|
|
||||||
|
result += `
|
||||||
|
|
||||||
**Date:** ${episode.date}
|
**Date:** ${episode.date}
|
||||||
**Host:** ${host?.host || 'Unknown'} (ID: ${episode.hostid})
|
**Host:** ${host?.host || 'Unknown'} (ID: ${episode.hostid})
|
||||||
@@ -606,7 +613,14 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
let text = `# ${host.host}
|
let text = `# ${host.host}`;
|
||||||
|
|
||||||
|
// Add match type indicator for fuzzy matches
|
||||||
|
if (host.matchType === 'fuzzy') {
|
||||||
|
text += ` *(fuzzy match, distance: ${host.matchDistance})*`;
|
||||||
|
}
|
||||||
|
|
||||||
|
text += `
|
||||||
|
|
||||||
**Host ID:** ${host.hostid}
|
**Host ID:** ${host.hostid}
|
||||||
**Email:** ${host.email}
|
**Email:** ${host.email}
|
||||||
|
|||||||
@@ -139,7 +139,14 @@ function formatEpisode(episode, includeNotes = false) {
|
|||||||
const host = dataLoader.getHost(episode.hostid);
|
const host = dataLoader.getHost(episode.hostid);
|
||||||
const seriesInfo = episode.series !== 0 ? dataLoader.getSeries(episode.series) : null;
|
const seriesInfo = episode.series !== 0 ? dataLoader.getSeries(episode.series) : null;
|
||||||
|
|
||||||
let result = `# HPR${String(episode.id).padStart(4, '0')}: ${episode.title}
|
let result = `# HPR${String(episode.id).padStart(4, '0')}: ${episode.title}`;
|
||||||
|
|
||||||
|
// Add match type indicator for fuzzy matches
|
||||||
|
if (episode.matchType === 'fuzzy') {
|
||||||
|
result += ` *(fuzzy match, distance: ${episode.matchDistance})*`;
|
||||||
|
}
|
||||||
|
|
||||||
|
result += `
|
||||||
|
|
||||||
**Date:** ${episode.date}
|
**Date:** ${episode.date}
|
||||||
**Host:** ${host?.host || 'Unknown'} (ID: ${episode.hostid})
|
**Host:** ${host?.host || 'Unknown'} (ID: ${episode.hostid})
|
||||||
@@ -718,7 +725,14 @@ All content is contributed by the community, for the community.`,
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
let text = `# ${host.host}
|
let text = `# ${host.host}`;
|
||||||
|
|
||||||
|
// Add match type indicator for fuzzy matches
|
||||||
|
if (host.matchType === 'fuzzy') {
|
||||||
|
text += ` *(fuzzy match, distance: ${host.matchDistance})*`;
|
||||||
|
}
|
||||||
|
|
||||||
|
text += `
|
||||||
|
|
||||||
**Host ID:** ${host.hostid}
|
**Host ID:** ${host.hostid}
|
||||||
**Email:** ${host.email}
|
**Email:** ${host.email}
|
||||||
|
|||||||
113
test-fuzzy-http.js
Normal file
113
test-fuzzy-http.js
Normal file
@@ -0,0 +1,113 @@
|
|||||||
|
#!/usr/bin/env node
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test fuzzy search via HTTP/SSE MCP Server
|
||||||
|
*/
|
||||||
|
|
||||||
|
import EventSource from 'eventsource';
|
||||||
|
import fetch from 'node-fetch';
|
||||||
|
|
||||||
|
const SERVER_URL = 'http://localhost:3000';
|
||||||
|
const SSE_ENDPOINT = `${SERVER_URL}/sse`;
|
||||||
|
const MESSAGE_ENDPOINT = `${SERVER_URL}/message`;
|
||||||
|
|
||||||
|
let requestId = 1;
|
||||||
|
let sse;
|
||||||
|
let connectionId = null;
|
||||||
|
|
||||||
|
/**
 * Send a JSON-RPC 2.0 request to the server and wait for the matching reply.
 *
 * The request is POSTed to MESSAGE_ENDPOINT; the reply is read from the shared
 * SSE stream (`sse`) and matched to the request by its `id`.
 *
 * @param {string} method - JSON-RPC method name.
 * @param {object} [params={}] - JSON-RPC params.
 * @returns {Promise<*>} Resolves with the `result` member of the reply.
 *   Rejects when the POST fails, the server answers with a non-2xx status,
 *   or the reply carries a JSON-RPC `error` member.
 */
async function sendMessage(method, params = {}) {
  const message = {
    jsonrpc: '2.0',
    id: requestId++,
    method,
    params,
  };

  let handler;
  const reply = new Promise((resolve, reject) => {
    handler = (event) => {
      let data;
      try {
        data = JSON.parse(event.data);
      } catch (e) {
        // Events that are not JSON are not replies to this request; ignore.
        return;
      }

      if (data === null || typeof data !== 'object' || data.id !== message.id) {
        return;
      }

      if (data.error) {
        reject(new Error(`JSON-RPC error for "${method}": ${data.error.message || JSON.stringify(data.error)}`));
      } else {
        resolve(data.result);
      }
    };
  });

  // Listen before posting so a fast reply cannot be missed.
  sse.addEventListener('message', handler);

  const posted = fetch(MESSAGE_ENDPOINT, {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      'x-connection-id': connectionId,
    },
    body: JSON.stringify(message),
  }).then((response) => {
    if (!response.ok) {
      throw new Error(`POST ${MESSAGE_ENDPOINT} failed with status ${response.status}`);
    }
  });

  try {
    // Await both together: a failed POST rejects immediately instead of
    // leaving the caller waiting for a reply that will never arrive, and a
    // reply that lands before the POST settles is never left unhandled.
    const [, result] = await Promise.all([posted, reply]);
    return result;
  } finally {
    sse.removeEventListener('message', handler);
  }
}
|
||||||
|
|
||||||
|
/**
 * Exercise fuzzy host and episode search against a running HTTP/SSE server.
 *
 * Opens the SSE stream, reads the session ID from the `endpoint` event, calls
 * the `get_host_info` and `search_episodes` tools with misspelled input, and
 * prints the first lines of each reply. Exits the process with code 0 when
 * both calls complete.
 *
 * @returns {Promise<void>} Rejects if the SSE connection cannot be
 *   established or a tool call fails.
 */
async function test() {
  console.log('Testing fuzzy search via HTTP/SSE MCP\n');

  // Connect to SSE
  sse = new EventSource(SSE_ENDPOINT);

  await new Promise((resolve, reject) => {
    // Without this, an unreachable server would leave the script waiting
    // forever for an `endpoint` event that never arrives.
    const onConnectError = () => {
      sse.close();
      reject(new Error(`Could not establish SSE connection to ${SSE_ENDPOINT}`));
    };

    sse.addEventListener('error', onConnectError);
    sse.addEventListener('endpoint', (event) => {
      sse.removeEventListener('error', onConnectError);
      const url = new URL(event.data, SERVER_URL);
      connectionId = url.searchParams.get('sessionId');
      console.log(`Connected with session ID: ${connectionId}\n`);
      resolve();
    });
  });

  // NOTE(review): presumably gives the server time to finish setting up the
  // session before the first request — confirm whether this is still needed.
  await new Promise(resolve => setTimeout(resolve, 500));

  // Test 1: Search for host with typo
  console.log('=== Test 1: Fuzzy Host Search ===');
  console.log('Searching for host: "klattu" (typo for Klaatu)\n');

  const hostResult = await sendMessage('tools/call', {
    name: 'get_host_info',
    arguments: {
      hostName: 'klattu',
    },
  });

  const hostText = hostResult.content[0].text;
  const hostLines = hostText.split('\n').slice(0, 8);
  console.log(hostLines.join('\n'));
  console.log('');

  // Test 2: Search episodes with typo
  console.log('=== Test 2: Fuzzy Episode Search ===');
  console.log('Searching for episodes: "pythoon" (typo for python)\n');

  const episodeResult = await sendMessage('tools/call', {
    name: 'search_episodes',
    arguments: {
      query: 'pythoon',
      limit: 2,
    },
  });

  const episodeText = episodeResult.content[0].text;
  // Extract just the first episode header
  const firstEpisode = episodeText.split('\n---\n')[0];
  const episodeLines = firstEpisode.split('\n').slice(0, 10);
  console.log(episodeLines.join('\n'));
  console.log('');

  console.log('✅ HTTP/SSE fuzzy search tests completed!\n');

  sse.close();
  process.exit(0);
}
|
||||||
|
|
||||||
|
// Run the scenario; on any failure print the error and exit non-zero so the
// calling shell or CI job can detect it.
const reportFailure = (error) => {
  console.error('Error:', error);
  process.exit(1);
};

test().catch(reportFailure);
|
||||||
82
test-fuzzy-search.js
Normal file
82
test-fuzzy-search.js
Normal file
@@ -0,0 +1,82 @@
|
|||||||
|
#!/usr/bin/env node
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test script for fuzzy search functionality
|
||||||
|
* Tests both episode and host fuzzy matching
|
||||||
|
*/
|
||||||
|
|
||||||
|
import HPRDataLoader from './data-loader.js';
|
||||||
|
|
||||||
|
// Manual smoke test: runs a fixed set of host and episode queries through the
// data loader and prints what came back. Nothing is asserted; the output is
// meant to be read by a person.

/** Print the banner and query description that introduce one scenario. */
function announce(title, queryLine) {
  console.log(`=== ${title} ===`);
  console.log(`${queryLine}\n`);
}

/** Return ", distance: N" when the item has a truthy matchDistance, else "". */
function distanceSuffix(item) {
  return item.matchDistance ? ', distance: ' + item.matchDistance : '';
}

/** Format an episode ID as "HPR" followed by at least four digits. */
function episodeLabel(ep) {
  return `HPR${String(ep.id).padStart(4, '0')}`;
}

/** Print episodes with titles cut to 60 characters plus match details. */
function printTruncatedEpisodes(episodes) {
  console.log(`Found ${episodes.length} results`);
  for (const ep of episodes) {
    console.log(` - ${episodeLabel(ep)}: ${ep.title.substring(0, 60)}... [matchType: ${ep.matchType}${distanceSuffix(ep)}]`);
  }
  console.log('');
}

console.log('Loading HPR data...\n');
const dataLoader = new HPRDataLoader();
await dataLoader.load();
console.log('Data loaded!\n');

// Test 1: Exact host match (should use exact matching)
announce('Test 1: Exact Host Match', 'Query: "ken"');
const exactHosts = dataLoader.searchHosts('ken');
console.log(`Found ${exactHosts.length} results (exact match)`);
for (const host of exactHosts.slice(0, 3)) {
  console.log(` - ${host.host} (${host.hostid}) [matchType: ${host.matchType}]`);
}
console.log('');

// Test 2: Fuzzy host match with typo
announce('Test 2: Fuzzy Host Match (typo)', 'Query: "klattu" (should match "klaatu")');
const fuzzyHosts = dataLoader.searchHosts('klattu');
console.log(`Found ${fuzzyHosts.length} results`);
for (const host of fuzzyHosts) {
  console.log(` - ${host.host} (${host.hostid}) [matchType: ${host.matchType}, distance: ${host.matchDistance}]`);
}
console.log('');

// Test 3: Another fuzzy host match
announce('Test 3: Fuzzy Host Match (another typo)', 'Query: "dav" (should find hosts like "Dave")');
const fuzzyHosts2 = dataLoader.searchHosts('dav');
console.log(`Found ${fuzzyHosts2.length} results`);
for (const host of fuzzyHosts2.slice(0, 5)) {
  console.log(` - ${host.host} (${host.hostid}) [matchType: ${host.matchType}${distanceSuffix(host)}]`);
}
console.log('');

// Test 4: Exact episode search
announce('Test 4: Exact Episode Match', 'Query: "linux" (exact match in title/summary)');
const exactEpisodes = dataLoader.searchEpisodes('linux', { limit: 3 });
console.log(`Found ${exactEpisodes.length} results`);
for (const ep of exactEpisodes) {
  console.log(` - ${episodeLabel(ep)}: ${ep.title} [matchType: ${ep.matchType}]`);
}
console.log('');

// Test 5: Fuzzy episode search with typo
announce('Test 5: Fuzzy Episode Match (typo)', 'Query: "linx" (should match episodes with "linux" in title)');
const fuzzyEpisodes = dataLoader.searchEpisodes('linx', { limit: 3 });
printTruncatedEpisodes(fuzzyEpisodes);

// Test 6: Another fuzzy episode search
announce('Test 6: Fuzzy Episode Match (misspelling)', 'Query: "pythoon" (should match "python")');
const fuzzyEpisodes2 = dataLoader.searchEpisodes('pythoon', { limit: 3 });
printTruncatedEpisodes(fuzzyEpisodes2);

// Test 7: No match (distance too large)
announce('Test 7: No Match (distance too large)', 'Query: "xyzabc" (should find nothing)');
const noMatch = dataLoader.searchHosts('xyzabc');
console.log(`Found ${noMatch.length} results`);
console.log('');

console.log('✅ All fuzzy search tests completed!');
|
||||||
Reference in New Issue
Block a user