Phase 3: Speaker diarization and full transcription pipeline
- Implement DiarizeService with pyannote.audio speaker detection
- Build PipelineService combining transcribe → diarize → merge with overlap-based speaker assignment per segment
- Add pipeline.start and diarize.start IPC handlers
- Add run_pipeline Tauri command for full pipeline execution
- Wire frontend to use pipeline: speakers auto-created with colors, segments assigned to detected speakers
- Build SpeakerManager with rename support (double-click or edit button)
- Add speaker color coding throughout transcript display
- Add pyannote.audio dependency
- Tests: 24 Python (including merge logic), 6 Rust, 0 Svelte errors

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -7,7 +7,7 @@
|
||||
import AIChatPanel from '$lib/components/AIChatPanel.svelte';
|
||||
import ProgressOverlay from '$lib/components/ProgressOverlay.svelte';
|
||||
import { segments, speakers } from '$lib/stores/transcript';
|
||||
import type { Segment, Word } from '$lib/types/transcript';
|
||||
import type { Segment, Speaker } from '$lib/types/transcript';
|
||||
|
||||
let waveformPlayer: WaveformPlayer;
|
||||
let audioUrl = $state('');
|
||||
@@ -16,6 +16,9 @@
|
||||
let transcriptionStage = $state('');
|
||||
let transcriptionMessage = $state('');
|
||||
|
||||
// Speaker color palette for auto-assignment
|
||||
const speakerColors = ['#e94560', '#4ecdc4', '#ffe66d', '#a8e6cf', '#ff8b94', '#c7ceea', '#ffd93d', '#6bcb77'];
|
||||
|
||||
// Forwards the given timeMs to waveformPlayer.seekTo. The optional chaining
// makes this a no-op when waveformPlayer is null or undefined.
function handleWordClick(timeMs: number) {
|
||||
waveformPlayer?.seekTo(timeMs);
|
||||
}
|
||||
@@ -32,11 +35,10 @@
|
||||
if (!filePath) return;
|
||||
|
||||
// Convert file path to URL for wavesurfer
|
||||
// In Tauri, we can use convertFileSrc or asset protocol
|
||||
audioUrl = `asset://localhost/${encodeURIComponent(filePath)}`;
|
||||
waveformPlayer?.loadAudio(audioUrl);
|
||||
|
||||
// Start transcription
|
||||
// Start pipeline (transcription + diarization)
|
||||
isTranscribing = true;
|
||||
transcriptionProgress = 0;
|
||||
transcriptionStage = 'Starting...';
|
||||
@@ -47,6 +49,7 @@
|
||||
text: string;
|
||||
start_ms: number;
|
||||
end_ms: number;
|
||||
speaker: string | null;
|
||||
words: Array<{
|
||||
word: string;
|
||||
start_ms: number;
|
||||
@@ -56,14 +59,29 @@
|
||||
}>;
|
||||
language: string;
|
||||
duration_ms: number;
|
||||
}>('transcribe_file', { filePath });
|
||||
speakers: string[];
|
||||
num_speakers: number;
|
||||
}>('run_pipeline', { filePath });
|
||||
|
||||
// Create speaker entries from pipeline result
|
||||
const newSpeakers: Speaker[] = (result.speakers || []).map((label, idx) => ({
|
||||
id: `speaker-${idx}`,
|
||||
project_id: '',
|
||||
label,
|
||||
display_name: null,
|
||||
color: speakerColors[idx % speakerColors.length],
|
||||
}));
|
||||
speakers.set(newSpeakers);
|
||||
|
||||
// Build speaker label → id lookup
|
||||
const speakerLookup = new Map(newSpeakers.map(s => [s.label, s.id]));
|
||||
|
||||
// Convert result to our store format
|
||||
const newSegments: Segment[] = result.segments.map((seg, idx) => ({
|
||||
id: `seg-${idx}`,
|
||||
project_id: '',
|
||||
media_file_id: '',
|
||||
speaker_id: null,
|
||||
speaker_id: seg.speaker ? (speakerLookup.get(seg.speaker) ?? null) : null,
|
||||
start_ms: seg.start_ms,
|
||||
end_ms: seg.end_ms,
|
||||
text: seg.text,
|
||||
@@ -85,8 +103,8 @@
|
||||
|
||||
segments.set(newSegments);
|
||||
} catch (err) {
|
||||
console.error('Transcription failed:', err);
|
||||
alert(`Transcription failed: ${err}`);
|
||||
console.error('Pipeline failed:', err);
|
||||
alert(`Pipeline failed: ${err}`);
|
||||
} finally {
|
||||
isTranscribing = false;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user