Phase 2: Core transcription pipeline and audio playback
- Implement faster-whisper TranscribeService with word-level timestamps, progress reporting, and hardware auto-detection
- Wire up Rust SidecarManager for Python process lifecycle (spawn, IPC, shutdown)
- Add transcribe_file Tauri command bridging frontend to Python sidecar
- Integrate wavesurfer.js WaveformPlayer with play/pause, skip, seek controls
- Build TranscriptEditor with word-level click-to-seek and active highlighting
- Connect file import flow: prompt → asset load → transcribe → display
- Add typed tauri-bridge service with TranscriptionResult interface
- Add Python tests for hardware detection and transcription result formatting

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1,14 +1,104 @@
|
||||
<script lang="ts">
|
||||
import { convertFileSrc, invoke } from '@tauri-apps/api/core';

import WaveformPlayer from '$lib/components/WaveformPlayer.svelte';
import TranscriptEditor from '$lib/components/TranscriptEditor.svelte';
import SpeakerManager from '$lib/components/SpeakerManager.svelte';
import AIChatPanel from '$lib/components/AIChatPanel.svelte';
import ProgressOverlay from '$lib/components/ProgressOverlay.svelte';

import { segments, speakers } from '$lib/stores/transcript';
import type { Segment, Word } from '$lib/types/transcript';
|
||||
|
||||
let waveformPlayer: WaveformPlayer;
|
||||
let audioUrl = $state('');
|
||||
let isTranscribing = $state(false);
|
||||
let transcriptionProgress = $state(0);
|
||||
let transcriptionStage = $state('');
|
||||
let transcriptionMessage = $state('');
|
||||
|
||||
function handleWordClick(timeMs: number) {
|
||||
waveformPlayer?.seekTo(timeMs);
|
||||
}
|
||||
|
||||
async function handleFileImport() {
|
||||
// For now, use a simple prompt — will be replaced with Tauri file dialog
|
||||
const filePath = prompt('Enter path to audio/video file:');
|
||||
if (!filePath) return;
|
||||
|
||||
// Convert file path to URL for wavesurfer
|
||||
// In Tauri, we can use convertFileSrc or asset protocol
|
||||
audioUrl = `asset://localhost/${encodeURIComponent(filePath)}`;
|
||||
waveformPlayer?.loadAudio(audioUrl);
|
||||
|
||||
// Start transcription
|
||||
isTranscribing = true;
|
||||
transcriptionProgress = 0;
|
||||
transcriptionStage = 'Starting...';
|
||||
|
||||
try {
|
||||
const result = await invoke<{
|
||||
segments: Array<{
|
||||
text: string;
|
||||
start_ms: number;
|
||||
end_ms: number;
|
||||
words: Array<{
|
||||
word: string;
|
||||
start_ms: number;
|
||||
end_ms: number;
|
||||
confidence: number;
|
||||
}>;
|
||||
}>;
|
||||
language: string;
|
||||
duration_ms: number;
|
||||
}>('transcribe_file', { filePath });
|
||||
|
||||
// Convert result to our store format
|
||||
const newSegments: Segment[] = result.segments.map((seg, idx) => ({
|
||||
id: `seg-${idx}`,
|
||||
project_id: '',
|
||||
media_file_id: '',
|
||||
speaker_id: null,
|
||||
start_ms: seg.start_ms,
|
||||
end_ms: seg.end_ms,
|
||||
text: seg.text,
|
||||
original_text: null,
|
||||
confidence: null,
|
||||
is_edited: false,
|
||||
edited_at: null,
|
||||
segment_index: idx,
|
||||
words: seg.words.map((w, widx) => ({
|
||||
id: `word-${idx}-${widx}`,
|
||||
segment_id: `seg-${idx}`,
|
||||
word: w.word,
|
||||
start_ms: w.start_ms,
|
||||
end_ms: w.end_ms,
|
||||
confidence: w.confidence,
|
||||
word_index: widx,
|
||||
})),
|
||||
}));
|
||||
|
||||
segments.set(newSegments);
|
||||
} catch (err) {
|
||||
console.error('Transcription failed:', err);
|
||||
alert(`Transcription failed: ${err}`);
|
||||
} finally {
|
||||
isTranscribing = false;
|
||||
}
|
||||
}
|
||||
</script>
|
||||
|
||||
<div class="app-header">
|
||||
<h1>Voice to Notes</h1>
|
||||
<div class="header-actions">
|
||||
<button class="import-btn" onclick={handleFileImport}>
|
||||
Import Audio/Video
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="workspace">
|
||||
<div class="main-content">
|
||||
<WaveformPlayer />
|
||||
<TranscriptEditor />
|
||||
<WaveformPlayer bind:this={waveformPlayer} {audioUrl} />
|
||||
<TranscriptEditor onWordClick={handleWordClick} />
|
||||
</div>
|
||||
<div class="sidebar-right">
|
||||
<SpeakerManager />
|
||||
@@ -16,23 +106,58 @@
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<ProgressOverlay
|
||||
visible={isTranscribing}
|
||||
percent={transcriptionProgress}
|
||||
stage={transcriptionStage}
|
||||
message={transcriptionMessage}
|
||||
/>
|
||||
|
||||
<style>
|
||||
.app-header {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
justify-content: space-between;
|
||||
padding: 0.5rem 1rem;
|
||||
background: #0f3460;
|
||||
color: #e0e0e0;
|
||||
}
|
||||
h1 {
|
||||
font-size: 1.25rem;
|
||||
margin: 0;
|
||||
}
|
||||
.import-btn {
|
||||
background: #e94560;
|
||||
border: none;
|
||||
color: white;
|
||||
padding: 0.5rem 1rem;
|
||||
border-radius: 6px;
|
||||
cursor: pointer;
|
||||
font-size: 0.875rem;
|
||||
font-weight: 500;
|
||||
}
|
||||
.import-btn:hover {
|
||||
background: #d63851;
|
||||
}
|
||||
.workspace {
|
||||
display: flex;
|
||||
gap: 1rem;
|
||||
padding: 1rem;
|
||||
height: calc(100vh - 3rem);
|
||||
height: calc(100vh - 3.5rem);
|
||||
background: #0a0a23;
|
||||
}
|
||||
.main-content {
|
||||
flex: 1;
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
gap: 1rem;
|
||||
min-width: 0;
|
||||
}
|
||||
.sidebar-right {
|
||||
width: 300px;
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
gap: 1rem;
|
||||
flex-shrink: 0;
|
||||
}
|
||||
</style>
|
||||
|
||||
Reference in New Issue
Block a user