Phase 2: Core transcription pipeline and audio playback

- Implement faster-whisper TranscribeService with word-level timestamps, progress reporting, and hardware auto-detection
- Wire up Rust SidecarManager for Python process lifecycle (spawn, IPC, shutdown)
- Add transcribe_file Tauri command bridging frontend to Python sidecar
- Integrate wavesurfer.js WaveformPlayer with play/pause, skip, seek controls
- Build TranscriptEditor with word-level click-to-seek and active highlighting
- Connect file import flow: prompt → asset load → transcribe → display
- Add typed tauri-bridge service with TranscriptionResult interface
- Add Python tests for hardware detection and transcription result formatting

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-26 15:53:09 -08:00
parent 503cc6c0cf
commit 48fe41b064
18 changed files with 1775 additions and 32 deletions
--- a/src/routes/+page.svelte
+++ b/src/routes/+page.svelte
@@ -1,14 +1,104 @@
 <script lang="ts">
+  import { convertFileSrc, invoke } from '@tauri-apps/api/core';
  import WaveformPlayer from '$lib/components/WaveformPlayer.svelte';
  import TranscriptEditor from '$lib/components/TranscriptEditor.svelte';
  import SpeakerManager from '$lib/components/SpeakerManager.svelte';
  import AIChatPanel from '$lib/components/AIChatPanel.svelte';
+  import ProgressOverlay from '$lib/components/ProgressOverlay.svelte';
+  import { segments, speakers } from '$lib/stores/transcript';
+  import type { Segment, Word } from '$lib/types/transcript';
+
+  let waveformPlayer: WaveformPlayer; // component instance, assigned by bind:this in the markup
+  let audioUrl = $state(''); // URL passed to WaveformPlayer as a prop; set in handleFileImport
+  let isTranscribing = $state(false); // true while transcribe_file is in flight; drives ProgressOverlay `visible`
+  let transcriptionProgress = $state(0); // ProgressOverlay `percent`; only ever reset to 0 in the visible code
+  let transcriptionStage = $state(''); // ProgressOverlay `stage` label
+  let transcriptionMessage = $state(''); // ProgressOverlay `message`; NOTE(review): never written in the visible code
+
+  /**
+   * Handler passed to TranscriptEditor as `onWordClick`; forwards the given
+   * time to the waveform player's `seekTo`.
+   *
+   * @param timeMs - value handed through unchanged to `seekTo`
+   */
+  function handleWordClick(timeMs: number) {
+    // Nothing to seek until bind:this has assigned the player instance.
+    if (waveformPlayer == null) return;
+    waveformPlayer.seekTo(timeMs);
+  }
+
+  /**
+   * Import flow: ask for a media path, load it into the waveform player,
+   * run the `transcribe_file` Tauri command, and publish the converted
+   * result to the `segments` store.
+   *
+   * `isTranscribing` is true for the duration of the command so the progress
+   * overlay is shown. Failures are logged and surfaced with an alert; they
+   * are not rethrown.
+   */
+  async function handleFileImport() {
+    // For now, use a simple prompt — will be replaced with Tauri file dialog.
+    // Trim so a whitespace-only answer or stray padding from a pasted path
+    // never reaches the backend.
+    const filePath = prompt('Enter path to audio/video file:')?.trim();
+    if (!filePath) return;
+
+    // Build the webview URL with Tauri's helper rather than hard-coding
+    // `asset://localhost/…`: the asset-protocol URL form is platform
+    // dependent (e.g. `http://asset.localhost/…` on Windows).
+    audioUrl = convertFileSrc(filePath);
+    waveformPlayer?.loadAudio(audioUrl);
+
+    // Start transcription with a clean overlay state
+    isTranscribing = true;
+    transcriptionProgress = 0;
+    transcriptionStage = 'Starting...';
+    transcriptionMessage = '';
+
+    try {
+      const result = await invoke<{
+        segments: Array<{
+          text: string;
+          start_ms: number;
+          end_ms: number;
+          words: Array<{
+            word: string;
+            start_ms: number;
+            end_ms: number;
+            confidence: number;
+          }>;
+        }>;
+        language: string;
+        duration_ms: number;
+      }>('transcribe_file', { filePath });
+
+      // Convert result to our store format. IDs are synthesized from array
+      // positions; fields the backend does not supply are left empty/null.
+      const newSegments: Segment[] = result.segments.map((seg, idx) => {
+        const segmentId = `seg-${idx}`;
+        return {
+          id: segmentId,
+          project_id: '',
+          media_file_id: '',
+          speaker_id: null,
+          start_ms: seg.start_ms,
+          end_ms: seg.end_ms,
+          text: seg.text,
+          original_text: null,
+          confidence: null,
+          is_edited: false,
+          edited_at: null,
+          segment_index: idx,
+          words: seg.words.map((w, widx) => ({
+            id: `word-${idx}-${widx}`,
+            segment_id: segmentId,
+            word: w.word,
+            start_ms: w.start_ms,
+            end_ms: w.end_ms,
+            confidence: w.confidence,
+            word_index: widx,
+          })),
+        };
+      });
+
+      segments.set(newSegments);
+    } catch (err) {
+      console.error('Transcription failed:', err);
+      // A rejection may be a string, an Error, or a structured object;
+      // serialize plain objects so the alert never shows "[object Object]".
+      const detail =
+        typeof err === 'object' && err !== null && !(err instanceof Error)
+          ? JSON.stringify(err)
+          : String(err);
+      alert(`Transcription failed: ${detail}`);
+    } finally {
+      // Always hide the overlay, whether the command succeeded or failed.
+      isTranscribing = false;
+    }
+  }
 </script>

+<div class="app-header">
+  <h1>Voice to Notes</h1>
+  <div class="header-actions">
+    <button class="import-btn" onclick={handleFileImport}>
+      Import Audio/Video
+    </button>
+  </div>
+</div>
+
 <div class="workspace">
  <div class="main-content">
-    <WaveformPlayer />
-    <TranscriptEditor />
+    <WaveformPlayer bind:this={waveformPlayer} {audioUrl} />
+    <TranscriptEditor onWordClick={handleWordClick} />
  </div>
  <div class="sidebar-right">
    <SpeakerManager />
@@ -16,23 +106,58 @@
  </div>
 </div>

+<ProgressOverlay
+  visible={isTranscribing}
+  percent={transcriptionProgress}
+  stage={transcriptionStage}
+  message={transcriptionMessage}
+/>
+
 <style>
+  .app-header {
+    display: flex;
+    align-items: center;
+    justify-content: space-between;
+    padding: 0.5rem 1rem;
+    background: #0f3460;
+    color: #e0e0e0;
+  }
+  h1 {
+    font-size: 1.25rem;
+    margin: 0;
+  }
+  .import-btn {
+    background: #e94560;
+    border: none;
+    color: white;
+    padding: 0.5rem 1rem;
+    border-radius: 6px;
+    cursor: pointer;
+    font-size: 0.875rem;
+    font-weight: 500;
+  }
+  .import-btn:hover {
+    background: #d63851;
+  }
  .workspace {
    display: flex;
    gap: 1rem;
    padding: 1rem;
-    height: calc(100vh - 3rem);
+    height: calc(100vh - 3.5rem); /* NOTE(review): 3.5rem presumably reserves room for .app-header — confirm against its rendered height */
+    background: #0a0a23;
  }
  .main-content {
    flex: 1;
    display: flex;
    flex-direction: column;
    gap: 1rem;
+    min-width: 0; /* allow this flex item to shrink below its content width instead of overflowing */
  }
  .sidebar-right {
    width: 300px;
    display: flex;
    flex-direction: column;
    gap: 1rem;
+    flex-shrink: 0; /* keep the sidebar at its 300px width when space is tight */
  }
 </style>