Phase 2: Core transcription pipeline and audio playback

- Implement faster-whisper TranscribeService with word-level timestamps,
  progress reporting, and hardware auto-detection
- Wire up Rust SidecarManager for Python process lifecycle (spawn, IPC, shutdown)
- Add transcribe_file Tauri command bridging frontend to Python sidecar
- Integrate wavesurfer.js WaveformPlayer with play/pause, skip, seek controls
- Build TranscriptEditor with word-level click-to-seek and active highlighting
- Connect file import flow: prompt → asset load → transcribe → display
- Add typed tauri-bridge service with TranscriptionResult interface
- Add Python tests for hardware detection and transcription result formatting

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-26 15:53:09 -08:00
parent 503cc6c0cf
commit 48fe41b064
18 changed files with 1775 additions and 32 deletions


@@ -1,18 +1,154 @@
<div class="transcript-editor">
<p>Transcript Editor</p>
<p class="placeholder">TipTap rich text editor will be integrated here</p>
<script lang="ts">
import { segments, speakers } from '$lib/stores/transcript';
import { currentTimeMs } from '$lib/stores/playback';
import type { Segment, Word, Speaker } from '$lib/types/transcript';
interface Props {
onWordClick?: (timeMs: number) => void;
onTextEdit?: (segmentId: string, newText: string) => void;
}
let { onWordClick, onTextEdit }: Props = $props();
let transcriptContainer: HTMLDivElement;
function getSpeakerName(speakerId: string | null, speakerList: Speaker[]): string {
if (!speakerId) return 'Unknown';
const speaker = speakerList.find(s => s.id === speakerId);
return speaker?.display_name || speaker?.label || 'Unknown';
}
function getSpeakerColor(speakerId: string | null, speakerList: Speaker[]): string {
if (!speakerId) return '#888';
const speaker = speakerList.find(s => s.id === speakerId);
return speaker?.color || '#888';
}
function formatTimestamp(ms: number): string {
const totalSeconds = Math.floor(ms / 1000);
const m = Math.floor(totalSeconds / 60);
const s = totalSeconds % 60;
return `${m}:${s.toString().padStart(2, '0')}`;
}
function isWordActive(word: Word, currentMs: number): boolean {
return currentMs >= word.start_ms && currentMs <= word.end_ms;
}
function isSegmentActive(segment: Segment, currentMs: number): boolean {
return currentMs >= segment.start_ms && currentMs <= segment.end_ms;
}
function handleWordClick(word: Word) {
onWordClick?.(word.start_ms);
}
</script>
<div class="transcript-editor" bind:this={transcriptContainer}>
{#if $segments.length === 0}
<div class="empty-state">
<p>No transcript yet</p>
<p class="hint">Import an audio file and run transcription to get started</p>
</div>
{:else}
{#each $segments as segment (segment.id)}
<div
class="segment"
class:active={isSegmentActive(segment, $currentTimeMs)}
>
<div class="segment-header">
<span
class="speaker-label"
style="border-left-color: {getSpeakerColor(segment.speaker_id, $speakers)}"
>
{getSpeakerName(segment.speaker_id, $speakers)}
</span>
<span class="timestamp">{formatTimestamp(segment.start_ms)}</span>
</div>
<div class="segment-text">
{#each segment.words as word (word.id)}
<span
class="word"
class:word-active={isWordActive(word, $currentTimeMs)}
onclick={() => handleWordClick(word)}
role="button"
tabindex="0"
onkeydown={(e) => { if (e.key === 'Enter') handleWordClick(word); }}
>{word.word} </span>
{:else}
<span class="segment-plain-text">{segment.text}</span>
{/each}
</div>
</div>
{/each}
{/if}
</div>
<style>
.transcript-editor {
flex: 1;
overflow-y: auto;
padding: 1rem;
background: #16213e;
border-radius: 8px;
color: #e0e0e0;
}
.empty-state {
display: flex;
flex-direction: column;
align-items: center;
justify-content: center;
height: 100%;
color: #666;
}
.hint {
font-size: 0.875rem;
color: #555;
}
.segment {
margin-bottom: 1rem;
padding: 0.5rem;
border-radius: 4px;
transition: background-color 0.2s;
}
.segment.active {
background: rgba(233, 69, 96, 0.1);
}
.segment-header {
display: flex;
align-items: center;
gap: 0.5rem;
margin-bottom: 0.25rem;
}
.speaker-label {
font-weight: 600;
font-size: 0.875rem;
border-left: 3px solid;
padding-left: 0.5rem;
}
.timestamp {
color: #666;
font-size: 0.75rem;
font-variant-numeric: tabular-nums;
}
.segment-text {
line-height: 1.6;
padding-left: 0.75rem;
}
.word {
cursor: pointer;
border-radius: 2px;
padding: 0 1px;
transition: background-color 0.15s;
}
.word:hover {
background: rgba(233, 69, 96, 0.2);
}
.word-active {
background: rgba(233, 69, 96, 0.35);
color: #fff;
}
.segment-plain-text {
color: #ccc;
}
</style>
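
The `Segment`, `Word`, and `Speaker` types imported from `$lib/types/transcript` are not part of this diff. Judging from the fields the editor reads above and the mapping in the import flow below, they look roughly like this sketch (the canonical definitions may carry more fields):

```ts
// Approximate shapes inferred from this commit's usage; the real
// definitions live in $lib/types/transcript and may differ in detail.
export interface Word {
  id: string;
  segment_id: string;
  word: string;
  start_ms: number;
  end_ms: number;
  confidence: number;
  word_index: number;
}

export interface Speaker {
  id: string;
  label: string;
  display_name: string | null;
  color: string | null;
}

export interface Segment {
  id: string;
  project_id: string;
  media_file_id: string;
  speaker_id: string | null;
  start_ms: number;
  end_ms: number;
  text: string;
  original_text: string | null;
  confidence: number | null;
  is_edited: boolean;
  edited_at: string | null; // timestamp type is a guess
  segment_index: number;
  words: Word[];
}
```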


@@ -1,17 +1,144 @@
<script lang="ts">
import { onMount, onDestroy } from 'svelte';
import WaveSurfer from 'wavesurfer.js';
import { isPlaying, currentTimeMs, durationMs } from '$lib/stores/playback';
interface Props {
audioUrl?: string;
onSeek?: (timeMs: number) => void;
}
let { audioUrl = '', onSeek }: Props = $props();
let container: HTMLDivElement;
let wavesurfer: WaveSurfer | null = $state(null);
let currentTime = $state('0:00');
let totalTime = $state('0:00');
function formatTime(seconds: number): string {
const m = Math.floor(seconds / 60);
const s = Math.floor(seconds % 60);
return `${m}:${s.toString().padStart(2, '0')}`;
}
onMount(() => {
wavesurfer = WaveSurfer.create({
container,
waveColor: '#4a5568',
progressColor: '#e94560',
cursorColor: '#e94560',
height: 80,
barWidth: 2,
barGap: 1,
barRadius: 2,
});
wavesurfer.on('timeupdate', (time: number) => {
currentTimeMs.set(Math.round(time * 1000));
currentTime = formatTime(time);
});
wavesurfer.on('ready', () => {
const dur = wavesurfer!.getDuration();
durationMs.set(Math.round(dur * 1000));
totalTime = formatTime(dur);
});
wavesurfer.on('play', () => isPlaying.set(true));
wavesurfer.on('pause', () => isPlaying.set(false));
wavesurfer.on('finish', () => isPlaying.set(false));
if (audioUrl) {
wavesurfer.load(audioUrl);
}
});
onDestroy(() => {
wavesurfer?.destroy();
});
function togglePlayPause() {
wavesurfer?.playPause();
}
function skipBack() {
if (wavesurfer) {
const time = Math.max(0, wavesurfer.getCurrentTime() - 5);
wavesurfer.setTime(time);
}
}
function skipForward() {
if (wavesurfer) {
const time = Math.min(wavesurfer.getDuration(), wavesurfer.getCurrentTime() + 5);
wavesurfer.setTime(time);
}
}
/** Seek to a specific time in milliseconds. Called from transcript click-to-seek. */
export function seekTo(timeMs: number) {
if (wavesurfer) {
wavesurfer.setTime(timeMs / 1000);
if (!wavesurfer.isPlaying()) {
wavesurfer.play();
}
}
}
/** Load a new audio file. */
export function loadAudio(url: string) {
wavesurfer?.load(url);
}
</script>
<div class="waveform-player">
<div class="waveform-container" bind:this={container}></div>
<div class="controls">
<button class="control-btn" onclick={skipBack} title="Back 5s"></button>
<button class="control-btn play-btn" onclick={togglePlayPause} title="Play/Pause">
{#if $isPlaying}{:else}{/if}
</button>
<button class="control-btn" onclick={skipForward} title="Forward 5s"></button>
<span class="time">{currentTime} / {totalTime}</span>
</div>
</div>
<style>
.waveform-player {
background: #1a1a2e;
border-radius: 8px;
color: #e0e0e0;
padding: 0.75rem;
}
.waveform-container {
border-radius: 4px;
overflow: hidden;
}
.controls {
display: flex;
align-items: center;
gap: 0.5rem;
margin-top: 0.5rem;
}
.control-btn {
background: #0f3460;
border: none;
color: #e0e0e0;
padding: 0.4rem 0.8rem;
border-radius: 4px;
cursor: pointer;
font-size: 1rem;
}
.control-btn:hover {
background: #1a4a7a;
}
.play-btn {
padding: 0.4rem 1rem;
font-size: 1.2rem;
}
.time {
color: #999;
font-size: 0.875rem;
margin-left: auto;
font-variant-numeric: tabular-nums;
}
</style>
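
Since `seekTo` and `loadAudio` are `export`ed from the component script, a parent gets an imperative handle with `bind:this`, exactly as the main page does below. A minimal sketch (the `/demo.wav` URL is a placeholder):

```svelte
<script lang="ts">
  import WaveformPlayer from '$lib/components/WaveformPlayer.svelte';

  let player: WaveformPlayer;
</script>

<WaveformPlayer bind:this={player} audioUrl="/demo.wav" />
<button onclick={() => player.seekTo(42_000)}>Jump to 0:42</button>
```

One loose end: the `onSeek` prop is declared but never invoked, so transcript-driven seeks currently flow only through the exported `seekTo` method.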


@@ -12,3 +12,29 @@ export async function getProject(id: string): Promise<Project | null> {
export async function listProjects(): Promise<Project[]> {
return invoke('list_projects');
}
export interface TranscriptionResult {
segments: Array<{
text: string;
start_ms: number;
end_ms: number;
words: Array<{
word: string;
start_ms: number;
end_ms: number;
confidence: number;
}>;
}>;
language: string;
language_probability: number;
duration_ms: number;
}
export async function transcribeFile(
filePath: string,
model?: string,
device?: string,
language?: string,
): Promise<TranscriptionResult> {
return invoke('transcribe_file', { filePath, model, device, language });
}
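
A quick usage sketch of the wrapper; the module path `$lib/services/tauri-bridge` and the argument values are assumptions, since only the file's contents appear in this diff:

```ts
import { transcribeFile } from '$lib/services/tauri-bridge'; // path assumed

// 'base' and 'auto' are illustrative values for model and device.
const result = await transcribeFile('/path/to/interview.wav', 'base', 'auto');
console.log(
  `${result.segments.length} segments, language ${result.language} ` +
    `(p=${result.language_probability.toFixed(2)}), ${result.duration_ms} ms of audio`,
);
```

The page below still calls `invoke('transcribe_file', …)` directly with a hand-written inline type (which omits `language_probability`); routing it through this wrapper would keep the result shape in one place.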


@@ -1,14 +1,104 @@
<script lang="ts">
import { invoke } from '@tauri-apps/api/core';
import WaveformPlayer from '$lib/components/WaveformPlayer.svelte';
import TranscriptEditor from '$lib/components/TranscriptEditor.svelte';
import SpeakerManager from '$lib/components/SpeakerManager.svelte';
import AIChatPanel from '$lib/components/AIChatPanel.svelte';
import ProgressOverlay from '$lib/components/ProgressOverlay.svelte';
import { segments, speakers } from '$lib/stores/transcript';
import type { Segment, Word } from '$lib/types/transcript';
let waveformPlayer: WaveformPlayer;
let audioUrl = $state('');
let isTranscribing = $state(false);
let transcriptionProgress = $state(0);
let transcriptionStage = $state('');
let transcriptionMessage = $state('');
function handleWordClick(timeMs: number) {
waveformPlayer?.seekTo(timeMs);
}
async function handleFileImport() {
// For now, use a simple prompt — will be replaced with the Tauri file dialog (sketched at the end of this page)
const filePath = prompt('Enter path to audio/video file:');
if (!filePath) return;
// Convert file path to URL for wavesurfer
// In Tauri, we can use convertFileSrc or asset protocol
audioUrl = `asset://localhost/${encodeURIComponent(filePath)}`;
waveformPlayer?.loadAudio(audioUrl);
// Start transcription
isTranscribing = true;
transcriptionProgress = 0;
transcriptionStage = 'Starting...';
try {
const result = await invoke<{
segments: Array<{
text: string;
start_ms: number;
end_ms: number;
words: Array<{
word: string;
start_ms: number;
end_ms: number;
confidence: number;
}>;
}>;
language: string;
duration_ms: number;
}>('transcribe_file', { filePath });
// Convert result to our store format
const newSegments: Segment[] = result.segments.map((seg, idx) => ({
id: `seg-${idx}`,
project_id: '',
media_file_id: '',
speaker_id: null,
start_ms: seg.start_ms,
end_ms: seg.end_ms,
text: seg.text,
original_text: null,
confidence: null,
is_edited: false,
edited_at: null,
segment_index: idx,
words: seg.words.map((w, widx) => ({
id: `word-${idx}-${widx}`,
segment_id: `seg-${idx}`,
word: w.word,
start_ms: w.start_ms,
end_ms: w.end_ms,
confidence: w.confidence,
word_index: widx,
})),
}));
segments.set(newSegments);
} catch (err) {
console.error('Transcription failed:', err);
alert(`Transcription failed: ${err}`);
} finally {
isTranscribing = false;
}
}
</script>
<div class="app-header">
<h1>Voice to Notes</h1>
<div class="header-actions">
<button class="import-btn" onclick={handleFileImport}>
Import Audio/Video
</button>
</div>
</div>
<div class="workspace">
<div class="main-content">
<WaveformPlayer bind:this={waveformPlayer} {audioUrl} />
<TranscriptEditor onWordClick={handleWordClick} />
</div>
<div class="sidebar-right">
<SpeakerManager />
@@ -16,23 +106,58 @@
</div>
</div>
<ProgressOverlay
visible={isTranscribing}
percent={transcriptionProgress}
stage={transcriptionStage}
message={transcriptionMessage}
/>
<style>
.app-header {
display: flex;
align-items: center;
justify-content: space-between;
padding: 0.5rem 1rem;
background: #0f3460;
color: #e0e0e0;
}
h1 {
font-size: 1.25rem;
margin: 0;
}
.import-btn {
background: #e94560;
border: none;
color: white;
padding: 0.5rem 1rem;
border-radius: 6px;
cursor: pointer;
font-size: 0.875rem;
font-weight: 500;
}
.import-btn:hover {
background: #d63851;
}
.workspace {
display: flex;
gap: 1rem;
padding: 1rem;
height: calc(100vh - 3.5rem);
background: #0a0a23;
}
.main-content {
flex: 1;
display: flex;
flex-direction: column;
gap: 1rem;
min-width: 0;
}
.sidebar-right {
width: 300px;
display: flex;
flex-direction: column;
gap: 1rem;
flex-shrink: 0;
}
</style>
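
The comments in `handleFileImport` mark the `prompt()` call and the hand-built `asset://` URL as temporary. A sketch of the intended replacement using the Tauri v2 dialog plugin and `convertFileSrc`; this assumes `@tauri-apps/plugin-dialog` is installed and the asset protocol scope allows the chosen file:

```ts
import { open } from '@tauri-apps/plugin-dialog';
import { convertFileSrc } from '@tauri-apps/api/core';

// Returns a webview-safe URL for the picked media file, or null if cancelled.
async function pickAudioUrl(): Promise<string | null> {
  const filePath = await open({
    multiple: false,
    filters: [{ name: 'Audio/Video', extensions: ['wav', 'mp3', 'm4a', 'flac', 'mp4', 'mkv'] }],
  });
  if (typeof filePath !== 'string') return null;
  // convertFileSrc replaces the hand-assembled `asset://localhost/...` string.
  return convertFileSrc(filePath);
}
```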