Phase 2: Core transcription pipeline and audio playback

- Implement faster-whisper TranscribeService with word-level timestamps, progress reporting, and hardware auto-detection
- Wire up Rust SidecarManager for Python process lifecycle (spawn, IPC, shutdown)
- Add transcribe_file Tauri command bridging frontend to Python sidecar
- Integrate wavesurfer.js WaveformPlayer with play/pause, skip, seek controls
- Build TranscriptEditor with word-level click-to-seek and active highlighting
- Connect file import flow: prompt → asset load → transcribe → display
- Add typed tauri-bridge service with TranscriptionResult interface
- Add Python tests for hardware detection and transcription result formatting

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-26 15:53:09 -08:00
parent 503cc6c0cf
commit 48fe41b064
18 changed files with 1775 additions and 32 deletions
--- a/src/lib/components/TranscriptEditor.svelte
+++ b/src/lib/components/TranscriptEditor.svelte
@@ -1,18 +1,154 @@
-<div class="transcript-editor">
-  <p>Transcript Editor</p>
-  <p class="placeholder">TipTap rich text editor will be integrated here</p>
+<script lang="ts">
+  import { segments, speakers } from '$lib/stores/transcript';
+  import { currentTimeMs } from '$lib/stores/playback';
+  import type { Segment, Word, Speaker } from '$lib/types/transcript';
+
+  /** Optional callbacks a parent component may pass to the transcript editor. */
+  interface Props {
+    /** Invoked with the `start_ms` of a word when that word is clicked or activated from the keyboard. */
+    onWordClick?: (timeMs: number) => void;
+    /** NOTE(review): accepted as a prop but never invoked in the visible component code — confirm whether text editing is wired up elsewhere or still pending. */
+    onTextEdit?: (segmentId: string, newText: string) => void;
+  }
+
+  // Component props; `$props()` is presumably the Svelte 5 props rune — keep this destructuring form intact.
+  let { onWordClick, onTextEdit }: Props = $props();
+
+  // Populated through `bind:this` on the root `.transcript-editor` element in the markup below.
+  let transcriptContainer: HTMLDivElement;
+
+  /**
+   * Resolves the label shown for a segment's speaker.
+   *
+   * @param speakerId - Speaker id to look up; a falsy id yields `'Unknown'`.
+   * @param speakerList - Speakers to search by `id`.
+   * @returns The matching speaker's `display_name`, else its `label`, else
+   *   `'Unknown'`. Empty strings are treated the same as missing values.
+   */
+  function getSpeakerName(speakerId: string | null, speakerList: Speaker[]): string {
+    const match = speakerId
+      ? speakerList.find((candidate) => candidate.id === speakerId)
+      : undefined;
+    return match?.display_name || match?.label || 'Unknown';
+  }
+
+  /**
+   * Resolves the accent color used for a segment's speaker label.
+   *
+   * @param speakerId - Speaker id to look up; a falsy id yields the fallback.
+   * @param speakerList - Speakers to search by `id`.
+   * @returns The first matching speaker's `color`, or `'#888'` when there is
+   *   no id, no match, or the match has no (or an empty) color.
+   */
+  function getSpeakerColor(speakerId: string | null, speakerList: Speaker[]): string {
+    const fallbackColor = '#888';
+    if (speakerId) {
+      for (const candidate of speakerList) {
+        // Only the first speaker with this id is considered.
+        if (candidate.id === speakerId) {
+          return candidate.color || fallbackColor;
+        }
+      }
+    }
+    return fallbackColor;
+  }
+
+  /**
+   * Formats a millisecond offset as `m:ss`.
+   *
+   * Sub-second precision is dropped (floored). Minutes are not wrapped into
+   * hours, so one hour renders as `60:00`.
+   *
+   * @param ms - Offset in milliseconds.
+   * @returns Minutes and zero-padded seconds joined by a colon.
+   */
+  function formatTimestamp(ms: number): string {
+    const wholeSeconds = Math.floor(ms / 1000);
+    const minutes = Math.floor(wholeSeconds / 60);
+    const paddedSeconds = String(wholeSeconds % 60).padStart(2, '0');
+    return minutes + ':' + paddedSeconds;
+  }
+
+  /**
+   * Reports whether the playhead is currently inside a word.
+   *
+   * The word's span is treated as half-open, `[start_ms, end_ms)`, so that when
+   * one word ends exactly where the next begins only the later word matches.
+   * Clicking a word seeks to its `start_ms`, which would otherwise also light
+   * up the preceding word.
+   *
+   * @param word - Word whose `start_ms` / `end_ms` bounds are tested.
+   * @param currentMs - Current playback position in milliseconds.
+   * @returns `true` when `currentMs` falls inside the word's span.
+   */
+  function isWordActive(word: Word, currentMs: number): boolean {
+    const { start_ms: startMs, end_ms: endMs } = word;
+    // A zero-length word has an empty half-open span; keep it matchable at its instant.
+    if (startMs === endMs) return currentMs === startMs;
+    return currentMs >= startMs && currentMs < endMs;
+  }
+
+  /**
+   * Reports whether the playhead is currently inside a segment.
+   *
+   * The segment's span is treated as half-open, `[start_ms, end_ms)`, so two
+   * segments that share a boundary timestamp are never both marked active at
+   * that instant.
+   *
+   * @param segment - Segment whose `start_ms` / `end_ms` bounds are tested.
+   * @param currentMs - Current playback position in milliseconds.
+   * @returns `true` when `currentMs` falls inside the segment's span.
+   */
+  function isSegmentActive(segment: Segment, currentMs: number): boolean {
+    const { start_ms: startMs, end_ms: endMs } = segment;
+    // A zero-length segment has an empty half-open span; keep it matchable at its instant.
+    if (startMs === endMs) return currentMs === startMs;
+    return currentMs >= startMs && currentMs < endMs;
+  }
+
+  /**
+   * Forwards a word activation to the parent's `onWordClick` callback, passing
+   * the word's `start_ms`. Does nothing when no callback was supplied.
+   *
+   * @param word - The word that was clicked or activated.
+   */
+  function handleWordClick(word: Word) {
+    if (onWordClick != null) {
+      onWordClick(word.start_ms);
+    }
+  }
+</script>
+
+<div class="transcript-editor" bind:this={transcriptContainer}>
+  {#if $segments.length === 0}
+    <div class="empty-state">
+      <p>No transcript yet</p>
+      <p class="hint">Import an audio file and run transcription to get started</p>
+    </div>
+  {:else}
+    {#each $segments as segment (segment.id)}
+      <div
+        class="segment"
+        class:active={isSegmentActive(segment, $currentTimeMs)}
+      >
+        <div class="segment-header">
+          <span
+            class="speaker-label"
+            style="border-left-color: {getSpeakerColor(segment.speaker_id, $speakers)}"
+          >
+            {getSpeakerName(segment.speaker_id, $speakers)}
+          </span>
+          <span class="timestamp">{formatTimestamp(segment.start_ms)}</span>
+        </div>
+        <div class="segment-text">
+          {#each segment.words as word (word.id)}
+            <span
+              class="word"
+              class:word-active={isWordActive(word, $currentTimeMs)}
+              onclick={() => handleWordClick(word)}
+              role="button"
+              tabindex="0"
+              onkeydown={(e) => {
+                // Elements with role="button" are expected to activate on both
+                // Enter and Space; preventDefault stops Space from scrolling
+                // the transcript container.
+                if (e.key === 'Enter' || e.key === ' ') {
+                  e.preventDefault();
+                  handleWordClick(word);
+                }
+              }}
+            >{word.word} </span>
+          {:else}
+            <span class="segment-plain-text">{segment.text}</span>
+          {/each}
+        </div>
+      </div>
+    {/each}
+  {/if}
 </div>

 <style>
  .transcript-editor {
+    flex: 1;
+    overflow-y: auto;
    padding: 1rem;
    background: #16213e;
    border-radius: 8px;
    color: #e0e0e0;
-    flex: 1;
  }
-  .placeholder {
+  .empty-state {
+    display: flex;
+    flex-direction: column;
+    align-items: center;
+    justify-content: center;
+    height: 100%;
    color: #666;
+  }
+  .hint {
    font-size: 0.875rem;
+    color: #555;
+  }
+  .segment {
+    margin-bottom: 1rem;
+    padding: 0.5rem;
+    border-radius: 4px;
+    transition: background-color 0.2s;
+  }
+  .segment.active {
+    background: rgba(233, 69, 96, 0.1);
+  }
+  .segment-header {
+    display: flex;
+    align-items: center;
+    gap: 0.5rem;
+    margin-bottom: 0.25rem;
+  }
+  .speaker-label {
+    font-weight: 600;
+    font-size: 0.875rem;
+    border-left: 3px solid;
+    padding-left: 0.5rem;
+  }
+  .timestamp {
+    color: #666;
+    font-size: 0.75rem;
+    font-variant-numeric: tabular-nums;
+  }
+  .segment-text {
+    line-height: 1.6;
+    padding-left: 0.75rem;
+  }
+  .word {
+    cursor: pointer;
+    border-radius: 2px;
+    padding: 0 1px;
+    transition: background-color 0.15s;
+  }
+  .word:hover {
+    background: rgba(233, 69, 96, 0.2);
+  }
+  .word-active {
+    background: rgba(233, 69, 96, 0.35);
+    color: #fff;
+  }
+  .segment-plain-text {
+    color: #ccc;
  }
 </style>