Phase 2: Core transcription pipeline and audio playback
- Implement faster-whisper TranscribeService with word-level timestamps, progress reporting, and hardware auto-detection
- Wire up Rust SidecarManager for Python process lifecycle (spawn, IPC, shutdown)
- Add transcribe_file Tauri command bridging frontend to Python sidecar
- Integrate wavesurfer.js WaveformPlayer with play/pause, skip, seek controls
- Build TranscriptEditor with word-level click-to-seek and active highlighting
- Connect file import flow: prompt → asset load → transcribe → display
- Add typed tauri-bridge service with TranscriptionResult interface
- Add Python tests for hardware detection and transcription result formatting

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1,18 +1,154 @@
|
||||
<div class="transcript-editor">
|
||||
<p>Transcript Editor</p>
|
||||
<p class="placeholder">TipTap rich text editor will be integrated here</p>
|
||||
<script lang="ts">
|
||||
import { segments, speakers } from '$lib/stores/transcript';
|
||||
import { currentTimeMs } from '$lib/stores/playback';
|
||||
import type { Segment, Word, Speaker } from '$lib/types/transcript';
|
||||
|
||||
/** Optional callbacks a parent can pass to the transcript editor. */
interface Props {
  /**
   * Called with a word's `start_ms` when that word is clicked or activated
   * with the Enter key.
   */
  onWordClick?: (timeMs: number) => void;

  /**
   * Callback for text edits, keyed by segment id.
   * NOTE(review): nothing in this component's visible script or markup
   * calls it yet — confirm whether it is reserved for a later phase.
   */
  onTextEdit?: (segmentId: string, newText: string) => void;
}
|
||||
|
||||
let { onWordClick, onTextEdit }: Props = $props();
|
||||
|
||||
let transcriptContainer: HTMLDivElement;
|
||||
|
||||
/**
 * Resolve the name shown in a segment header for the given speaker id.
 *
 * @param speakerId - Id of the segment's speaker; `null` or empty means unassigned.
 * @param speakerList - Speakers to search; the first entry with a matching `id` wins.
 * @returns The speaker's `display_name` when non-empty, otherwise its `label`
 *          when non-empty, otherwise `'Unknown'`.
 */
function getSpeakerName(speakerId: string | null, speakerList: Speaker[]): string {
  const fallback = 'Unknown';
  if (!speakerId) {
    return fallback;
  }

  for (const candidate of speakerList) {
    if (candidate.id !== speakerId) {
      continue;
    }
    // Only the first match is considered, even if later entries share the id.
    if (candidate.display_name) {
      return candidate.display_name;
    }
    if (candidate.label) {
      return candidate.label;
    }
    return fallback;
  }

  return fallback;
}
|
||||
|
||||
/**
 * Resolve the accent colour used for a speaker's label border.
 *
 * @param speakerId - Id of the segment's speaker; `null` or empty means unassigned.
 * @param speakerList - Speakers to search; the first entry with a matching `id` wins.
 * @returns The matching speaker's `color` when non-empty, otherwise `'#888'`.
 */
function getSpeakerColor(speakerId: string | null, speakerList: Speaker[]): string {
  const defaultColor = '#888';
  const match = speakerId
    ? speakerList.find((entry) => entry.id === speakerId)
    : undefined;
  return (match && match.color) || defaultColor;
}
|
||||
|
||||
/**
 * Format a millisecond offset as `m:ss`.
 *
 * Fractions of a second are dropped and minutes are not rolled over into
 * hours, so one hour renders as `60:00`.
 *
 * @param ms - Offset in milliseconds.
 * @returns Minutes, a colon, then seconds zero-padded to two digits.
 */
function formatTimestamp(ms: number): string {
  const wholeSeconds = Math.floor(ms / 1000);
  const secondsPart = String(wholeSeconds % 60).padStart(2, '0');
  const minutesPart = Math.floor(wholeSeconds / 60);
  return minutesPart + ':' + secondsPart;
}
|
||||
|
||||
/**
 * Tell whether the playback position falls within a word's time range.
 *
 * Both ends are inclusive, so a position equal to `start_ms` or `end_ms`
 * counts as active.
 *
 * @param word - Word whose `start_ms` / `end_ms` bound the range.
 * @param currentMs - Current playback position in milliseconds.
 */
function isWordActive(word: Word, currentMs: number): boolean {
  const hasStarted = currentMs >= word.start_ms;
  const hasNotEnded = currentMs <= word.end_ms;
  return hasStarted && hasNotEnded;
}
|
||||
|
||||
/**
 * Tell whether the playback position falls within a segment's time range.
 *
 * Both ends are inclusive, matching the word-level check.
 *
 * @param segment - Segment whose `start_ms` / `end_ms` bound the range.
 * @param currentMs - Current playback position in milliseconds.
 */
function isSegmentActive(segment: Segment, currentMs: number): boolean {
  // Negated ">=" rather than "<" so a NaN on either side still yields false.
  if (!(currentMs >= segment.start_ms)) {
    return false;
  }
  return currentMs <= segment.end_ms;
}
|
||||
|
||||
/**
 * Forward a word activation to the parent's `onWordClick` callback,
 * passing the word's start time. Does nothing when no callback was supplied.
 */
function handleWordClick(word: Word) {
  if (onWordClick == null) {
    return;
  }
  onWordClick(word.start_ms);
}
|
||||
</script>
|
||||
|
||||
<div class="transcript-editor" bind:this={transcriptContainer}>
|
||||
{#if $segments.length === 0}
|
||||
<div class="empty-state">
|
||||
<p>No transcript yet</p>
|
||||
<p class="hint">Import an audio file and run transcription to get started</p>
|
||||
</div>
|
||||
{:else}
|
||||
{#each $segments as segment (segment.id)}
|
||||
<div
|
||||
class="segment"
|
||||
class:active={isSegmentActive(segment, $currentTimeMs)}
|
||||
>
|
||||
<div class="segment-header">
|
||||
<span
|
||||
class="speaker-label"
|
||||
style="border-left-color: {getSpeakerColor(segment.speaker_id, $speakers)}"
|
||||
>
|
||||
{getSpeakerName(segment.speaker_id, $speakers)}
|
||||
</span>
|
||||
<span class="timestamp">{formatTimestamp(segment.start_ms)}</span>
|
||||
</div>
|
||||
<div class="segment-text">
|
||||
{#each segment.words as word (word.id)}
|
||||
<span
|
||||
class="word"
|
||||
class:word-active={isWordActive(word, $currentTimeMs)}
|
||||
onclick={() => handleWordClick(word)}
|
||||
role="button"
|
||||
tabindex="0"
|
||||
onkeydown={(e) => { if (e.key === 'Enter') handleWordClick(word); }}
|
||||
>{word.word} </span>
|
||||
{:else}
|
||||
<span class="segment-plain-text">{segment.text}</span>
|
||||
{/each}
|
||||
</div>
|
||||
</div>
|
||||
{/each}
|
||||
{/if}
|
||||
</div>
|
||||
|
||||
<style>
|
||||
.transcript-editor {
|
||||
flex: 1;
|
||||
overflow-y: auto;
|
||||
padding: 1rem;
|
||||
background: #16213e;
|
||||
border-radius: 8px;
|
||||
color: #e0e0e0;
|
||||
flex: 1;
|
||||
}
|
||||
.placeholder {
|
||||
.empty-state {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
align-items: center;
|
||||
justify-content: center;
|
||||
height: 100%;
|
||||
color: #666;
|
||||
}
|
||||
.hint {
|
||||
font-size: 0.875rem;
|
||||
color: #555;
|
||||
}
|
||||
.segment {
|
||||
margin-bottom: 1rem;
|
||||
padding: 0.5rem;
|
||||
border-radius: 4px;
|
||||
transition: background-color 0.2s;
|
||||
}
|
||||
.segment.active {
|
||||
background: rgba(233, 69, 96, 0.1);
|
||||
}
|
||||
.segment-header {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 0.5rem;
|
||||
margin-bottom: 0.25rem;
|
||||
}
|
||||
.speaker-label {
|
||||
font-weight: 600;
|
||||
font-size: 0.875rem;
|
||||
border-left: 3px solid;
|
||||
padding-left: 0.5rem;
|
||||
}
|
||||
.timestamp {
|
||||
color: #666;
|
||||
font-size: 0.75rem;
|
||||
font-variant-numeric: tabular-nums;
|
||||
}
|
||||
.segment-text {
|
||||
line-height: 1.6;
|
||||
padding-left: 0.75rem;
|
||||
}
|
||||
.word {
|
||||
cursor: pointer;
|
||||
border-radius: 2px;
|
||||
padding: 0 1px;
|
||||
transition: background-color 0.15s;
|
||||
}
|
||||
.word:hover {
|
||||
background: rgba(233, 69, 96, 0.2);
|
||||
}
|
||||
.word-active {
|
||||
background: rgba(233, 69, 96, 0.35);
|
||||
color: #fff;
|
||||
}
|
||||
.segment-plain-text {
|
||||
color: #ccc;
|
||||
}
|
||||
</style>
|
||||
|
||||
@@ -1,17 +1,144 @@
|
||||
<script lang="ts">
|
||||
import { onMount, onDestroy } from 'svelte';
|
||||
import WaveSurfer from 'wavesurfer.js';
|
||||
import { isPlaying, currentTimeMs, durationMs } from '$lib/stores/playback';
|
||||
|
||||
/** Inputs a parent can pass to the waveform player. */
interface Props {
  /**
   * URL of the audio to load when the component mounts. Defaults to an
   * empty string, in which case nothing is loaded on mount.
   */
  audioUrl?: string;

  /**
   * Callback taking a time in milliseconds.
   * NOTE(review): nothing in this component's visible script calls it —
   * confirm whether it should fire on user seeks.
   */
  onSeek?: (timeMs: number) => void;
}
|
||||
|
||||
let { audioUrl = '', onSeek }: Props = $props();
|
||||
|
||||
let container: HTMLDivElement;
|
||||
let wavesurfer: WaveSurfer | null = $state(null);
|
||||
let currentTime = $state('0:00');
|
||||
let totalTime = $state('0:00');
|
||||
|
||||
/**
 * Format a position or duration in seconds as `m:ss` for the time readout.
 *
 * Media elements can report `NaN` or `Infinity` durations (no metadata yet,
 * live streams), and a negative value would otherwise render as `-1:-1`.
 * Any non-finite or negative input is shown as `0:00` instead.
 *
 * @param seconds - Time in seconds; fractions are dropped.
 * @returns Minutes, a colon, then seconds zero-padded to two digits.
 *          Minutes are not rolled over into hours.
 */
function formatTime(seconds: number): string {
  const safeSeconds = Number.isFinite(seconds) && seconds > 0 ? seconds : 0;
  const m = Math.floor(safeSeconds / 60);
  const s = Math.floor(safeSeconds % 60);
  return `${m}:${s.toString().padStart(2, '0')}`;
}
|
||||
|
||||
// Create the WaveSurfer instance once the container element exists, mirror
// its playback state into the shared stores, and load the initial audio.
// NOTE(review): no handler registered here invokes the `onSeek` prop —
// confirm whether user seeks on the waveform should be reported to the parent.
onMount(() => {
  // A local, non-null handle lets the event callbacks avoid `!` assertions
  // on the nullable component-level `wavesurfer` variable.
  const ws = WaveSurfer.create({
    container,
    waveColor: '#4a5568',
    progressColor: '#e94560',
    cursorColor: '#e94560',
    height: 80,
    barWidth: 2,
    barGap: 1,
    barRadius: 2,
  });
  wavesurfer = ws;

  // Playback position: store gets milliseconds, the label gets m:ss.
  ws.on('timeupdate', (time: number) => {
    currentTimeMs.set(Math.round(time * 1000));
    currentTime = formatTime(time);
  });

  // Duration is only read once the audio is ready.
  ws.on('ready', () => {
    const dur = ws.getDuration();
    durationMs.set(Math.round(dur * 1000));
    totalTime = formatTime(dur);
  });

  ws.on('play', () => isPlaying.set(true));
  ws.on('pause', () => isPlaying.set(false));
  ws.on('finish', () => isPlaying.set(false));

  if (audioUrl) {
    // load() returns a promise; without a handler a failed fetch/decode
    // surfaces as an unhandled rejection.
    ws.load(audioUrl).catch((err: unknown) => {
      console.error('Failed to load audio:', err);
    });
  }
});
|
||||
|
||||
// Tear down the WaveSurfer instance, if one was created, when the component
// is destroyed.
onDestroy(() => {
  if (wavesurfer) {
    wavesurfer.destroy();
  }
});
|
||||
|
||||
/**
 * Toggle between playing and paused. Does nothing before the player exists.
 *
 * `playPause()` returns a promise that can reject (for example when a play
 * request is interrupted or no playable media is loaded); the rejection is
 * logged here so it does not become an unhandled rejection.
 */
function togglePlayPause() {
  wavesurfer?.playPause().catch((err: unknown) => {
    console.error('Failed to toggle playback:', err);
  });
}
|
||||
|
||||
/** Jump back five seconds, stopping at the start. No-op before the player exists. */
function skipBack() {
  if (!wavesurfer) {
    return;
  }
  const stepSeconds = 5;
  const rewound = wavesurfer.getCurrentTime() - stepSeconds;
  wavesurfer.setTime(Math.max(0, rewound));
}
|
||||
|
||||
/** Jump ahead five seconds, stopping at the reported duration. No-op before the player exists. */
function skipForward() {
  if (!wavesurfer) {
    return;
  }
  const stepSeconds = 5;
  const limit = wavesurfer.getDuration();
  const advanced = wavesurfer.getCurrentTime() + stepSeconds;
  wavesurfer.setTime(Math.min(limit, advanced));
}
|
||||
|
||||
/**
 * Seek to a specific time in milliseconds and start playback if paused.
 * Called from transcript click-to-seek.
 *
 * Does nothing before the player exists or when `timeMs` is not a finite
 * number (assigning a non-finite time to a media element throws).
 * Negative values are clamped to the start.
 *
 * @param timeMs - Target position in milliseconds.
 */
export function seekTo(timeMs: number) {
  if (!wavesurfer || !Number.isFinite(timeMs)) {
    return;
  }

  wavesurfer.setTime(Math.max(0, timeMs) / 1000);

  if (!wavesurfer.isPlaying()) {
    // play() returns a promise that can reject; log it rather than leaving
    // an unhandled rejection.
    wavesurfer.play().catch((err: unknown) => {
      console.error('Failed to start playback after seek:', err);
    });
  }
}
|
||||
|
||||
/**
 * Load a new audio file into the player.
 *
 * Does nothing if called before the player has been created. A failed load
 * is logged instead of surfacing as an unhandled promise rejection.
 *
 * @param url - URL of the audio to load.
 */
export function loadAudio(url: string) {
  wavesurfer?.load(url).catch((err: unknown) => {
    console.error('Failed to load audio:', err);
  });
}
|
||||
</script>
|
||||
|
||||
<div class="waveform-player">
|
||||
<p>Waveform Player</p>
|
||||
<p class="placeholder">wavesurfer.js will be integrated here</p>
|
||||
<div class="waveform-container" bind:this={container}></div>
|
||||
<div class="controls">
|
||||
<button class="control-btn" onclick={skipBack} title="Back 5s">⏪</button>
|
||||
<button class="control-btn play-btn" onclick={togglePlayPause} title="Play/Pause">
|
||||
{#if $isPlaying}⏸{:else}▶{/if}
|
||||
</button>
|
||||
<button class="control-btn" onclick={skipForward} title="Forward 5s">⏩</button>
|
||||
<span class="time">{currentTime} / {totalTime}</span>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<style>
|
||||
.waveform-player {
|
||||
padding: 1rem;
|
||||
background: #1a1a2e;
|
||||
border-radius: 8px;
|
||||
color: #e0e0e0;
|
||||
padding: 0.75rem;
|
||||
}
|
||||
.placeholder {
|
||||
color: #666;
|
||||
.waveform-container {
|
||||
border-radius: 4px;
|
||||
overflow: hidden;
|
||||
}
|
||||
.controls {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 0.5rem;
|
||||
margin-top: 0.5rem;
|
||||
}
|
||||
.control-btn {
|
||||
background: #0f3460;
|
||||
border: none;
|
||||
color: #e0e0e0;
|
||||
padding: 0.4rem 0.8rem;
|
||||
border-radius: 4px;
|
||||
cursor: pointer;
|
||||
font-size: 1rem;
|
||||
}
|
||||
.control-btn:hover {
|
||||
background: #1a4a7a;
|
||||
}
|
||||
.play-btn {
|
||||
padding: 0.4rem 1rem;
|
||||
font-size: 1.2rem;
|
||||
}
|
||||
.time {
|
||||
color: #999;
|
||||
font-size: 0.875rem;
|
||||
margin-left: auto;
|
||||
font-variant-numeric: tabular-nums;
|
||||
}
|
||||
</style>
|
||||
|
||||
@@ -12,3 +12,29 @@ export async function getProject(id: string): Promise<Project | null> {
|
||||
/**
 * Fetch all projects by invoking the `list_projects` backend command.
 *
 * @returns A promise for the projects returned by the command.
 */
export async function listProjects(): Promise<Project[]> {
  const pending: Promise<Project[]> = invoke('list_projects');
  return pending;
}
|
||||
|
||||
/**
 * Shape of the value `transcribeFile` resolves with.
 *
 * Fields ending in `_ms` are presumably millisecond offsets/durations —
 * confirm against the backend that produces this payload.
 */
export interface TranscriptionResult {
  /** Transcribed segments, each carrying its text, time range and words. */
  segments: {
    text: string;
    start_ms: number;
    end_ms: number;
    /** Word-level entries for this segment. */
    words: {
      word: string;
      start_ms: number;
      end_ms: number;
      /** Per-word confidence score; range not visible here — TODO confirm. */
      confidence: number;
    }[];
  }[];
  /** Language reported for the transcription. */
  language: string;
  /** Probability reported alongside `language`. */
  language_probability: number;
  /** Total duration reported for the transcribed audio. */
  duration_ms: number;
}
|
||||
|
||||
/**
 * Request a transcription by invoking the `transcribe_file` backend command.
 *
 * Optional arguments are forwarded as given; omitted ones are passed as
 * `undefined`.
 *
 * @param filePath - Path of the file to transcribe.
 * @param model - Optional model selector forwarded to the command.
 * @param device - Optional device selector forwarded to the command.
 * @param language - Optional language forwarded to the command.
 * @returns A promise for the command's transcription result.
 */
export async function transcribeFile(
  filePath: string,
  model?: string,
  device?: string,
  language?: string,
): Promise<TranscriptionResult> {
  const args = { filePath, model, device, language };
  const pending: Promise<TranscriptionResult> = invoke('transcribe_file', args);
  return pending;
}
|
||||
|
||||
Reference in New Issue
Block a user