Files
voice-to-notes/src/lib/components/TranscriptEditor.svelte
Josh Knapp 585411f402 Fix speaker diarization: WAV conversion, pyannote 4.0 compat, telemetry bug
- Convert non-WAV audio to 16kHz mono WAV before diarization (pyannote
  v4.0.4 AudioDecoder returns None duration for FLAC, causing crash)
- Handle pyannote 4.0 DiarizeOutput return type (unwrap .speaker_diarization)
- Disable pyannote telemetry (np.isfinite(None) bug with max_speakers)
- Use huggingface_hub.login() to persist token for all sub-downloads
- Pre-download sub-models (segmentation-3.0, speaker-diarization-community-1)
- Add third required model license link in settings UI
- Improve SpeakerManager hints based on settings state
- Add word-wrap to transcript text

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-26 19:46:07 -08:00

273 lines
7.6 KiB
Svelte

<script lang="ts">
import { segments, speakers } from '$lib/stores/transcript';
import { currentTimeMs, isPlaying } from '$lib/stores/playback';
import type { Segment, Word, Speaker } from '$lib/types/transcript';
/** Optional callbacks the parent can pass in to react to user actions in the transcript. */
interface Props {
/** Invoked with a word's `start_ms` when that word is clicked or activated from the keyboard. */
onWordClick?: (timeMs: number) => void;
/** Invoked with the segment id and the trimmed new text after an edit is committed. */
onTextEdit?: (segmentId: string, newText: string) => void;
}
// Pull the optional callbacks out of the component props.
let { onWordClick, onTextEdit }: Props = $props();
// Root scroll container; assigned through `bind:this` on the `.transcript-editor` div.
let transcriptContainer: HTMLDivElement;
// Whether playback may scroll the active segment into view (turned off while the user scrolls).
let autoScroll = $state(true);
// Id of the segment last scrolled to, so the same segment is not scrolled to repeatedly.
let lastActiveSegmentId = $state('');
// Handle of the pending timer that turns auto-scroll back on, or null when none is scheduled.
let userScrollTimeout: ReturnType<typeof setTimeout> | null = null;
/**
 * Resolve the name to show for a segment's speaker.
 *
 * Uses the first speaker in `speakerList` whose id equals `speakerId`,
 * preferring its `display_name`, then its `label`. Returns `'Unknown'` when
 * the id is empty/null, no speaker matches, or both names are empty.
 */
function getSpeakerName(speakerId: string | null, speakerList: Speaker[]): string {
  const fallback = 'Unknown';
  if (!speakerId) {
    return fallback;
  }
  for (const candidate of speakerList) {
    if (candidate.id === speakerId) {
      return candidate.display_name || candidate.label || fallback;
    }
  }
  return fallback;
}
/**
 * Resolve the accent colour for a segment's speaker.
 *
 * Returns the `color` of the first speaker whose id equals `speakerId`, or
 * the grey `'#888'` when the id is empty/null, no speaker matches, or the
 * matched speaker has no colour set.
 */
function getSpeakerColor(speakerId: string | null, speakerList: Speaker[]): string {
  const neutralGrey = '#888';
  if (!speakerId) {
    return neutralGrey;
  }
  const match = speakerList.find(({ id }) => id === speakerId);
  if (match && match.color) {
    return match.color;
  }
  return neutralGrey;
}
/**
 * Format a millisecond offset as `m:ss`.
 *
 * Fractions of a second are truncated and minutes are not rolled over into
 * hours (3 600 000 ms renders as `60:00`).
 *
 * @param ms Offset in milliseconds. Negative or non-finite values are treated as 0.
 * @returns The formatted timestamp, e.g. `1:05`.
 */
function formatTimestamp(ms: number): string {
  // Without this clamp a negative offset rendered as "-1:-1" and NaN as "NaN:NaN".
  const safeMs = Number.isFinite(ms) && ms > 0 ? ms : 0;
  const totalSeconds = Math.floor(safeMs / 1000);
  const m = Math.floor(totalSeconds / 60);
  const s = totalSeconds % 60;
  return `${m}:${s.toString().padStart(2, '0')}`;
}
/**
 * Report whether the playback position falls within a word's time span.
 * Both the start and the end of the span are inclusive.
 */
function isWordActive(word: Word, currentMs: number): boolean {
  const { start_ms: from, end_ms: until } = word;
  return from <= currentMs && until >= currentMs;
}
/**
 * Report whether the playback position falls within a segment's time span.
 * Both the start and the end of the span are inclusive.
 */
function isSegmentActive(segment: Segment, currentMs: number): boolean {
  // Not reached the segment's start yet (also rejects NaN on either side).
  if (!(segment.start_ms <= currentMs)) {
    return false;
  }
  return segment.end_ms >= currentMs;
}
// Id of the segment currently shown as an inline editor, or null when no segment is being edited.
let editingSegmentId = $state<string | null>(null);
// Draft text bound to the edit textarea.
let editText = $state('');
/** Forward a word activation to the parent, passing the word's start time in milliseconds. */
function handleWordClick(word: Word) {
  if (!onWordClick) {
    return;
  }
  onWordClick(word.start_ms);
}
/**
 * Open the inline editor for a segment and pre-fill the draft text.
 *
 * For an unedited segment that has word entries, the draft is the word texts
 * joined with single spaces. Otherwise the draft is `segment.text`.
 */
function startEditing(segment: Segment) {
  editingSegmentId = segment.id;
  // Committing an edit updates `text` but leaves `words` untouched, so an
  // already-edited segment must be re-opened from `text`; rebuilding the draft
  // from `words` would silently discard the earlier edit.
  const useWords = segment.words.length > 0 && !segment.is_edited;
  editText = useWords
    ? segment.words.map(w => w.word).join(' ')
    : segment.text;
}
/**
 * Commit the current draft for `segmentId` and close the inline editor.
 *
 * A draft that is empty after trimming is discarded without changing the
 * segment. Otherwise the segment's `text` is replaced, its first pre-edit
 * text is kept in `original_text`, it is flagged as edited with a timestamp,
 * and `onTextEdit` is notified.
 *
 * @param segmentId Id of the segment whose editor is being closed.
 */
function finishEditing(segmentId: string) {
  // This runs from both the Enter key and the textarea's blur handler. Closing
  // the editor removes the focused textarea, which can itself raise a blur, so
  // ignore calls for a segment that is no longer being edited. Otherwise an
  // edit could be committed twice, or committed after Escape cancelled it.
  if (editingSegmentId !== segmentId) return;
  // Close the editor before committing so any re-entrant call hits the guard above.
  editingSegmentId = null;

  const trimmed = editText.trim();
  if (!trimmed) return;

  // Update the segment text in the store
  segments.update(segs => segs.map(s => {
    if (s.id !== segmentId) return s;
    return {
      ...s,
      text: trimmed,
      // Keep the earliest known text so repeated edits don't overwrite the original.
      original_text: s.original_text ?? s.text,
      is_edited: true,
      edited_at: new Date().toISOString(),
    };
  }));
  onTextEdit?.(segmentId, trimmed);
}
/**
 * Keyboard shortcuts for the inline edit textarea.
 *
 * - Escape cancels the edit and discards the draft.
 * - Enter (without Shift) commits the edit; Shift+Enter keeps the default
 *   behaviour so a newline can be typed.
 *
 * @param e The keydown event from the textarea.
 * @param segmentId Id of the segment being edited.
 */
function handleEditKeydown(e: KeyboardEvent, segmentId: string) {
  // While an IME composition is in progress, Enter/Escape confirm or cancel the
  // composition itself and must not save or close the editor.
  if (e.isComposing) return;

  if (e.key === 'Escape') {
    // Drop the draft before closing: if removing the focused textarea raises a
    // blur, finishEditing then sees empty text and commits nothing.
    editText = '';
    editingSegmentId = null;
  } else if (e.key === 'Enter' && !e.shiftKey) {
    e.preventDefault();
    finishEditing(segmentId);
  }
}
// Scroll handler for the transcript container. While audio is playing, any
// scroll suspends auto-scroll; it is re-enabled 3 seconds after the most
// recent scroll event. Does nothing when playback is stopped.
function handleScroll() {
  if ($isPlaying) {
    autoScroll = false;
    // Restart the countdown on every scroll event.
    if (userScrollTimeout) {
      clearTimeout(userScrollTimeout);
    }
    const resumeAutoScroll = () => {
      autoScroll = true;
    };
    userScrollTimeout = setTimeout(resumeAutoScroll, 3000);
  }
}
// Auto-scroll to the active segment during playback.
// Runs only while playing, with auto-scroll enabled and the container bound,
// and scrolls at most once per segment (tracked via lastActiveSegmentId).
$effect(() => {
  if (!$isPlaying || !autoScroll || !transcriptContainer) return;
  const currentMs = $currentTimeMs;
  const activeSegment = $segments.find(s => isSegmentActive(s, currentMs));
  if (!activeSegment || activeSegment.id === lastActiveSegmentId) return;
  lastActiveSegmentId = activeSegment.id;
  // Escape the id so quotes, backslashes or other special characters in it
  // cannot break (or alter the meaning of) the attribute selector.
  const selector = `[data-segment-id="${CSS.escape(activeSegment.id)}"]`;
  const el = transcriptContainer.querySelector(selector);
  if (el) {
    el.scrollIntoView({ behavior: 'smooth', block: 'center' });
  }
});
</script>
<!-- Scrollable transcript: one block per segment, with clickable words and inline editing. -->
<div class="transcript-editor" bind:this={transcriptContainer} onscroll={handleScroll}>
  {#if $segments.length === 0}
    <!-- Placeholder shown while there are no segments. -->
    <div class="empty-state">
      <p>No transcript yet</p>
      <p class="hint">Import an audio file and run transcription to get started</p>
    </div>
  {:else}
    {#each $segments as segment (segment.id)}
      <!-- data-segment-id lets the auto-scroll effect locate this element. -->
      <div
        class="segment"
        class:active={isSegmentActive(segment, $currentTimeMs)}
        data-segment-id={segment.id}
      >
        <div class="segment-header">
          <span
            class="speaker-label"
            style="border-left-color: {getSpeakerColor(segment.speaker_id, $speakers)}"
          >
            {getSpeakerName(segment.speaker_id, $speakers)}
          </span>
          <span class="timestamp">{formatTimestamp(segment.start_ms)}</span>
        </div>
        {#if editingSegmentId === segment.id}
          <!-- Inline editor: commits on blur or Enter, cancels on Escape. -->
          <div class="segment-edit">
            <textarea
              class="edit-textarea"
              bind:value={editText}
              onblur={() => finishEditing(segment.id)}
              onkeydown={(e) => handleEditKeydown(e, segment.id)}
            ></textarea>
            <span class="edit-hint">Enter to save, Esc to cancel</span>
          </div>
        {:else}
          <!-- Read view: double-click opens the editor; each word seeks playback when activated. -->
          <!-- svelte-ignore a11y_no_static_element_interactions -->
          <div class="segment-text" ondblclick={() => startEditing(segment)}>
            {#each segment.words as word (word.id)}
              <span
                class="word"
                class:word-active={isWordActive(word, $currentTimeMs)}
                onclick={() => handleWordClick(word)}
                role="button"
                tabindex="0"
                onkeydown={(e) => {
                  // Elements with role="button" are expected to activate on both Enter and Space.
                  if (e.key !== 'Enter' && e.key !== ' ') return;
                  // Space would otherwise scroll the transcript container.
                  if (e.key === ' ') e.preventDefault();
                  handleWordClick(word);
                }}
              >{word.word} </span>
            {:else}
              <span class="segment-plain-text">{segment.text}</span>
            {/each}
            {#if segment.is_edited}
              <span class="edited-badge">edited</span>
            {/if}
          </div>
        {/if}
      </div>
    {/each}
  {/if}
</div>
<style>
  /* Scroll container for the whole transcript. */
  .transcript-editor {
    flex: 1;
    overflow-y: auto;
    padding: 1rem;
    background: #16213e;
    border-radius: 8px;
    color: #e0e0e0;
  }

  /* Centered placeholder shown when there are no segments. */
  .empty-state {
    display: flex;
    flex-direction: column;
    align-items: center;
    justify-content: center;
    height: 100%;
    color: #666;
  }
  .hint {
    font-size: 0.875rem;
    color: #555;
  }

  /* One transcript segment; `.active` is toggled for the segment under the playhead. */
  .segment {
    margin-bottom: 1rem;
    padding: 0.5rem;
    border-radius: 4px;
    transition: background-color 0.2s;
  }
  .segment.active {
    background: rgba(233, 69, 96, 0.1);
  }

  /* Header row: speaker label followed by the segment's start timestamp. */
  .segment-header {
    display: flex;
    align-items: center;
    gap: 0.5rem;
    margin-bottom: 0.25rem;
  }
  /* The left border colour is set inline per speaker. */
  .speaker-label {
    font-weight: 600;
    font-size: 0.875rem;
    border-left: 3px solid;
    padding-left: 0.5rem;
  }
  .timestamp {
    color: #666;
    font-size: 0.75rem;
    font-variant-numeric: tabular-nums;
  }

  /* Read-only segment text; long unbroken strings wrap instead of overflowing. */
  .segment-text {
    line-height: 1.6;
    padding-left: 0.75rem;
    word-wrap: break-word;
    overflow-wrap: break-word;
  }

  /* Individual clickable words; `.word-active` marks the word under the playhead. */
  .word {
    cursor: pointer;
    border-radius: 2px;
    padding: 0 1px;
    transition: background-color 0.15s;
  }
  .word:hover {
    background: rgba(233, 69, 96, 0.2);
  }
  .word-active {
    background: rgba(233, 69, 96, 0.35);
    color: #fff;
  }

  /* Fallback text for segments without word entries. */
  .segment-plain-text {
    color: #ccc;
  }

  /* Inline editor. */
  .segment-edit {
    padding-left: 0.75rem;
  }
  .edit-textarea {
    /* Include padding and border in the 100% width so the textarea does not
       overflow its container. */
    box-sizing: border-box;
    width: 100%;
    min-height: 3rem;
    background: #1a1a2e;
    color: #e0e0e0;
    border: 1px solid #e94560;
    border-radius: 4px;
    padding: 0.5rem;
    font-family: inherit;
    font-size: inherit;
    line-height: 1.6;
    resize: vertical;
  }
  /* The default outline is replaced by a lighter border colour as the focus indicator. */
  .edit-textarea:focus {
    outline: none;
    border-color: #ff6b81;
  }
  .edit-hint {
    font-size: 0.7rem;
    color: #666;
  }

  /* Small badge shown after the text of segments flagged as edited. */
  .edited-badge {
    font-size: 0.65rem;
    color: #e94560;
    background: rgba(233, 69, 96, 0.15);
    padding: 0.1rem 0.3rem;
    border-radius: 3px;
    margin-left: 0.5rem;
    vertical-align: middle;
  }
</style>