When editing a segment, word timing is now intelligently redistributed:

- Spelling fixes (same word count): each word keeps its original timing.
- Word splits (e.g. "gonna" → "going to"): the original word's time range is divided proportionally across the new words.
- Inserted words: timing is interpolated from the neighboring words.
- Deleted words: the remaining words keep their timing, and the gaps collapse.

This preserves click-to-seek accuracy for common edits such as fixing misheard words or splitting concatenated words.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
375 lines
12 KiB
Svelte
375 lines
12 KiB
Svelte
<script lang="ts">
|
|
import { segments, speakers } from '$lib/stores/transcript';
|
|
import { currentTimeMs, isPlaying } from '$lib/stores/playback';
|
|
import type { Segment, Word, Speaker } from '$lib/types/transcript';
|
|
|
|
/** Callbacks the parent component may pass to the transcript editor. */
interface Props {
  /** Called with a word's `start_ms` when that word is clicked or activated with Enter. */
  onWordClick?: (timeMs: number) => void;

  /** Called with the segment id and the trimmed new text after an edit is saved. */
  onTextEdit?: (segmentId: string, newText: string) => void;
}
|
|
|
|
// Optional callbacks handed in by the parent.
let { onWordClick, onTextEdit }: Props = $props();

// Root element of the editor; assigned through `bind:this` in the markup.
let transcriptContainer: HTMLDivElement;

// When false, the auto-scroll effect skips scrolling (set by handleScroll).
let autoScroll = $state(true);

// Id of the segment the auto-scroll effect last scrolled to.
let lastActiveSegmentId = $state('');

// Pending timer that switches auto-scroll back on after a scroll event.
let userScrollTimeout: ReturnType<typeof setTimeout> | null = null;
|
|
|
|
/**
 * Look up the name to display for a speaker.
 *
 * @param speakerId - Id of the speaker, or null/empty when the segment has none.
 * @param speakerList - Speakers to search.
 * @returns The first matching speaker's `display_name`, else its `label`,
 *   else `'Unknown'` (also used when nothing matches or a value is empty).
 */
function getSpeakerName(speakerId: string | null, speakerList: Speaker[]): string {
  const fallback = 'Unknown';
  if (!speakerId) {
    return fallback;
  }

  for (const candidate of speakerList) {
    if (candidate.id === speakerId) {
      // `||` on purpose: an empty display name falls through to the label.
      return candidate.display_name || candidate.label || fallback;
    }
  }
  return fallback;
}
|
|
|
|
/**
 * Look up the accent colour for a speaker.
 *
 * @param speakerId - Id of the speaker, or null/empty when the segment has none.
 * @param speakerList - Speakers to search.
 * @returns The first matching speaker's `color`, or `'#888'` when there is
 *   no id, no match, or the colour is empty.
 */
function getSpeakerColor(speakerId: string | null, speakerList: Speaker[]): string {
  const neutral = '#888';
  const match = speakerId
    ? speakerList.find(({ id }) => id === speakerId)
    : undefined;
  return match?.color || neutral;
}
|
|
|
|
/**
 * Format a millisecond offset as `m:ss`.
 *
 * Minutes are not capped or split into hours, so one hour renders as `60:00`.
 *
 * @param ms - Offset in milliseconds; fractions of a second are dropped.
 * @returns Minutes, a colon, then seconds zero-padded to two digits.
 */
function formatTimestamp(ms: number): string {
  const wholeSeconds = Math.floor(ms / 1000);
  const secondsPart = String(wholeSeconds % 60).padStart(2, '0');
  const minutesPart = Math.floor(wholeSeconds / 60);
  return minutesPart + ':' + secondsPart;
}
|
|
|
|
/**
 * Tell whether the playhead is inside a word's time range.
 *
 * Both ends are inclusive, so `currentMs` equal to `start_ms` or `end_ms` counts.
 *
 * @param word - Word whose `start_ms`/`end_ms` bound the range.
 * @param currentMs - Playback position in milliseconds.
 */
function isWordActive(word: Word, currentMs: number): boolean {
  const hasStarted = currentMs >= word.start_ms;
  const hasNotEnded = currentMs <= word.end_ms;
  return hasStarted && hasNotEnded;
}
|
|
|
|
/**
 * Tell whether the playhead is inside a segment's time range.
 *
 * Both ends are inclusive, so `currentMs` equal to `start_ms` or `end_ms` counts.
 *
 * @param segment - Segment whose `start_ms`/`end_ms` bound the range.
 * @param currentMs - Playback position in milliseconds.
 */
function isSegmentActive(segment: Segment, currentMs: number): boolean {
  const { start_ms: opensAt, end_ms: closesAt } = segment;
  return opensAt <= currentMs && currentMs <= closesAt;
}
|
|
|
|
// Id of the segment whose text is shown in the textarea; null when not editing.
let editingSegmentId = $state<string | null>(null);

// Text bound to the edit textarea.
let editText = $state('');
|
|
|
|
/**
 * Report a word activation to the parent, passing the word's start time.
 * Does nothing when no `onWordClick` callback was supplied.
 */
function handleWordClick(word: Word) {
  if (onWordClick != null) {
    onWordClick(word.start_ms);
  }
}
|
|
|
|
/**
 * Put a segment into edit mode and seed the textarea.
 *
 * The draft is the segment's words joined with single spaces; a segment
 * with no words falls back to its plain `text`.
 */
function startEditing(segment: Segment) {
  const { id, words, text } = segment;
  editingSegmentId = id;
  editText = words.length === 0
    ? text
    : words.map(({ word }) => word).join(' ');
}
|
|
|
|
/**
 * Commit the current draft to a segment and leave edit mode.
 *
 * The call is ignored unless `segmentId` is the segment being edited. Saving
 * with Enter or cancelling with Escape unmounts the textarea, and some
 * browsers dispatch `blur` on a focused element when it is removed; without
 * this guard that late `blur` would save a second time or save a cancelled
 * edit.
 *
 * An empty draft, or a draft equal to the text the editor was opened with,
 * closes the editor without touching the segment or calling `onTextEdit`.
 *
 * @param segmentId - Id of the segment the textarea belongs to.
 */
function finishEditing(segmentId: string) {
  if (editingSegmentId !== segmentId) return;

  // Leave edit mode before doing anything else so re-entrant calls hit the guard.
  editingSegmentId = null;

  const trimmed = editText.trim();
  if (!trimmed) return;

  let changed = false;
  segments.update(segs => segs.map(s => {
    if (s.id !== segmentId) return s;

    // Same text startEditing() seeded the textarea with.
    const shownText = s.words.length > 0
      ? s.words.map(w => w.word).join(' ')
      : s.text;
    if (trimmed === shownText.trim()) return s;

    changed = true;
    const newWordTexts = trimmed.split(/\s+/);
    return {
      ...s,
      text: trimmed,
      words: redistributeWords(s, newWordTexts),
      // Keep the earliest pre-edit text across repeated edits.
      original_text: s.original_text ?? s.text,
      is_edited: true,
      edited_at: new Date().toISOString(),
    };
  }));

  if (changed) {
    onTextEdit?.(segmentId, trimmed);
  }
}
|
|
|
|
/**
 * Align two word lists with a longest-common-subsequence match.
 *
 * @param oldTexts - Normalised (e.g. lower-cased) texts of the existing words.
 * @param newTexts - Normalised texts of the edited words.
 * @returns `[oldIndex, newIndex]` pairs of identical words, ascending in both indices.
 */
function alignWordTexts(oldTexts: string[], newTexts: string[]): Array<[number, number]> {
  const rows = oldTexts.length;
  const cols = newTexts.length;

  // lcs[i][j] = length of the LCS of oldTexts[i..] and newTexts[j..]
  const lcs: number[][] = Array.from({ length: rows + 1 }, () => new Array<number>(cols + 1).fill(0));
  for (let i = rows - 1; i >= 0; i--) {
    for (let j = cols - 1; j >= 0; j--) {
      lcs[i][j] = oldTexts[i] === newTexts[j]
        ? lcs[i + 1][j + 1] + 1
        : Math.max(lcs[i + 1][j], lcs[i][j + 1]);
    }
  }

  const pairs: Array<[number, number]> = [];
  let i = 0;
  let j = 0;
  while (i < rows && j < cols) {
    if (oldTexts[i] === newTexts[j]) {
      pairs.push([i, j]);
      i++;
      j++;
    } else if (lcs[i + 1][j] >= lcs[i][j + 1]) {
      i++;
    } else {
      j++;
    }
  }
  return pairs;
}

/**
 * Redistribute word timing after an edit.
 *
 * - Same word count: treated as in-place spelling fixes; every word keeps its
 *   id, timing and index and only the text changes.
 * - Otherwise old and new words are aligned case-insensitively. Words present
 *   in both keep their id and timing and get their new `word_index`.
 * - Between two aligned words:
 *   - deleted words are dropped, leaving the other words' timing untouched;
 *   - a run replaced by an equally long run is mapped one-to-one;
 *   - a run replaced by a different number of words (e.g. "gonna" →
 *     "going to") has its whole time range divided evenly among the new words;
 *   - pure insertions evenly share the gap between the previous word's end and
 *     the next word's start (segment bounds are used at the edges).
 *
 * Ids created for new words never collide with ids already used in the
 * segment, so repeated edits cannot produce duplicate keys.
 *
 * @param segment - Segment being edited; its `words` are the alignment source.
 * @param newWordTexts - Edited text split into words.
 * @returns The new word list, ordered as `newWordTexts`.
 */
function redistributeWords(segment: Segment, newWordTexts: string[]): Word[] {
  const oldWords = segment.words;

  // Same word count — preserve per-word timing (spelling fixes)
  if (newWordTexts.length === oldWords.length) {
    return oldWords.map((w, i) => ({ ...w, word: newWordTexts[i] }));
  }

  const anchors = alignWordTexts(
    oldWords.map(w => w.word.toLowerCase()),
    newWordTexts.map(w => w.toLowerCase()),
  );
  // Sentinel past both ends so the trailing run is processed like any other gap.
  anchors.push([oldWords.length, newWordTexts.length]);

  // Reserve every existing id up front; kept words reuse theirs.
  const usedIds = new Set(oldWords.map(w => w.id));
  const freshId = (index: number): string => {
    const base = `${segment.id}-word-${index}`;
    let candidate = base;
    for (let n = 1; usedIds.has(candidate); n++) {
      candidate = `${base}-${n}`;
    }
    usedIds.add(candidate);
    return candidate;
  };

  const result: Word[] = [];
  let oldIdx = 0;
  let newIdx = 0;

  for (const [anchorOld, anchorNew] of anchors) {
    const removed = oldWords.slice(oldIdx, anchorOld);
    const addedCount = anchorNew - newIdx;

    if (addedCount > 0 && removed.length === addedCount) {
      // Equal-length replacement: pair the words up and keep each one's timing.
      removed.forEach((w, k) => {
        result.push({ ...w, word: newWordTexts[newIdx + k], word_index: newIdx + k });
      });
    } else if (addedCount > 0) {
      let windowStart: number;
      let windowEnd: number;
      let confidence: number;

      if (removed.length > 0) {
        // Split/merge: the new words inherit the replaced run's time range.
        windowStart = removed[0].start_ms;
        windowEnd = removed[removed.length - 1].end_ms;
        confidence = removed.reduce(
          (lowest, w) => (w.confidence < lowest ? w.confidence : lowest),
          removed[0].confidence,
        );
      } else {
        // Insertion: use the silence between the neighbouring words.
        windowStart = result.length > 0 ? result[result.length - 1].end_ms : segment.start_ms;
        windowEnd = anchorOld < oldWords.length ? oldWords[anchorOld].start_ms : segment.end_ms;
        confidence = 1.0;
      }

      // Never let a word end before it starts when neighbours touch or overlap.
      const span = Math.max(windowStart, windowEnd) - windowStart;
      for (let k = 0; k < addedCount; k++) {
        result.push({
          id: freshId(newIdx + k),
          segment_id: segment.id,
          word: newWordTexts[newIdx + k],
          start_ms: Math.round(windowStart + (span * k) / addedCount),
          end_ms: Math.round(windowStart + (span * (k + 1)) / addedCount),
          confidence,
          word_index: newIdx + k,
        });
      }
    }

    if (anchorOld < oldWords.length) {
      // Unchanged word — keep original timing, adopt the new casing and index.
      result.push({ ...oldWords[anchorOld], word: newWordTexts[anchorNew], word_index: anchorNew });
    }
    oldIdx = anchorOld + 1;
    newIdx = anchorNew + 1;
  }

  return result;
}
|
|
|
|
/**
 * Keyboard shortcuts for the edit textarea.
 *
 * - Escape cancels the edit.
 * - Enter (without Shift) saves; Shift+Enter is left alone so it inserts a newline.
 *
 * Keys pressed while an IME composition is in progress are ignored: there,
 * Enter and Escape confirm or cancel the composition itself and must not also
 * save or discard the whole edit.
 *
 * @param e - The `keydown` event from the textarea.
 * @param segmentId - Id of the segment being edited.
 */
function handleEditKeydown(e: KeyboardEvent, segmentId: string) {
  if (e.isComposing) return;

  if (e.key === 'Escape') {
    editingSegmentId = null;
  } else if (e.key === 'Enter' && !e.shiftKey) {
    // Stop the textarea from inserting the newline before saving.
    e.preventDefault();
    finishEditing(segmentId);
  }
}
|
|
|
|
/**
 * Scroll handler for the transcript container.
 *
 * During playback, each scroll event turns auto-scroll off and (re)starts a
 * 3-second timer that turns it back on. Scroll events while paused are ignored.
 */
function handleScroll() {
  if ($isPlaying) {
    autoScroll = false;

    // Restart the countdown so it measures from the latest scroll event.
    if (userScrollTimeout) {
      clearTimeout(userScrollTimeout);
    }
    userScrollTimeout = setTimeout(() => {
      autoScroll = true;
    }, 3000);
  }
}
|
|
|
|
// Keep the active segment centred while audio is playing.
// Statement order is kept as-is: values are read in this sequence and the
// early returns decide which of them are read on a given run.
// NOTE(review): if this container is itself the scrolling element, the smooth
// scroll below presumably triggers handleScroll too — confirm that this does
// not pause auto-scroll after every automatic scroll.
$effect(() => {
  if (!($isPlaying && autoScroll && transcriptContainer)) return;

  const nowMs = $currentTimeMs;
  const current = $segments.find(seg => isSegmentActive(seg, nowMs));

  // Scroll once per segment, not on every playback tick.
  if (!current || current.id === lastActiveSegmentId) return;
  lastActiveSegmentId = current.id;

  const target = transcriptContainer.querySelector(`[data-segment-id="${current.id}"]`);
  target?.scrollIntoView({ behavior: 'smooth', block: 'center' });
});
|
|
</script>
|
|
|
|
<!-- Transcript list: one block per segment, with click-to-seek words and double-click editing. -->
<div class="transcript-editor" bind:this={transcriptContainer} onscroll={handleScroll}>
  {#if $segments.length === 0}
    <div class="empty-state">
      <p>No transcript yet</p>
      <p class="hint">Import an audio file and run transcription to get started</p>
    </div>
  {:else}
    {#each $segments as segment (segment.id)}
      <div
        class="segment"
        class:active={isSegmentActive(segment, $currentTimeMs)}
        data-segment-id={segment.id}
      >
        <div class="segment-header">
          <span
            class="speaker-label"
            style="border-left-color: {getSpeakerColor(segment.speaker_id, $speakers)}"
          >
            {getSpeakerName(segment.speaker_id, $speakers)}
          </span>
          <span class="timestamp">{formatTimestamp(segment.start_ms)}</span>
        </div>
        {#if editingSegmentId === segment.id}
          <div class="segment-edit">
            <textarea
              class="edit-textarea"
              bind:value={editText}
              onblur={() => finishEditing(segment.id)}
              onkeydown={(e) => handleEditKeydown(e, segment.id)}
            ></textarea>
            <span class="edit-hint">Enter to save, Esc to cancel</span>
          </div>
        {:else}
          <!-- svelte-ignore a11y_no_static_element_interactions -->
          <div class="segment-text" ondblclick={() => startEditing(segment)}>
            {#each segment.words as word (word.id)}
              <span
                class="word"
                class:word-active={isWordActive(word, $currentTimeMs)}
                onclick={() => handleWordClick(word)}
                role="button"
                tabindex="0"
                onkeydown={(e) => { if (e.key === 'Enter') handleWordClick(word); }}
              >{word.word} </span>
            {:else}
              <span class="segment-plain-text">{segment.text}</span>
            {/each}
            {#if segment.is_edited}
              <span class="edited-badge">edited</span>
            {/if}
          </div>
        {/if}
      </div>
    {/each}
  {/if}
</div>
|
|
|
|
<style>
  /* Scrollable container for the whole transcript */
  .transcript-editor {
    flex: 1;
    overflow-y: auto;
    padding: 1rem;
    background: #16213e;
    border-radius: 8px;
    color: #e0e0e0;
  }

  /* Placeholder shown when there are no segments */
  .empty-state {
    display: flex;
    flex-direction: column;
    align-items: center;
    justify-content: center;
    height: 100%;
    color: #666;
  }

  .hint {
    font-size: 0.875rem;
    color: #555;
  }

  /* One transcript segment; tinted while the playhead is inside it */
  .segment {
    margin-bottom: 1rem;
    padding: 0.5rem;
    border-radius: 4px;
    transition: background-color 0.2s;
  }

  .segment.active {
    background: rgba(233, 69, 96, 0.1);
  }

  /* Speaker name and timestamp row */
  .segment-header {
    display: flex;
    align-items: center;
    gap: 0.5rem;
    margin-bottom: 0.25rem;
  }

  /* The border colour is supplied inline per speaker */
  .speaker-label {
    font-weight: 600;
    font-size: 0.875rem;
    border-left: 3px solid;
    padding-left: 0.5rem;
  }

  .timestamp {
    color: #666;
    font-size: 0.75rem;
    font-variant-numeric: tabular-nums;
  }

  /* Read-only text: clickable words or the plain-text fallback */
  .segment-text {
    line-height: 1.6;
    padding-left: 0.75rem;
    word-wrap: break-word;
    overflow-wrap: break-word;
  }

  .word {
    cursor: pointer;
    border-radius: 2px;
    padding: 0 1px;
    transition: background-color 0.15s;
  }

  .word:hover {
    background: rgba(233, 69, 96, 0.2);
  }

  /* Word currently under the playhead */
  .word-active {
    background: rgba(233, 69, 96, 0.35);
    color: #fff;
  }

  .segment-plain-text {
    color: #ccc;
  }

  /* Inline editor */
  .segment-edit {
    padding-left: 0.75rem;
  }

  .edit-textarea {
    width: 100%;
    min-height: 3rem;
    background: #1a1a2e;
    color: #e0e0e0;
    border: 1px solid #e94560;
    border-radius: 4px;
    padding: 0.5rem;
    font-family: inherit;
    font-size: inherit;
    line-height: 1.6;
    resize: vertical;
  }

  .edit-textarea:focus {
    outline: none;
    border-color: #ff6b81;
  }

  .edit-hint {
    font-size: 0.7rem;
    color: #666;
  }

  /* Marker appended to segments that have been edited */
  .edited-badge {
    font-size: 0.65rem;
    color: #e94560;
    background: rgba(233, 69, 96, 0.15);
    padding: 0.1rem 0.3rem;
    border-radius: 3px;
    margin-left: 0.5rem;
    vertical-align: middle;
  }
</style>
|