2026-03-21 05:29:36 +00:00
1 changed files with 101 additions and 19 deletions
--- a/src/lib/components/TranscriptEditor.svelte
+++ b/src/lib/components/TranscriptEditor.svelte
@@ -63,25 +63,7 @@
      segments.update(segs => segs.map(s => {
        if (s.id !== segmentId) return s;
        const newWordTexts = trimmed.split(/\s+/);
-        let newWords;
+        const newWords = redistributeWords(s, newWordTexts);
        if (newWordTexts.length === s.words.length) {
          // Same word count (e.g. spelling fix) — preserve each word's timing
          newWords = s.words.map((w, widx) => ({
            ...w,
            word: newWordTexts[widx],
          }));
        } else {
          // Word count changed — fall back to segment-level timing
          newWords = newWordTexts.map((word, widx) => ({
            id: `${s.id}-word-${widx}`,
            segment_id: s.id,
            word,
            start_ms: s.start_ms,
            end_ms: s.end_ms,
            confidence: 1.0,
            word_index: widx,
          }));
        }
        return {
          ...s,
          text: trimmed,
@@ -96,6 +78,106 @@
    editingSegmentId = null;
  }
  /**
   * Rebuild a segment's word list after its text was edited, keeping as much
   * of the original per-word timing as possible.
   *
   * - Same word count: treated as in-place corrections; every word keeps its
   *   id, timing and confidence, only the text changes.
   * - Different word count: old and new words are aligned with a
   *   case-insensitive longest-common-subsequence (LCS) match.
   *   - Matched (unchanged) words keep their original timing, id and
   *     confidence; only `word` and `word_index` are refreshed.
   *   - Old words with no counterpart in the new text are dropped.
   *   - New words that replace one or more old words share the replaced
   *     words' combined time range evenly (e.g. "gonna" → "going to").
   *   - New words inserted where nothing was removed share the silent gap
   *     between their neighbours (or up to the segment edge) evenly.
   *
   * @param segment      Segment whose `words` carry the pre-edit timing.
   * @param newWordTexts The edited text, already split into words.
   * @returns A new word array in the order of `newWordTexts`; the input
   *          segment and its words are not mutated.
   */
  function redistributeWords(segment: Segment, newWordTexts: string[]): Word[] {
    const oldWords = segment.words;

    // Same word count — preserve per-word timing (spelling fixes)
    if (newWordTexts.length === oldWords.length) {
      return oldWords.map((w, i) => ({ ...w, word: newWordTexts[i] }));
    }

    const oldTexts = oldWords.map(w => w.word.toLowerCase());
    const newTexts = newWordTexts.map(w => w.toLowerCase());
    const oldCount = oldTexts.length;
    const newCount = newTexts.length;

    // lcs[i][j] = length of the longest common subsequence of
    // oldTexts[i..] and newTexts[j..]. Segments are short, so O(n*m) is fine.
    const lcs: number[][] = Array.from({ length: oldCount + 1 }, () =>
      new Array<number>(newCount + 1).fill(0),
    );
    for (let i = oldCount - 1; i >= 0; i--) {
      for (let j = newCount - 1; j >= 0; j--) {
        lcs[i][j] =
          oldTexts[i] === newTexts[j]
            ? lcs[i + 1][j + 1] + 1
            : Math.max(lcs[i + 1][j], lcs[i][j + 1]);
      }
    }

    // Walk the table to collect [oldIndex, newIndex] pairs of unchanged words.
    const anchors: Array<[number, number]> = [];
    let oi = 0;
    let ni = 0;
    while (oi < oldCount && ni < newCount) {
      if (oldTexts[oi] === newTexts[ni]) {
        anchors.push([oi, ni]);
        oi++;
        ni++;
      } else if (lcs[oi + 1][ni] >= lcs[oi][ni + 1]) {
        oi++; // old word was removed or replaced
      } else {
        ni++; // new word was inserted or is a replacement
      }
    }

    // Matched words keep their ids, so generated ids must steer clear of them:
    // a word created by an earlier edit may already own `${segment.id}-word-N`.
    const usedIds = new Set<string>(anchors.map(([oldIndex]) => oldWords[oldIndex].id));
    const makeId = (index: number): string => {
      const base = `${segment.id}-word-${index}`;
      let candidate = base;
      let suffix = 1;
      while (usedIds.has(candidate)) {
        candidate = `${base}-${suffix++}`;
      }
      usedIds.add(candidate);
      return candidate;
    };

    // Sentinel anchor so the trailing gap is handled by the same loop.
    anchors.push([oldCount, newCount]);

    const result: Word[] = [];
    let oldPos = 0;
    let newPos = 0;
    for (const [oldAnchor, newAnchor] of anchors) {
      const gapNew = newAnchor - newPos;
      if (gapNew > 0) {
        const gapOld = oldAnchor - oldPos;
        let rangeStart: number;
        let rangeEnd: number;
        let confidence = 1.0; // user-typed text is treated as certain
        if (gapOld > 0) {
          // Replacement/split: new words inherit the replaced words' time span.
          rangeStart = oldWords[oldPos].start_ms;
          rangeEnd = oldWords[oldAnchor - 1].end_ms;
          if (gapOld === 1 && gapNew > 1) {
            // One old word split into several — keep its recognition confidence.
            confidence = oldWords[oldPos].confidence;
          }
        } else {
          // Pure insertion: fit into the gap between the neighbouring words.
          rangeStart = result.length > 0 ? result[result.length - 1].end_ms : segment.start_ms;
          rangeEnd = oldAnchor < oldCount ? oldWords[oldAnchor].start_ms : segment.end_ms;
        }
        // Guard against overlapping source timestamps producing end < start.
        const span = Math.max(0, rangeEnd - rangeStart);
        for (let k = 0; k < gapNew; k++) {
          const index = newPos + k;
          result.push({
            id: makeId(index),
            segment_id: segment.id,
            word: newWordTexts[index],
            start_ms: Math.round(rangeStart + (span * k) / gapNew),
            end_ms: Math.round(rangeStart + (span * (k + 1)) / gapNew),
            confidence,
            word_index: index,
          });
        }
      }
      if (oldAnchor < oldCount) {
        // Unchanged word — keep original timing, adopt the new casing/index.
        result.push({ ...oldWords[oldAnchor], word: newWordTexts[newAnchor], word_index: newAnchor });
      }
      oldPos = oldAnchor + 1;
      newPos = newAnchor + 1;
    }
    return result;
  }
  function handleEditKeydown(e: KeyboardEvent, segmentId: string) {
    if (e.key === 'Escape') {
      editingSegmentId = null;