perf/pipeline-improvements #2
@@ -63,25 +63,7 @@
|
|||||||
segments.update(segs => segs.map(s => {
|
segments.update(segs => segs.map(s => {
|
||||||
if (s.id !== segmentId) return s;
|
if (s.id !== segmentId) return s;
|
||||||
const newWordTexts = trimmed.split(/\s+/);
|
const newWordTexts = trimmed.split(/\s+/);
|
||||||
let newWords;
|
const newWords = redistributeWords(s, newWordTexts);
|
||||||
if (newWordTexts.length === s.words.length) {
|
|
||||||
// Same word count (e.g. spelling fix) — preserve each word's timing
|
|
||||||
newWords = s.words.map((w, widx) => ({
|
|
||||||
...w,
|
|
||||||
word: newWordTexts[widx],
|
|
||||||
}));
|
|
||||||
} else {
|
|
||||||
// Word count changed — fall back to segment-level timing
|
|
||||||
newWords = newWordTexts.map((word, widx) => ({
|
|
||||||
id: `${s.id}-word-${widx}`,
|
|
||||||
segment_id: s.id,
|
|
||||||
word,
|
|
||||||
start_ms: s.start_ms,
|
|
||||||
end_ms: s.end_ms,
|
|
||||||
confidence: 1.0,
|
|
||||||
word_index: widx,
|
|
||||||
}));
|
|
||||||
}
|
|
||||||
return {
|
return {
|
||||||
...s,
|
...s,
|
||||||
text: trimmed,
|
text: trimmed,
|
||||||
@@ -96,6 +78,106 @@
|
|||||||
editingSegmentId = null;
|
editingSegmentId = null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Redistribute word timing after a segment's text has been edited.
 *
 * When the word count is unchanged, every word keeps its timing and only the
 * text is replaced (spelling fixes). Otherwise the old and new word lists are
 * aligned on their longest common subsequence (case-insensitive), and each
 * run of words between two aligned "anchor" words is resolved as follows:
 *
 * - Anchors (unchanged words) keep their original timing and id.
 * - Old words with no counterpart (deletions) are dropped.
 * - A run with the same number of old and new words is treated as in-place
 *   replacements: timing and id are kept one-to-one.
 * - A run where old words were replaced by a different number of new words
 *   (e.g. "gonna" → "going to") divides the replaced time range evenly.
 * - A run of purely inserted words divides the silent gap between the
 *   previous word's end and the next anchor's start (or the segment bounds)
 *   evenly.
 *
 * Alignment costs O(old × new) time and memory, which is fine for
 * segment-sized word lists.
 *
 * @param segment - The segment being edited; `segment.words` holds the timing
 *   of the words before the edit.
 * @param newWordTexts - The edited text split into words, in order.
 * @returns One `Word` per entry of `newWordTexts`, with `word_index` equal to
 *   its position whenever the word count changed.
 */
function redistributeWords(segment: Segment, newWordTexts: string[]): Word[] {
  const oldWords = segment.words;

  // Same word count — preserve per-word timing (spelling fixes)
  if (newWordTexts.length === oldWords.length) {
    return oldWords.map((w, i) => ({ ...w, word: newWordTexts[i] }));
  }

  const oldTexts = oldWords.map(w => w.word.toLowerCase());
  const newTexts = newWordTexts.map(w => w.toLowerCase());
  const oldCount = oldTexts.length;
  const newCount = newTexts.length;

  const result: Word[] = [];

  // Ids of existing words may be retained by anchors, so a generated id must
  // never reuse one of them (ids from a previous edit share the same format).
  const takenIds = new Set<string>(oldWords.map(w => w.id));
  const freshId = (index: number): string => {
    const base = `${segment.id}-word-${index}`;
    let id = base;
    for (let n = 1; takenIds.has(id); n++) {
      id = `${base}-${n}`;
    }
    takenIds.add(id);
    return id;
  };

  // Emit `count` new words starting at new-index `fromNew`, dividing
  // [startMs, endMs] into equal consecutive slices.
  const spread = (
    fromNew: number,
    count: number,
    startMs: number,
    endMs: number,
    confidence: number,
  ): void => {
    // Overlapping neighbours can yield an inverted range; collapse it to zero
    // length instead of emitting words whose end precedes their start.
    const span = Math.max(0, endMs - startMs);
    for (let k = 0; k < count; k++) {
      const index = fromNew + k;
      result.push({
        id: freshId(index),
        segment_id: segment.id,
        word: newWordTexts[index],
        start_ms: Math.round(startMs + (span * k) / count),
        end_ms: Math.round(startMs + (span * (k + 1)) / count),
        confidence,
        word_index: index,
      });
    }
  };

  // Resolve the unmatched run old[oldFrom, oldTo) → new[newFrom, newTo) that
  // lies between two anchors (or between an anchor and a list boundary).
  const fillGap = (oldFrom: number, oldTo: number, newFrom: number, newTo: number): void => {
    const added = newTo - newFrom;
    if (added === 0) return; // only deletions (or nothing) in this run

    const removed = oldTo - oldFrom;
    if (removed === added) {
      // In-place replacements: keep each word's timing and identity.
      for (let k = 0; k < added; k++) {
        result.push({
          ...oldWords[oldFrom + k],
          word: newWordTexts[newFrom + k],
          word_index: newFrom + k,
        });
      }
    } else if (removed > 0) {
      // Split / merge: the new words share the time range of the words they
      // replace. A split of a single word inherits that word's confidence;
      // otherwise the text is user-authored, so confidence is 1.0.
      spread(
        newFrom,
        added,
        oldWords[oldFrom].start_ms,
        oldWords[oldTo - 1].end_ms,
        removed === 1 ? oldWords[oldFrom].confidence : 1.0,
      );
    } else {
      // Pure insertion: interpolate across the gap between the neighbours.
      const prevEnd = result.length > 0 ? result[result.length - 1].end_ms : segment.start_ms;
      const nextStart = oldTo < oldCount ? oldWords[oldTo].start_ms : segment.end_ms;
      spread(newFrom, added, prevEnd, nextStart, 1.0);
    }
  };

  // lcs[i][j] = length of the longest common subsequence of
  // oldTexts[i..] and newTexts[j..].
  const lcs: number[][] = Array.from({ length: oldCount + 1 }, () =>
    new Array<number>(newCount + 1).fill(0),
  );
  for (let i = oldCount - 1; i >= 0; i--) {
    for (let j = newCount - 1; j >= 0; j--) {
      lcs[i][j] =
        oldTexts[i] === newTexts[j]
          ? lcs[i + 1][j + 1] + 1
          : Math.max(lcs[i + 1][j], lcs[i][j + 1]);
    }
  }

  // Walk the table; every match is an anchor, and the unmatched words
  // accumulated since the previous anchor form the gap to resolve.
  let oldIdx = 0;
  let newIdx = 0;
  let gapOldStart = 0;
  let gapNewStart = 0;
  while (oldIdx < oldCount && newIdx < newCount) {
    if (oldTexts[oldIdx] === newTexts[newIdx]) {
      fillGap(gapOldStart, oldIdx, gapNewStart, newIdx);
      // Exact match — keep original timing
      result.push({ ...oldWords[oldIdx], word: newWordTexts[newIdx], word_index: newIdx });
      oldIdx++;
      newIdx++;
      gapOldStart = oldIdx;
      gapNewStart = newIdx;
    } else if (lcs[oldIdx + 1][newIdx] >= lcs[oldIdx][newIdx + 1]) {
      oldIdx++;
    } else {
      newIdx++;
    }
  }
  // Whatever remains after the last anchor is one final gap.
  fillGap(gapOldStart, oldCount, gapNewStart, newCount);

  return result;
}
|
||||||
|
|
||||||
function handleEditKeydown(e: KeyboardEvent, segmentId: string) {
|
function handleEditKeydown(e: KeyboardEvent, segmentId: string) {
|
||||||
if (e.key === 'Escape') {
|
if (e.key === 'Escape') {
|
||||||
editingSegmentId = null;
|
editingSegmentId = null;
|
||||||
|
|||||||
Reference in New Issue
Block a user