Stream transcript segments to frontend as they are transcribed

Send each segment to the frontend immediately after transcription via
a new pipeline.segment IPC message, then send speaker assignments as a
batch pipeline.speaker_update message after diarization completes. This
lets the UI display segments progressively instead of waiting for the
entire pipeline to finish.

Changes:
- Add partial_segment_message and speaker_update_message IPC factories
- Add on_segment callback parameter to TranscribeService.transcribe()
- Emit partial segments and speaker updates from PipelineService.run()
- Add send_and_receive_with_progress to SidecarManager (Rust)
- Route pipeline.segment/speaker_update events in run_pipeline command
- Listen for streaming events in Svelte frontend (+page.svelte)
- Add tests for new message types, callback signature, and update logic

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-03-20 13:47:57 -07:00
parent d00281f0c7
commit 67ed69df00
9 changed files with 223 additions and 2 deletions

View File

@@ -7,7 +7,11 @@ import time
from dataclasses import dataclass, field
from typing import Any
from voice_to_notes.ipc.messages import progress_message
from voice_to_notes.ipc.messages import (
partial_segment_message,
progress_message,
speaker_update_message,
)
from voice_to_notes.ipc.protocol import write_message
from voice_to_notes.services.diarize import DiarizeService, SpeakerSegment
from voice_to_notes.services.transcribe import (
@@ -82,6 +86,15 @@ class PipelineService:
progress_message(request_id, 0, "pipeline", "Starting transcription pipeline...")
)
def _emit_segment(seg: SegmentResult, index: int) -> None:
    """Send one transcribed segment out as a partial-segment message.

    Builds a payload from the segment's position, text, timing and
    per-word details, wraps it with ``partial_segment_message`` for the
    enclosing ``request_id``, and hands it to ``write_message``.

    Args:
        seg: The segment to emit.
        index: Position of the segment, passed through as ``"index"``.
    """
    # Keys are inserted in the same order as the wire payload:
    # index, text, start_ms, end_ms, words.
    payload = {
        "index": index,
        "text": seg.text,
        "start_ms": seg.start_ms,
        "end_ms": seg.end_ms,
    }

    word_entries = []
    for word in seg.words:
        word_entries.append(
            {
                "word": word.word,
                "start_ms": word.start_ms,
                "end_ms": word.end_ms,
                "confidence": word.confidence,
            }
        )
    payload["words"] = word_entries

    message = partial_segment_message(request_id, payload)
    write_message(message)
transcription = self._transcribe_service.transcribe(
request_id=request_id,
file_path=file_path,
@@ -89,6 +102,7 @@ class PipelineService:
device=device,
compute_type=compute_type,
language=language,
on_segment=_emit_segment,
)
if skip_diarization:
@@ -140,6 +154,10 @@ class PipelineService:
flush=True,
)
updates = [{"index": i, "speaker": seg.speaker} for i, seg in enumerate(result.segments) if seg.speaker]
if updates:
write_message(speaker_update_message(request_id, updates))
write_message(
progress_message(request_id, 100, "done", "Pipeline complete")
)

View File

@@ -4,6 +4,7 @@ from __future__ import annotations
import sys
import time
from collections.abc import Callable
from dataclasses import dataclass, field
from typing import Any
@@ -90,6 +91,7 @@ class TranscribeService:
device: str = "cpu",
compute_type: str = "int8",
language: str | None = None,
on_segment: Callable[[SegmentResult, int], None] | None = None,
) -> TranscriptionResult:
"""Transcribe an audio file with word-level timestamps.
@@ -145,6 +147,9 @@ class TranscribeService:
)
)
if on_segment:
on_segment(result.segments[-1], segment_count - 1)
# Send progress every few segments
if segment_count % 5 == 0:
write_message(