Stream transcript segments to frontend as they are transcribed

Send each segment to the frontend immediately after transcription via a new pipeline.segment IPC message, then send speaker assignments as a batch pipeline.speaker_update message after diarization completes. This lets the UI display segments progressively instead of waiting for the entire pipeline to finish.

Changes:
- Add partial_segment_message and speaker_update_message IPC factories
- Add on_segment callback parameter to TranscribeService.transcribe()
- Emit partial segments and speaker updates from PipelineService.run()
- Add send_and_receive_with_progress to SidecarManager (Rust)
- Route pipeline.segment/speaker_update events in run_pipeline command
- Listen for streaming events in Svelte frontend (+page.svelte)
- Add tests for new message types, callback signature, and update logic

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-20 13:47:57 -07:00
parent d00281f0c7
commit 67ed69df00
9 changed files with 223 additions and 2 deletions
--- a/python/voice_to_notes/services/pipeline.py
+++ b/python/voice_to_notes/services/pipeline.py
@@ -7,7 +7,11 @@ import time
 from dataclasses import dataclass, field
 from typing import Any

-from voice_to_notes.ipc.messages import progress_message
+from voice_to_notes.ipc.messages import (
+    partial_segment_message,
+    progress_message,
+    speaker_update_message,
+)
 from voice_to_notes.ipc.protocol import write_message
 from voice_to_notes.services.diarize import DiarizeService, SpeakerSegment
 from voice_to_notes.services.transcribe import (
@@ -82,6 +86,15 @@ class PipelineService:
            progress_message(request_id, 0, "pipeline", "Starting transcription pipeline...")
        )

+        def _emit_segment(seg: SegmentResult, index: int) -> None:
+            write_message(partial_segment_message(request_id, {
+                "index": index,
+                "text": seg.text,
+                "start_ms": seg.start_ms,
+                "end_ms": seg.end_ms,
+                "words": [{"word": w.word, "start_ms": w.start_ms, "end_ms": w.end_ms, "confidence": w.confidence} for w in seg.words],
+            }))
+
        transcription = self._transcribe_service.transcribe(
            request_id=request_id,
            file_path=file_path,
@@ -89,6 +102,7 @@ class PipelineService:
            device=device,
            compute_type=compute_type,
            language=language,
+            on_segment=_emit_segment,
        )

        if skip_diarization:
@@ -140,6 +154,10 @@ class PipelineService:
            flush=True,
        )

+        updates = [{"index": i, "speaker": seg.speaker} for i, seg in enumerate(result.segments) if seg.speaker]
+        if updates:
+            write_message(speaker_update_message(request_id, updates))
+
        write_message(
            progress_message(request_id, 100, "done", "Pipeline complete")
        )
--- a/python/voice_to_notes/services/transcribe.py
+++ b/python/voice_to_notes/services/transcribe.py
@@ -4,6 +4,7 @@ from __future__ import annotations

 import sys
 import time
+from collections.abc import Callable
 from dataclasses import dataclass, field
 from typing import Any

@@ -90,6 +91,7 @@ class TranscribeService:
        device: str = "cpu",
        compute_type: str = "int8",
        language: str | None = None,
+        on_segment: Callable[[SegmentResult, int], None] | None = None,
    ) -> TranscriptionResult:
        """Transcribe an audio file with word-level timestamps.

@@ -145,6 +147,9 @@ class TranscribeService:
                )
            )

+            if on_segment:
+                on_segment(result.segments[-1], segment_count - 1)
+
            # Send progress every few segments
            if segment_count % 5 == 0:
                write_message(