Merge perf/stream-segments: streaming partial transcript segments and speaker updates
This commit is contained in:
@@ -7,7 +7,11 @@ import time
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any
|
||||
|
||||
from voice_to_notes.ipc.messages import progress_message
|
||||
from voice_to_notes.ipc.messages import (
|
||||
partial_segment_message,
|
||||
progress_message,
|
||||
speaker_update_message,
|
||||
)
|
||||
from voice_to_notes.ipc.protocol import write_message
|
||||
from voice_to_notes.services.diarize import DiarizeService, SpeakerSegment
|
||||
from voice_to_notes.services.transcribe import (
|
||||
@@ -83,6 +87,15 @@ class PipelineService:
|
||||
progress_message(request_id, 0, "pipeline", "Starting transcription pipeline...")
|
||||
)
|
||||
|
||||
def _emit_segment(seg: SegmentResult, index: int) -> None:
    """Send one transcribed segment as a partial-segment IPC message.

    The payload carries the segment's position (``index``), its text, its
    start/end timestamps, and one entry per word with that word's own
    timing and confidence. ``request_id`` is not a parameter; it is read
    from the enclosing scope.

    Args:
        seg: The segment to report; ``text``, ``start_ms``, ``end_ms`` and
            ``words`` are read from it.
        index: Position of the segment, forwarded unchanged in the payload.
    """
    # Flatten each word object into a plain dict for the message payload.
    word_entries = []
    for word in seg.words:
        word_entries.append(
            {
                "word": word.word,
                "start_ms": word.start_ms,
                "end_ms": word.end_ms,
                "confidence": word.confidence,
            }
        )

    # Key order is kept as-is so the emitted message layout is unchanged.
    payload = {
        "index": index,
        "text": seg.text,
        "start_ms": seg.start_ms,
        "end_ms": seg.end_ms,
        "words": word_entries,
    }
    message = partial_segment_message(request_id, payload)
    write_message(message)
|
||||
|
||||
transcription = self._transcribe_service.transcribe(
|
||||
request_id=request_id,
|
||||
file_path=file_path,
|
||||
@@ -90,6 +103,7 @@ class PipelineService:
|
||||
device=device,
|
||||
compute_type=compute_type,
|
||||
language=language,
|
||||
on_segment=_emit_segment,
|
||||
)
|
||||
|
||||
if skip_diarization:
|
||||
@@ -174,6 +188,10 @@ class PipelineService:
|
||||
flush=True,
|
||||
)
|
||||
|
||||
updates = [{"index": i, "speaker": seg.speaker} for i, seg in enumerate(result.segments) if seg.speaker]
|
||||
if updates:
|
||||
write_message(speaker_update_message(request_id, updates))
|
||||
|
||||
write_message(
|
||||
progress_message(request_id, 100, "done", "Pipeline complete")
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user