"""Tests for pipeline service data structures and merge logic.""" from voice_to_notes.services.diarize import SpeakerSegment from voice_to_notes.services.pipeline import ( PipelineResult, PipelineSegment, PipelineService, pipeline_result_to_payload, ) from voice_to_notes.services.transcribe import ( SegmentResult, TranscriptionResult, WordResult, ) def test_pipeline_result_to_payload(): result = PipelineResult( segments=[ PipelineSegment( text="Hello world", start_ms=0, end_ms=2000, speaker="SPEAKER_00", words=[ WordResult(word="Hello", start_ms=0, end_ms=800, confidence=0.95), WordResult(word="world", start_ms=900, end_ms=2000, confidence=0.88), ], ), ], language="en", language_probability=0.98, duration_ms=10000, speakers=["SPEAKER_00", "SPEAKER_01"], num_speakers=2, ) payload = pipeline_result_to_payload(result) assert payload["language"] == "en" assert payload["num_speakers"] == 2 assert len(payload["segments"]) == 1 assert payload["segments"][0]["speaker"] == "SPEAKER_00" assert len(payload["segments"][0]["words"]) == 2 def test_pipeline_result_to_payload_empty(): result = PipelineResult() payload = pipeline_result_to_payload(result) assert payload["segments"] == [] assert payload["speakers"] == [] assert payload["num_speakers"] == 0 def test_merge_results_assigns_speakers(): """Test that _merge_results correctly assigns speakers based on overlap.""" service = PipelineService() transcription = TranscriptionResult( segments=[ SegmentResult(text="Hello there", start_ms=0, end_ms=3000, words=[]), SegmentResult(text="How are you", start_ms=4000, end_ms=7000, words=[]), ], language="en", language_probability=0.99, duration_ms=10000, ) speaker_segments = [ SpeakerSegment(speaker="SPEAKER_00", start_ms=0, end_ms=3500), SpeakerSegment(speaker="SPEAKER_01", start_ms=3500, end_ms=8000), ] result = service._merge_results(transcription, speaker_segments) assert len(result.segments) == 2 assert result.segments[0].speaker == "SPEAKER_00" assert result.segments[1].speaker == "SPEAKER_01" def test_merge_results_no_speaker_segments(): """With no speaker segments, all speakers should be None.""" service = PipelineService() transcription = TranscriptionResult( segments=[SegmentResult(text="Hello", start_ms=0, end_ms=1000, words=[])], language="en", language_probability=0.99, duration_ms=1000, ) result = service._merge_results(transcription, []) assert result.segments[0].speaker is None