"""Tests for transcription service.""" from voice_to_notes.services.transcribe import ( SegmentResult, TranscriptionResult, WordResult, result_to_payload, ) def test_result_to_payload(): """Test converting TranscriptionResult to IPC payload.""" result = TranscriptionResult( segments=[ SegmentResult( text="hello world", start_ms=0, end_ms=2000, words=[ WordResult(word="hello", start_ms=0, end_ms=500, confidence=0.95), WordResult(word="world", start_ms=600, end_ms=2000, confidence=0.92), ], ), ], language="en", language_probability=0.98, duration_ms=2000, ) payload = result_to_payload(result) assert payload["language"] == "en" assert payload["duration_ms"] == 2000 assert len(payload["segments"]) == 1 seg = payload["segments"][0] assert seg["text"] == "hello world" assert seg["start_ms"] == 0 assert seg["end_ms"] == 2000 assert len(seg["words"]) == 2 assert seg["words"][0]["word"] == "hello" assert seg["words"][0]["confidence"] == 0.95 def test_result_to_payload_empty(): """Test empty transcription result.""" result = TranscriptionResult() payload = result_to_payload(result) assert payload["segments"] == [] assert payload["language"] == "" assert payload["duration_ms"] == 0 def test_chunk_report_size_progress(): """Test CHUNK_REPORT_SIZE progress emission.""" from voice_to_notes.services.transcribe import CHUNK_REPORT_SIZE assert CHUNK_REPORT_SIZE == 10 def test_transcribe_chunked_with_mocked_ffmpeg(monkeypatch): """Test transcribe_chunked with mocked ffmpeg/ffprobe and mocked WhisperModel.""" from unittest.mock import MagicMock, patch from voice_to_notes.services.transcribe import TranscribeService, SegmentResult, WordResult # Mock subprocess.run for ffprobe (returns duration of 700s = ~2 chunks at 300s each) original_run = __import__("subprocess").run def mock_subprocess_run(cmd, **kwargs): if "ffprobe" in cmd: result = MagicMock() result.stdout = "700.0\n" result.returncode = 0 return result elif "ffmpeg" in cmd: # Create an empty temp file (simulate chunk extraction) # The output file is the last argument import pathlib output_file = cmd[-1] pathlib.Path(output_file).touch() result = MagicMock() result.returncode = 0 return result return original_run(cmd, **kwargs) # Mock WhisperModel mock_model = MagicMock() def mock_transcribe_call(file_path, **kwargs): mock_segments = [] for i in range(3): seg = MagicMock() seg.start = i * 1.0 seg.end = (i + 1) * 1.0 seg.text = f"Segment {i}" seg.words = [] mock_segments.append(seg) mock_info = MagicMock() mock_info.language = "en" mock_info.language_probability = 0.99 mock_info.duration = 300.0 return iter(mock_segments), mock_info mock_model.transcribe = mock_transcribe_call service = TranscribeService() service._model = mock_model service._current_model_name = "base" service._current_device = "cpu" service._current_compute_type = "int8" written_messages = [] def mock_write(msg): written_messages.append(msg) with patch("subprocess.run", mock_subprocess_run), \ patch("voice_to_notes.services.transcribe.write_message", mock_write): result = service.transcribe_chunked("req-1", "/fake/long_audio.wav") # Should have segments from multiple chunks assert len(result.segments) > 0 # Verify timestamp offsets — segments from chunk 1 should start at 0, # segments from chunk 2 should be offset by 300000ms if len(result.segments) > 3: # Chunk 2 segments should have offset timestamps assert result.segments[3].start_ms >= 300000 assert result.duration_ms == 700000 assert result.language == "en"