python/tests/test_transcribe.py

"""Tests for transcription service."""

from voice_to_notes.services.transcribe import (
    SegmentResult,
    TranscriptionResult,
    WordResult,
    result_to_payload,
)


def test_result_to_payload():
    """Check the IPC payload built from a one-segment, two-word result."""
    # Build the fixture bottom-up: words, then the segment, then the result.
    hello = WordResult(word="hello", start_ms=0, end_ms=500, confidence=0.95)
    world = WordResult(word="world", start_ms=600, end_ms=2000, confidence=0.92)
    segment = SegmentResult(
        text="hello world",
        start_ms=0,
        end_ms=2000,
        words=[hello, world],
    )
    transcription = TranscriptionResult(
        segments=[segment],
        language="en",
        language_probability=0.98,
        duration_ms=2000,
    )

    payload = result_to_payload(transcription)

    # Top-level fields.
    assert payload["language"] == "en"
    assert payload["duration_ms"] == 2000
    assert len(payload["segments"]) == 1

    # Segment-level fields.
    first_segment = payload["segments"][0]
    assert first_segment["text"] == "hello world"
    assert first_segment["start_ms"] == 0
    assert first_segment["end_ms"] == 2000
    assert len(first_segment["words"]) == 2

    # Word-level fields (only the first word is inspected).
    first_word = first_segment["words"][0]
    assert first_word["word"] == "hello"
    assert first_word["confidence"] == 0.95


def test_result_to_payload_empty():
    """Check the payload produced from a default-constructed result."""
    payload = result_to_payload(TranscriptionResult())

    assert payload["segments"] == []
    assert payload["language"] == ""
    assert payload["duration_ms"] == 0


def test_chunk_report_size_progress():
    """Pin the value of the CHUNK_REPORT_SIZE constant to 10."""
    from voice_to_notes.services.transcribe import (
        CHUNK_REPORT_SIZE as report_size,
    )

    assert report_size == 10


def test_transcribe_chunked_with_mocked_ffmpeg(monkeypatch):
    """Exercise transcribe_chunked with ffmpeg, ffprobe and the model faked.

    ``subprocess.run`` is replaced so that an ffprobe command reports a 700 s
    duration and an ffmpeg command merely creates an empty output file. The
    service's model is replaced by a stub returning three one-second segments
    per call. The test then checks the merged result's duration and language
    and, when more than three segments come back, the offset of the fourth.

    Args:
        monkeypatch: pytest fixture used to install the fakes; everything it
            patches is restored automatically when the test finishes.
    """
    import pathlib
    import subprocess
    from unittest.mock import MagicMock

    from voice_to_notes.services.transcribe import TranscribeService

    # Keep a handle on the real implementation so commands that are neither
    # ffprobe nor ffmpeg still run normally.
    real_run = subprocess.run

    def fake_subprocess_run(cmd, **kwargs):
        """Stand in for subprocess.run for ffprobe/ffmpeg invocations."""
        if "ffprobe" in cmd:
            # Report a 700 s file. At the 300 s chunk length referred to
            # below that is presumably three chunks (300 + 300 + 100) --
            # confirm against the service's chunk size.
            probe = MagicMock()
            probe.stdout = "700.0\n"
            probe.returncode = 0
            return probe
        if "ffmpeg" in cmd:
            # Simulate chunk extraction by creating an empty file; the
            # output path is taken to be the last argument of the command.
            pathlib.Path(cmd[-1]).touch()
            extraction = MagicMock()
            extraction.returncode = 0
            return extraction
        return real_run(cmd, **kwargs)

    def fake_transcribe(file_path, **kwargs):
        """Return three consecutive one-second segments plus model info."""
        fake_segments = []
        for i in range(3):
            seg = MagicMock()
            seg.start = i * 1.0
            seg.end = (i + 1) * 1.0
            seg.text = f"Segment {i}"
            seg.words = []
            fake_segments.append(seg)
        info = MagicMock()
        info.language = "en"
        info.language_probability = 0.99
        info.duration = 300.0
        # Hand back an iterator, as the original stub did.
        return iter(fake_segments), info

    fake_model = MagicMock()
    fake_model.transcribe = fake_transcribe

    # Pre-populate the service's private model state with the stub.
    service = TranscribeService()
    service._model = fake_model
    service._current_model_name = "base"
    service._current_device = "cpu"
    service._current_compute_type = "int8"

    # Messages are captured instead of being written; they are collected but
    # not asserted on here.
    written_messages = []

    monkeypatch.setattr(subprocess, "run", fake_subprocess_run)
    monkeypatch.setattr(
        "voice_to_notes.services.transcribe.write_message",
        written_messages.append,
    )

    result = service.transcribe_chunked("req-1", "/fake/long_audio.wav")

    # Should have segments from multiple chunks
    assert len(result.segments) > 0

    # Verify timestamp offsets: segments from chunk 1 should start at 0,
    # segments from chunk 2 should be offset by 300000ms.
    # NOTE(review): this check is skipped silently when three or fewer
    # segments are returned; tighten it once the expected segment count is
    # confirmed against the service.
    if len(result.segments) > 3:
        assert result.segments[3].start_ms >= 300000

    assert result.duration_ms == 700000
    assert result.language == "en"
Phase 2: Core transcription pipeline and audio playback
- Implement faster-whisper TranscribeService with word-level timestamps, progress reporting, and hardware auto-detection
- Wire up Rust SidecarManager for Python process lifecycle (spawn, IPC, shutdown)
- Add transcribe_file Tauri command bridging frontend to Python sidecar
- Integrate wavesurfer.js WaveformPlayer with play/pause, skip, seek controls
- Build TranscriptEditor with word-level click-to-seek and active highlighting
- Connect file import flow: prompt → asset load → transcribe → display
- Add typed tauri-bridge service with TranscriptionResult interface
- Add Python tests for hardware detection and transcription result formatting
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-26 15:53:09 -08:00			`"""Tests for transcription service."""`

			`from voice_to_notes.services.transcribe import (`
			`SegmentResult,`
			`TranscriptionResult,`
			`WordResult,`
			`result_to_payload,`
			`)`


			`def test_result_to_payload():`
			`"""Test converting TranscriptionResult to IPC payload."""`
			`result = TranscriptionResult(`
			`segments=[`
			`SegmentResult(`
			`text="hello world",`
			`start_ms=0,`
			`end_ms=2000,`
			`words=[`
			`WordResult(word="hello", start_ms=0, end_ms=500, confidence=0.95),`
			`WordResult(word="world", start_ms=600, end_ms=2000, confidence=0.92),`
			`],`
			`),`
			`],`
			`language="en",`
			`language_probability=0.98,`
			`duration_ms=2000,`
			`)`

			`payload = result_to_payload(result)`

			`assert payload["language"] == "en"`
			`assert payload["duration_ms"] == 2000`
			`assert len(payload["segments"]) == 1`

			`seg = payload["segments"][0]`
			`assert seg["text"] == "hello world"`
			`assert seg["start_ms"] == 0`
			`assert seg["end_ms"] == 2000`
			`assert len(seg["words"]) == 2`
			`assert seg["words"][0]["word"] == "hello"`
			`assert seg["words"][0]["confidence"] == 0.95`


			`def test_result_to_payload_empty():`
			`"""Test empty transcription result."""`
			`result = TranscriptionResult()`
			`payload = result_to_payload(result)`
			`assert payload["segments"] == []`
			`assert payload["language"] == ""`
			`assert payload["duration_ms"] == 0`
Add chunked transcription for large audio files (>1 hour)
Split files >1 hour into 5-minute chunks via ffmpeg, transcribe each chunk independently, then merge results with corrected timestamps. Also add chunk-level progress markers every 10 segments for all files.
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-20 13:49:20 -07:00

			`def test_chunk_report_size_progress():`
			`"""Test CHUNK_REPORT_SIZE progress emission."""`
			`from voice_to_notes.services.transcribe import CHUNK_REPORT_SIZE`
			`assert CHUNK_REPORT_SIZE == 10`


			`def test_transcribe_chunked_with_mocked_ffmpeg(monkeypatch):`
			`"""Test transcribe_chunked with mocked ffmpeg/ffprobe and mocked WhisperModel."""`
			`from unittest.mock import MagicMock, patch`
			`from voice_to_notes.services.transcribe import TranscribeService, SegmentResult, WordResult`

			`# Mock subprocess.run for ffprobe (returns duration of 700s = ~2 chunks at 300s each)`
			`original_run = __import__("subprocess").run`

			`def mock_subprocess_run(cmd, **kwargs):`
			`if "ffprobe" in cmd:`
			`result = MagicMock()`
			`result.stdout = "700.0\n"`
			`result.returncode = 0`
			`return result`
			`elif "ffmpeg" in cmd:`
			`# Create an empty temp file (simulate chunk extraction)`
			`# The output file is the last argument`
			`import pathlib`
			`output_file = cmd[-1]`
			`pathlib.Path(output_file).touch()`
			`result = MagicMock()`
			`result.returncode = 0`
			`return result`
			`return original_run(cmd, **kwargs)`

			`# Mock WhisperModel`
			`mock_model = MagicMock()`
			`def mock_transcribe_call(file_path, **kwargs):`
			`mock_segments = []`
			`for i in range(3):`
			`seg = MagicMock()`
			`seg.start = i * 1.0`
			`seg.end = (i + 1) * 1.0`
			`seg.text = f"Segment {i}"`
			`seg.words = []`
			`mock_segments.append(seg)`
			`mock_info = MagicMock()`
			`mock_info.language = "en"`
			`mock_info.language_probability = 0.99`
			`mock_info.duration = 300.0`
			`return iter(mock_segments), mock_info`

			`mock_model.transcribe = mock_transcribe_call`

			`service = TranscribeService()`
			`service._model = mock_model`
			`service._current_model_name = "base"`
			`service._current_device = "cpu"`
			`service._current_compute_type = "int8"`

			`written_messages = []`
			`def mock_write(msg):`
			`written_messages.append(msg)`

			`with patch("subprocess.run", mock_subprocess_run), \`
			`patch("voice_to_notes.services.transcribe.write_message", mock_write):`
			`result = service.transcribe_chunked("req-1", "/fake/long_audio.wav")`

			`# Should have segments from multiple chunks`
			`assert len(result.segments) > 0`

			`# Verify timestamp offsets — segments from chunk 1 should start at 0,`
			`# segments from chunk 2 should be offset by 300000ms`
			`if len(result.segments) > 3:`
			`# Chunk 2 segments should have offset timestamps`
			`assert result.segments[3].start_ms >= 300000`

			`assert result.duration_ms == 700000`
			`assert result.language == "en"`