Phase 3: Speaker diarization and full transcription pipeline

- Implement DiarizeService with pyannote.audio speaker detection
- Build PipelineService combining transcribe → diarize → merge with overlap-based speaker assignment per segment
- Add pipeline.start and diarize.start IPC handlers
- Add run_pipeline Tauri command for full pipeline execution
- Wire frontend to use pipeline: speakers auto-created with colors, segments assigned to detected speakers
- Build SpeakerManager with rename support (double-click or edit button)
- Add speaker color coding throughout transcript display
- Add pyannote.audio dependency
- Tests: 24 Python (including merge logic), 6 Rust, 0 Svelte errors

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-26 16:09:48 -08:00
parent 842f8d5f90
commit 44480906a4
12 changed files with 806 additions and 24 deletions
--- a/python/tests/test_diarize.py
+++ b/python/tests/test_diarize.py
@@ -0,0 +1,33 @@
+"""Tests for diarization service data structures and payload conversion."""
+
+from voice_to_notes.services.diarize import (
+    DiarizationResult,
+    SpeakerSegment,
+    diarization_to_payload,
+)
+
+
+def test_diarization_to_payload():
+    result = DiarizationResult(
+        speaker_segments=[
+            SpeakerSegment(speaker="SPEAKER_00", start_ms=0, end_ms=5000),
+            SpeakerSegment(speaker="SPEAKER_01", start_ms=5000, end_ms=10000),
+            SpeakerSegment(speaker="SPEAKER_00", start_ms=10000, end_ms=15000),
+        ],
+        num_speakers=2,
+        speakers=["SPEAKER_00", "SPEAKER_01"],
+    )
+    payload = diarization_to_payload(result)
+    assert payload["num_speakers"] == 2
+    assert len(payload["speaker_segments"]) == 3
+    assert payload["speakers"] == ["SPEAKER_00", "SPEAKER_01"]
+    assert payload["speaker_segments"][0]["speaker"] == "SPEAKER_00"
+    assert payload["speaker_segments"][1]["start_ms"] == 5000
+
+
+def test_diarization_to_payload_empty():
+    result = DiarizationResult()
+    payload = diarization_to_payload(result)
+    assert payload["num_speakers"] == 0
+    assert payload["speaker_segments"] == []
+    assert payload["speakers"] == []