Phase 3: Speaker diarization and full transcription pipeline

- Implement DiarizeService with pyannote.audio speaker detection
- Build PipelineService combining transcribe → diarize → merge with
  overlap-based speaker assignment per segment
- Add pipeline.start and diarize.start IPC handlers
- Add run_pipeline Tauri command for full pipeline execution
- Wire frontend to use pipeline: speakers auto-created with colors,
  segments assigned to detected speakers
- Build SpeakerManager with rename support (double-click or edit button)
- Add speaker color coding throughout transcript display
- Add pyannote.audio dependency
- Tests: 24 Python (including merge logic), 6 Rust, 0 Svelte errors

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-26 16:09:48 -08:00
parent 842f8d5f90
commit 44480906a4
12 changed files with 806 additions and 24 deletions

View File

@@ -64,6 +64,59 @@ def make_transcribe_handler() -> HandlerFunc:
return handler
def make_diarize_handler() -> HandlerFunc:
    """Build an IPC handler that runs speaker diarization.

    A single ``DiarizeService`` is constructed when this factory is called
    and captured by the returned closure, so every message handled by that
    closure goes through the same service instance.

    The handler reads these keys from the incoming message payload:

    * ``"file"`` -- required; looked up with ``[]``, so a missing key raises.
    * ``"num_speakers"``, ``"min_speakers"``, ``"max_speakers"`` -- optional;
      forwarded as ``None`` when absent.

    Returns:
        A callable taking an ``IPCMessage`` and returning a
        ``"diarize.result"`` ``IPCMessage`` carrying the same ``id``.
    """
    # Function-scope import: the service module is only loaded once a
    # handler is actually built (presumably to keep start-up light -- confirm).
    from voice_to_notes.services.diarize import (
        DiarizeService,
        diarization_to_payload,
    )

    diarizer = DiarizeService()

    def handler(msg: IPCMessage) -> IPCMessage:
        params = msg.payload
        # Arguments are gathered in the same order the service receives them,
        # with the mandatory "file" lookup ahead of the optional hints.
        call_args = {
            "request_id": msg.id,
            "file_path": params["file"],
            "num_speakers": params.get("num_speakers"),
            "min_speakers": params.get("min_speakers"),
            "max_speakers": params.get("max_speakers"),
        }
        diarization = diarizer.diarize(**call_args)
        reply_payload = diarization_to_payload(diarization)
        return IPCMessage(id=msg.id, type="diarize.result", payload=reply_payload)

    return handler
def make_pipeline_handler() -> HandlerFunc:
    """Build an IPC handler that runs the full pipeline service.

    A single ``PipelineService`` is constructed when this factory is called
    and reused by the returned closure for every message it handles.

    Payload keys read by the handler, with the fallback used when a key is
    absent:

    * ``"file"`` -- required; looked up with ``[]``, so a missing key raises.
    * ``"model"`` -- ``"base"``
    * ``"device"`` -- ``"cpu"``
    * ``"compute_type"`` -- ``"int8"``
    * ``"language"`` -- ``None``
    * ``"num_speakers"``, ``"min_speakers"``, ``"max_speakers"`` -- ``None``
    * ``"skip_diarization"`` -- ``False``

    Returns:
        A callable taking an ``IPCMessage`` and returning a
        ``"pipeline.result"`` ``IPCMessage`` carrying the same ``id``.
    """
    # Function-scope import: the pipeline module is only loaded once a
    # handler is actually built (presumably to keep start-up light -- confirm).
    from voice_to_notes.services.pipeline import (
        PipelineService,
        pipeline_result_to_payload,
    )

    pipeline = PipelineService()

    def handler(msg: IPCMessage) -> IPCMessage:
        params = msg.payload
        # Keyword arguments for PipelineService.run, evaluated in call order
        # so the mandatory "file" lookup happens before the optional ones.
        run_kwargs = {
            "request_id": msg.id,
            "file_path": params["file"],
            "model_name": params.get("model", "base"),
            "device": params.get("device", "cpu"),
            "compute_type": params.get("compute_type", "int8"),
            "language": params.get("language"),
            "num_speakers": params.get("num_speakers"),
            "min_speakers": params.get("min_speakers"),
            "max_speakers": params.get("max_speakers"),
            "skip_diarization": params.get("skip_diarization", False),
        }
        outcome = pipeline.run(**run_kwargs)
        reply_payload = pipeline_result_to_payload(outcome)
        return IPCMessage(id=msg.id, type="pipeline.result", payload=reply_payload)

    return handler
def hardware_detect_handler(msg: IPCMessage) -> IPCMessage:
"""Detect hardware capabilities and return recommendations."""
from voice_to_notes.hardware.detect import detect_hardware