Phase 3: Speaker diarization and full transcription pipeline
- Implement DiarizeService with pyannote.audio speaker detection
- Build PipelineService combining transcribe → diarize → merge with overlap-based speaker assignment per segment
- Add pipeline.start and diarize.start IPC handlers
- Add run_pipeline Tauri command for full pipeline execution
- Wire frontend to use pipeline: speakers auto-created with colors, segments assigned to detected speakers
- Build SpeakerManager with rename support (double-click or edit button)
- Add speaker color coding throughout transcript display
- Add pyannote.audio dependency
- Tests: 24 Python (including merge logic), 6 Rust, 0 Svelte errors

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -64,6 +64,59 @@ def make_transcribe_handler() -> HandlerFunc:
|
||||
return handler
|
||||
|
||||
|
||||
def make_diarize_handler() -> HandlerFunc:
    """Build the IPC handler that serves speaker-diarization requests.

    One ``DiarizeService`` is constructed when this factory is called and is
    captured by the returned closure, so every message passed to that handler
    goes through the same service instance.

    Returns:
        A callable taking an ``IPCMessage`` and returning the
        ``"diarize.result"`` reply for it.
    """
    # Function-scope import: the diarization module is loaded only when this
    # factory is actually called.
    from voice_to_notes.services.diarize import DiarizeService, diarization_to_payload

    diarizer = DiarizeService()

    def handler(msg: IPCMessage) -> IPCMessage:
        """Diarize the file named in ``msg.payload`` and wrap the outcome in a reply."""
        params = msg.payload
        request = {
            "request_id": msg.id,
            # "file" is mandatory; a payload without it raises KeyError.
            "file_path": params["file"],
            # The speaker-count hints are optional and are passed as None when absent.
            "num_speakers": params.get("num_speakers"),
            "min_speakers": params.get("min_speakers"),
            "max_speakers": params.get("max_speakers"),
        }
        diarization = diarizer.diarize(**request)
        reply_payload = diarization_to_payload(diarization)
        # The reply reuses the request id so the caller can correlate it.
        return IPCMessage(id=msg.id, type="diarize.result", payload=reply_payload)

    return handler
|
||||
|
||||
|
||||
def make_pipeline_handler() -> HandlerFunc:
    """Build the IPC handler for the full pipeline (transcribe + diarize + merge).

    One ``PipelineService`` is constructed when this factory is called and is
    captured by the returned closure, so every message passed to that handler
    goes through the same service instance.

    Returns:
        A callable taking an ``IPCMessage`` and returning the
        ``"pipeline.result"`` reply for it.
    """
    # Function-scope import: the pipeline module is loaded only when this
    # factory is actually called.
    from voice_to_notes.services.pipeline import PipelineService, pipeline_result_to_payload

    pipeline = PipelineService()

    def handler(msg: IPCMessage) -> IPCMessage:
        """Run the pipeline on the file named in ``msg.payload`` and wrap the outcome in a reply."""
        params = msg.payload
        request = {
            "request_id": msg.id,
            # "file" is mandatory; a payload without it raises KeyError.
            "file_path": params["file"],
            # Model settings fall back to these defaults when the payload omits them.
            "model_name": params.get("model", "base"),
            "device": params.get("device", "cpu"),
            "compute_type": params.get("compute_type", "int8"),
            # The remaining options are optional and are passed as None when absent.
            "language": params.get("language"),
            "num_speakers": params.get("num_speakers"),
            "min_speakers": params.get("min_speakers"),
            "max_speakers": params.get("max_speakers"),
            "skip_diarization": params.get("skip_diarization", False),
        }
        outcome = pipeline.run(**request)
        reply_payload = pipeline_result_to_payload(outcome)
        # The reply reuses the request id so the caller can correlate it.
        return IPCMessage(id=msg.id, type="pipeline.result", payload=reply_payload)

    return handler
|
||||
|
||||
|
||||
def hardware_detect_handler(msg: IPCMessage) -> IPCMessage:
|
||||
"""Detect hardware capabilities and return recommendations."""
|
||||
from voice_to_notes.hardware.detect import detect_hardware
|
||||
|
||||
Reference in New Issue
Block a user