Merge perf/diarize-threading: diarization progress via background thread

This commit is contained in:
Claude
2026-03-20 13:52:59 -07:00
3 changed files with 113 additions and 6 deletions

View File

@@ -6,6 +6,7 @@ import os
import subprocess
import sys
import tempfile
import threading
import time
from dataclasses import dataclass, field
from pathlib import Path
@@ -141,6 +142,7 @@ class DiarizeService:
min_speakers: int | None = None,
max_speakers: int | None = None,
hf_token: str | None = None,
audio_duration_sec: float | None = None,
) -> DiarizationResult:
"""Run speaker diarization on an audio file.
@@ -184,12 +186,40 @@ class DiarizeService:
flush=True,
)
# Run diarization
try:
raw_result = pipeline(audio_path, **kwargs)
finally:
if temp_wav:
os.unlink(temp_wav)
# Run diarization in background thread for progress reporting
result_holder: list = [None]
error_holder: list[Exception | None] = [None]
done_event = threading.Event()
def _run():
    """Run the diarization pipeline on the worker thread.

    Stores the pipeline's return value in ``result_holder[0]`` on success,
    or the raised exception in ``error_holder[0]`` on failure, and always
    sets ``done_event`` so the waiting progress loop stops polling.
    """
    try:
        result_holder[0] = pipeline(audio_path, **kwargs)
    except BaseException as exc:
        # Catch BaseException, not just Exception: anything that escapes
        # here dies with the thread (SystemExit is silently ignored by the
        # threading machinery), which would leave both holders as None and
        # let the caller continue with a None result. Capturing it lets the
        # caller re-raise it after the thread finishes, as the inline
        # (non-threaded) call would have done.
        error_holder[0] = exc
    finally:
        # Signal completion on every path so the waiter cannot block forever.
        done_event.set()
thread = threading.Thread(target=_run, daemon=True)
thread.start()
elapsed = 0.0
estimated_total = max(audio_duration_sec * 0.5, 30.0) if audio_duration_sec else 120.0
while not done_event.wait(timeout=2.0):
elapsed += 2.0
pct = min(20 + int((elapsed / estimated_total) * 65), 85)
write_message(progress_message(
request_id, pct, "diarizing",
f"Analyzing speakers ({int(elapsed)}s elapsed)..."))
thread.join()
# Clean up temp file
if temp_wav:
os.unlink(temp_wav)
if error_holder[0] is not None:
raise error_holder[0]
raw_result = result_holder[0]
# pyannote 4.0+ returns DiarizeOutput; older versions return Annotation directly
if hasattr(raw_result, "speaker_diarization"):