Run pyannote diarization in a background thread with progress reporting
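Move the blocking pipeline() call to a daemon thread and, while it runs, emit estimated progress messages every 2 seconds from the calling thread. The estimate is calibrated from the audio duration: the expected total time is half the audio duration (minimum 30 seconds), or 120 seconds when the duration is unknown, and the reported percentage climbs from 20% up to a cap of 85%. Also pass audio_duration_sec from PipelineService to DiarizeService.
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>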
This commit is contained in:
@@ -3,6 +3,7 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
import threading
|
||||
import time
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any
|
||||
@@ -82,6 +83,8 @@ class DiarizeService:
|
||||
num_speakers: int | None = None,
|
||||
min_speakers: int | None = None,
|
||||
max_speakers: int | None = None,
|
||||
hf_token: str | None = None,
|
||||
audio_duration_sec: float | None = None,
|
||||
) -> DiarizationResult:
|
||||
"""Run speaker diarization on an audio file.
|
||||
|
||||
@@ -116,8 +119,36 @@ class DiarizeService:
|
||||
if max_speakers is not None:
|
||||
kwargs["max_speakers"] = max_speakers
|
||||
|
||||
# Run diarization
|
||||
diarization = pipeline(file_path, **kwargs)
|
||||
# Run diarization in background thread for progress reporting
|
||||
result_holder: list = [None]
|
||||
error_holder: list[Exception | None] = [None]
|
||||
done_event = threading.Event()
|
||||
|
||||
def _run():
    """Worker-thread body: run the diarization pipeline and record its outcome.

    Calls ``pipeline(file_path, **kwargs)``. On success the return value is
    stored in ``result_holder[0]``; if an ``Exception`` is raised it is
    stored in ``error_holder[0]`` instead of propagating out of the thread.
    ``done_event`` is set in every case so a waiter on the event is released.
    """
    try:
        outcome = pipeline(file_path, **kwargs)
    except Exception as exc:
        # Hand the failure to whoever inspects error_holder after the event.
        error_holder[0] = exc
    else:
        result_holder[0] = outcome
    finally:
        # Always signal completion, whether the call succeeded or failed.
        done_event.set()
|
||||
|
||||
thread = threading.Thread(target=_run, daemon=True)
|
||||
thread.start()
|
||||
|
||||
elapsed = 0.0
|
||||
estimated_total = max(audio_duration_sec * 0.5, 30.0) if audio_duration_sec else 120.0
|
||||
while not done_event.wait(timeout=2.0):
|
||||
elapsed += 2.0
|
||||
pct = min(20 + int((elapsed / estimated_total) * 65), 85)
|
||||
write_message(progress_message(
|
||||
request_id, pct, "diarizing",
|
||||
f"Analyzing speakers ({int(elapsed)}s elapsed)..."))
|
||||
|
||||
thread.join()
|
||||
|
||||
if error_holder[0] is not None:
|
||||
raise error_holder[0]
|
||||
diarization = result_holder[0]
|
||||
|
||||
# Convert pyannote output to our format
|
||||
result = DiarizationResult()
|
||||
|
||||
@@ -121,6 +121,7 @@ class PipelineService:
|
||||
num_speakers=num_speakers,
|
||||
min_speakers=min_speakers,
|
||||
max_speakers=max_speakers,
|
||||
audio_duration_sec=transcription.duration_ms / 1000.0,
|
||||
)
|
||||
|
||||
# Step 3: Merge
|
||||
|
||||
Reference in New Issue
Block a user