Merge perf/chunked-transcription: chunk-based processing for large files
This commit is contained in:
@@ -96,15 +96,40 @@ class PipelineService:
|
||||
"words": [{"word": w.word, "start_ms": w.start_ms, "end_ms": w.end_ms, "confidence": w.confidence} for w in seg.words],
|
||||
}))
|
||||
|
||||
transcription = self._transcribe_service.transcribe(
|
||||
request_id=request_id,
|
||||
file_path=file_path,
|
||||
model_name=model_name,
|
||||
device=device,
|
||||
compute_type=compute_type,
|
||||
language=language,
|
||||
on_segment=_emit_segment,
|
||||
)
|
||||
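# Probe the audio duration with ffprobe; files longer than LARGE_FILE_THRESHOLD_SEC use chunked transcription, and a failed probe falls back to the regular path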
|
||||
audio_duration_sec = None
|
||||
try:
|
||||
import subprocess
|
||||
probe_result = subprocess.run(
|
||||
["ffprobe", "-v", "quiet", "-show_entries", "format=duration",
|
||||
"-of", "default=noprint_wrappers=1:nokey=1", file_path],
|
||||
capture_output=True, text=True, check=True,
|
||||
)
|
||||
audio_duration_sec = float(probe_result.stdout.strip())
|
||||
except (subprocess.CalledProcessError, FileNotFoundError, ValueError):
|
||||
pass
|
||||
|
||||
from voice_to_notes.services.transcribe import LARGE_FILE_THRESHOLD_SEC
|
||||
if audio_duration_sec and audio_duration_sec > LARGE_FILE_THRESHOLD_SEC:
|
||||
transcription = self._transcribe_service.transcribe_chunked(
|
||||
request_id=request_id,
|
||||
file_path=file_path,
|
||||
model_name=model_name,
|
||||
device=device,
|
||||
compute_type=compute_type,
|
||||
language=language,
|
||||
on_segment=_emit_segment,
|
||||
)
|
||||
else:
|
||||
transcription = self._transcribe_service.transcribe(
|
||||
request_id=request_id,
|
||||
file_path=file_path,
|
||||
model_name=model_name,
|
||||
device=device,
|
||||
compute_type=compute_type,
|
||||
language=language,
|
||||
on_segment=_emit_segment,
|
||||
)
|
||||
|
||||
if skip_diarization:
|
||||
# Convert transcription directly without speaker labels
|
||||
|
||||
Reference in New Issue
Block a user