Add chunked transcription for large audio files (>1 hour)

Split files >1 hour into 5-minute chunks via ffmpeg, transcribe each
chunk independently, then merge results with corrected timestamps.
Also add chunk-level progress markers every 10 segments for all files.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
Author: Claude
Date: 2026-03-20 13:49:20 -07:00
parent d00281f0c7
commit 16f4b57771
4 changed files with 248 additions and 8 deletions

View File

@@ -82,14 +82,38 @@ class PipelineService:
progress_message(request_id, 0, "pipeline", "Starting transcription pipeline...")
)
transcription = self._transcribe_service.transcribe(
request_id=request_id,
file_path=file_path,
model_name=model_name,
device=device,
compute_type=compute_type,
language=language,
)
# Probe audio duration for conditional chunked transcription
audio_duration_sec = None
try:
import subprocess
probe_result = subprocess.run(
["ffprobe", "-v", "quiet", "-show_entries", "format=duration",
"-of", "default=noprint_wrappers=1:nokey=1", file_path],
capture_output=True, text=True, check=True,
)
audio_duration_sec = float(probe_result.stdout.strip())
except (subprocess.CalledProcessError, FileNotFoundError, ValueError):
pass
from voice_to_notes.services.transcribe import LARGE_FILE_THRESHOLD_SEC
if audio_duration_sec and audio_duration_sec > LARGE_FILE_THRESHOLD_SEC:
transcription = self._transcribe_service.transcribe_chunked(
request_id=request_id,
file_path=file_path,
model_name=model_name,
device=device,
compute_type=compute_type,
language=language,
)
else:
transcription = self._transcribe_service.transcribe(
request_id=request_id,
file_path=file_path,
model_name=model_name,
device=device,
compute_type=compute_type,
language=language,
)
if skip_diarization:
# Convert transcription directly without speaker labels