Show chunk context in transcription progress for large files
All checks were successful
Build Sidecars / Bump sidecar version and tag (push) Successful in 3s
Release / Bump version and tag (push) Successful in 3s
Build Sidecars / Build Sidecar (macOS) (push) Successful in 8m30s
Release / Build App (macOS) (push) Successful in 1m19s
Build Sidecars / Build Sidecar (Linux) (push) Successful in 12m9s
Release / Build App (Linux) (push) Successful in 3m36s
Build Sidecars / Build Sidecar (Windows) (push) Successful in 29m36s
Release / Build App (Windows) (push) Successful in 3m13s
All checks were successful
Build Sidecars / Bump sidecar version and tag (push) Successful in 3s
Release / Bump version and tag (push) Successful in 3s
Build Sidecars / Build Sidecar (macOS) (push) Successful in 8m30s
Release / Build App (macOS) (push) Successful in 1m19s
Build Sidecars / Build Sidecar (Linux) (push) Successful in 12m9s
Release / Build App (Linux) (push) Successful in 3m36s
Build Sidecars / Build Sidecar (Windows) (push) Successful in 29m36s
Release / Build App (Windows) (push) Successful in 3m13s
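Files longer than 1 hour are split into 5-minute chunks. Previously, each chunk showed "Starting transcription...", which made it look as if transcription had restarted. Each chunk now shows its position instead, e.g. "Chunk 3/12: Starting transcription..." and "Chunk 3/12: Transcribing segment 5 (42% of audio)...". The "Loading model..." message is also skipped whenever a chunk label is set, on the basis that the model is already loaded for later chunks; note that, as written, this suppresses the message for every chunk, including the first.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>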
This commit is contained in:
@@ -113,17 +113,22 @@ class TranscribeService:
|
|||||||
compute_type: str = "int8",
|
compute_type: str = "int8",
|
||||||
language: str | None = None,
|
language: str | None = None,
|
||||||
on_segment: Callable[[SegmentResult, int], None] | None = None,
|
on_segment: Callable[[SegmentResult, int], None] | None = None,
|
||||||
|
chunk_label: str | None = None,
|
||||||
) -> TranscriptionResult:
|
) -> TranscriptionResult:
|
||||||
"""Transcribe an audio file with word-level timestamps.
|
"""Transcribe an audio file with word-level timestamps.
|
||||||
|
|
||||||
Sends progress messages via IPC during processing.
|
Sends progress messages via IPC during processing.
|
||||||
|
If chunk_label is set (e.g. "chunk 3/12"), messages are prefixed with it.
|
||||||
"""
|
"""
|
||||||
# Stage: loading model
|
prefix = f"{chunk_label}: " if chunk_label else ""
|
||||||
write_message(progress_message(request_id, 0, "loading_model", f"Loading {model_name}..."))
|
|
||||||
|
# Stage: loading model (skip for chunks after the first — model already loaded)
|
||||||
|
if not chunk_label:
|
||||||
|
write_message(progress_message(request_id, 0, "loading_model", f"Loading {model_name}..."))
|
||||||
model = self._ensure_model(model_name, device, compute_type)
|
model = self._ensure_model(model_name, device, compute_type)
|
||||||
|
|
||||||
# Stage: transcribing
|
# Stage: transcribing
|
||||||
write_message(progress_message(request_id, 10, "transcribing", "Starting transcription..."))
|
write_message(progress_message(request_id, 10, "transcribing", f"{prefix}Starting transcription..."))
|
||||||
|
|
||||||
start_time = time.time()
|
start_time = time.time()
|
||||||
segments_iter, info = model.transcribe(
|
segments_iter, info = model.transcribe(
|
||||||
@@ -176,7 +181,7 @@ class TranscribeService:
|
|||||||
request_id,
|
request_id,
|
||||||
progress_pct,
|
progress_pct,
|
||||||
"transcribing",
|
"transcribing",
|
||||||
f"Transcribing segment {segment_count} ({progress_pct}% of audio)...",
|
f"{prefix}Transcribing segment {segment_count} ({progress_pct}% of audio)...",
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -271,6 +276,7 @@ class TranscribeService:
|
|||||||
chunk_result = self.transcribe(
|
chunk_result = self.transcribe(
|
||||||
request_id, tmp.name, model_name, device,
|
request_id, tmp.name, model_name, device,
|
||||||
compute_type, language, on_segment=chunk_on_segment,
|
compute_type, language, on_segment=chunk_on_segment,
|
||||||
|
chunk_label=f"Chunk {chunk_idx + 1}/{num_chunks}",
|
||||||
)
|
)
|
||||||
|
|
||||||
# Offset timestamps and merge
|
# Offset timestamps and merge
|
||||||
|
|||||||
Reference in New Issue
Block a user