From 33ca3e4a28fbe0dd65195e4cd4e27777bfe8d422 Mon Sep 17 00:00:00 2001 From: Claude Date: Mon, 23 Mar 2026 07:57:57 -0700 Subject: [PATCH] Show chunk context in transcription progress for large files Files >1 hour are split into 5-minute chunks. Previously each chunk showed "Starting transcription..." making it look like a restart. Now shows "Chunk 3/12: Starting transcription..." and "Chunk 3/12: Transcribing segment 5 (42% of audio)..." Also skips the "Loading model..." message for chunks after the first since the model is already loaded. Co-Authored-By: Claude Opus 4.6 --- python/voice_to_notes/services/transcribe.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/python/voice_to_notes/services/transcribe.py b/python/voice_to_notes/services/transcribe.py index 0d975ce..cf31515 100644 --- a/python/voice_to_notes/services/transcribe.py +++ b/python/voice_to_notes/services/transcribe.py @@ -113,17 +113,22 @@ class TranscribeService: compute_type: str = "int8", language: str | None = None, on_segment: Callable[[SegmentResult, int], None] | None = None, + chunk_label: str | None = None, ) -> TranscriptionResult: """Transcribe an audio file with word-level timestamps. Sends progress messages via IPC during processing. + If chunk_label is set (e.g. "Chunk 3/12"), messages are prefixed with it. 
""" - # Stage: loading model - write_message(progress_message(request_id, 0, "loading_model", f"Loading {model_name}...")) + prefix = f"{chunk_label}: " if chunk_label else "" + + # Stage: loading model (skip for chunks after the first — model already loaded) + if not chunk_label: + write_message(progress_message(request_id, 0, "loading_model", f"Loading {model_name}...")) model = self._ensure_model(model_name, device, compute_type) # Stage: transcribing - write_message(progress_message(request_id, 10, "transcribing", "Starting transcription...")) + write_message(progress_message(request_id, 10, "transcribing", f"{prefix}Starting transcription...")) start_time = time.time() segments_iter, info = model.transcribe( @@ -176,7 +181,7 @@ class TranscribeService: request_id, progress_pct, "transcribing", - f"Transcribing segment {segment_count} ({progress_pct}% of audio)...", + f"{prefix}Transcribing segment {segment_count} ({progress_pct}% of audio)...", ) ) @@ -271,6 +276,7 @@ class TranscribeService: chunk_result = self.transcribe( request_id, tmp.name, model_name, device, compute_type, language, on_segment=chunk_on_segment, + chunk_label=f"Chunk {chunk_idx + 1}/{num_chunks}", ) # Offset timestamps and merge