From 33ca3e4a28fbe0dd65195e4cd4e27777bfe8d422 Mon Sep 17 00:00:00 2001 From: Claude Date: Mon, 23 Mar 2026 07:57:57 -0700 Subject: [PATCH] Show chunk context in transcription progress for large files Files >1 hour are split into 5-minute chunks. Previously each chunk showed "Starting transcription..." making it look like a restart. Now shows "Chunk 3/12: Starting transcription..." and "Chunk 3/12: Transcribing segment 5 (42% of audio)..." Also skips the "Loading model..." message for chunks after the first since the model is already loaded. Co-Authored-By: Claude Opus 4.6 --- python/voice_to_notes/services/transcribe.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/python/voice_to_notes/services/transcribe.py b/python/voice_to_notes/services/transcribe.py index 0d975ce..cf31515 100644 --- a/python/voice_to_notes/services/transcribe.py +++ b/python/voice_to_notes/services/transcribe.py @@ -113,17 +113,22 @@ class TranscribeService: compute_type: str = "int8", language: str | None = None, on_segment: Callable[[SegmentResult, int], None] | None = None, + chunk_label: str | None = None, ) -> TranscriptionResult: """Transcribe an audio file with word-level timestamps. Sends progress messages via IPC during processing. + If chunk_label is set (e.g. "Chunk 3/12"), messages are prefixed with it. 
""" - # Stage: loading model - write_message(progress_message(request_id, 0, "loading_model", f"Loading {model_name}...")) + prefix = f"{chunk_label}: " if chunk_label else "" + + # Stage: loading model (skip for chunks after the first — model already loaded) + if not chunk_label: + write_message(progress_message(request_id, 0, "loading_model", f"Loading {model_name}...")) model = self._ensure_model(model_name, device, compute_type) # Stage: transcribing - write_message(progress_message(request_id, 10, "transcribing", "Starting transcription...")) + write_message(progress_message(request_id, 10, "transcribing", f"{prefix}Starting transcription...")) start_time = time.time() segments_iter, info = model.transcribe( @@ -176,7 +181,7 @@ class TranscribeService: request_id, progress_pct, "transcribing", - f"Transcribing segment {segment_count} ({progress_pct}% of audio)...", + f"{prefix}Transcribing segment {segment_count} ({progress_pct}% of audio)...", ) ) @@ -271,6 +276,7 @@ class TranscribeService: chunk_result = self.transcribe( request_id, tmp.name, model_name, device, compute_type, language, on_segment=chunk_on_segment, + chunk_label=f"Chunk {chunk_idx + 1}/{num_chunks}", ) # Offset timestamps and merge