From 9dcb14e92c4f420198e0f149ea839b1378b5061e Mon Sep 17 00:00:00 2001
From: Developer <dev@local-transcription.local>
Date: Tue, 7 Apr 2026 16:31:50 -0700
Subject: [PATCH] Fix Deepgram streaming latency

Three changes to reduce transcription delay:

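1. Send loop: the synchronous queue.get(timeout=0.1) call ran directly
   inside the coroutine and blocked the asyncio event loop, stalling
   the receive loop and delaying transcription results. The get() now
   runs via run_in_executor() (with a 0.5s queue timeout), so the
   event loop stays free while waiting for audio.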

2. Block size: reduced from 4096 samples (~256ms at 16 kHz) to 1024
   samples (~64ms) for more frequent, smaller audio chunks. Deepgram
   handles streaming better with smaller packets.

3. Added punctuate=true and smart_format=true to the Deepgram BYOK
   query-string params for cleaner transcription output.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 client/deepgram_transcription.py | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/client/deepgram_transcription.py b/client/deepgram_transcription.py
index 79556da..b2ec521 100644
--- a/client/deepgram_transcription.py
+++ b/client/deepgram_transcription.py
@@ -67,7 +67,7 @@ class DeepgramTranscriptionEngine:
         # Audio parameters
         self.sample_rate: int = 16000
         self.channels: int = 1
-        self.blocksize: int = 4096
+        self.blocksize: int = 1024  # ~64ms chunks for lower latency streaming
 
         # Callbacks
         self.realtime_callback: Optional[Callable[[TranscriptionResult], None]] = None
@@ -314,6 +314,8 @@ class DeepgramTranscriptionEngine:
             f"model={self.deepgram_model}"
             f"&language={self.language}"
             "&interim_results=true"
+            "&punctuate=true"
+            "&smart_format=true"
             "&encoding=linear16"
             f"&sample_rate={self.sample_rate}"
             f"&channels={self.channels}"
@@ -370,10 +372,16 @@ class DeepgramTranscriptionEngine:
 
     async def _send_loop(self):
         """Drain the audio queue and push raw PCM bytes over the WebSocket."""
+        loop = asyncio.get_event_loop()
         while not self._stop_event.is_set():
             try:
-                pcm_bytes = self._audio_queue.get(timeout=0.1)
-            except Empty:
+                # Use run_in_executor to avoid blocking the async event loop
+                # (which would stall the receive loop and delay transcriptions)
+                pcm_bytes = await asyncio.wait_for(
+                    loop.run_in_executor(None, lambda: self._audio_queue.get(timeout=0.5)),
+                    timeout=1.0,
+                )
+            except (Empty, asyncio.TimeoutError):
                 continue
 
             try: