From 9dcb14e92c4f420198e0f149ea839b1378b5061e Mon Sep 17 00:00:00 2001
From: Developer
Date: Tue, 7 Apr 2026 16:31:50 -0700
Subject: [PATCH] Fix Deepgram streaming latency

Three changes to reduce transcription delay:

1. Send loop: queue.get() was blocking the asyncio event loop, stalling
   the receive loop and delaying transcription results. Now uses
   run_in_executor() to avoid blocking the event loop.

2. Block size: reduced from 4096 (~256ms) to 1024 (~64ms) for more
   frequent, smaller audio chunks. Deepgram handles streaming better
   with smaller packets.

3. Added punctuate=true and smart_format=true to Deepgram BYOK params
   for cleaner transcription output.

Co-Authored-By: Claude Opus 4.6 (1M context)
---
 client/deepgram_transcription.py | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/client/deepgram_transcription.py b/client/deepgram_transcription.py
index 79556da..b2ec521 100644
--- a/client/deepgram_transcription.py
+++ b/client/deepgram_transcription.py
@@ -67,7 +67,7 @@ class DeepgramTranscriptionEngine:
         # Audio parameters
         self.sample_rate: int = 16000
         self.channels: int = 1
-        self.blocksize: int = 4096
+        self.blocksize: int = 1024  # ~64ms chunks for lower latency streaming
 
         # Callbacks
         self.realtime_callback: Optional[Callable[[TranscriptionResult], None]] = None
@@ -314,6 +314,8 @@ class DeepgramTranscriptionEngine:
             f"model={self.deepgram_model}"
             f"&language={self.language}"
             "&interim_results=true"
+            "&punctuate=true"
+            "&smart_format=true"
             "&encoding=linear16"
             f"&sample_rate={self.sample_rate}"
             f"&channels={self.channels}"
@@ -370,10 +372,16 @@ class DeepgramTranscriptionEngine:
 
     async def _send_loop(self):
         """Drain the audio queue and push raw PCM bytes over the WebSocket."""
+        loop = asyncio.get_event_loop()
         while not self._stop_event.is_set():
             try:
-                pcm_bytes = self._audio_queue.get(timeout=0.1)
-            except Empty:
+                # Use run_in_executor to avoid blocking the async event loop
+                # (which would stall the receive loop and delay transcriptions)
+                pcm_bytes = await asyncio.wait_for(
+                    loop.run_in_executor(None, lambda: self._audio_queue.get(timeout=0.5)),
+                    timeout=1.0,
+                )
+            except (Empty, asyncio.TimeoutError):
                 continue
 
             try: