Fix Deepgram streaming latency

Three changes to reduce transcription delay:

1. Send loop: queue.get() was blocking the asyncio event loop, which stalled the receive loop and delayed transcription results. The blocking get now runs via run_in_executor() so the event loop stays free.
2. Block size: reduced from 4096 samples (~256ms at 16 kHz) to 1024 samples (~64ms) for more frequent, smaller audio chunks. Deepgram handles streaming better with smaller packets.
3. Added punctuate=true and smart_format=true to the Deepgram BYOK params for cleaner transcription output.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-07 16:31:50 -07:00
parent 8db9b8298b
commit 9dcb14e92c
1 changed files with 11 additions and 3 deletions
--- a/client/deepgram_transcription.py
+++ b/client/deepgram_transcription.py
@@ -67,7 +67,7 @@ class DeepgramTranscriptionEngine:
        # Audio parameters
        self.sample_rate: int = 16000
        self.channels: int = 1
-        self.blocksize: int = 4096
+        self.blocksize: int = 1024  # ~64ms chunks for lower latency streaming
        # Callbacks
        self.realtime_callback: Optional[Callable[[TranscriptionResult], None]] = None
@@ -314,6 +314,8 @@ class DeepgramTranscriptionEngine:
            f"model={self.deepgram_model}"
            f"&language={self.language}"
            "&interim_results=true"
            "&punctuate=true"
            "&smart_format=true"
            "&encoding=linear16"
            f"&sample_rate={self.sample_rate}"
            f"&channels={self.channels}"
@@ -370,10 +372,16 @@ class DeepgramTranscriptionEngine:
    async def _send_loop(self):
        """Drain the audio queue and push raw PCM bytes over the WebSocket."""
        loop = asyncio.get_event_loop()
        while not self._stop_event.is_set():
            try:
-                pcm_bytes = self._audio_queue.get(timeout=0.1)
+                # Use run_in_executor to avoid blocking the async event loop
-            except Empty:
+                # (which would stall the receive loop and delay transcriptions)
                pcm_bytes = await asyncio.wait_for(
                    loop.run_in_executor(None, lambda: self._audio_queue.get(timeout=0.5)),
                    timeout=1.0,
                )
            except (Empty, asyncio.TimeoutError):
                continue
            try: