Add cloud-only sidecar variant (~50MB vs 500MB-2GB)

Lightweight Deepgram-only sidecar that excludes PyTorch, faster-whisper, RealtimeSTT, and CUDA. It includes only audio capture and WebSocket streaming to Deepgram, and requires a Deepgram API key (BYOK or managed mode).

Changes:
- client/models.py: Extracted TranscriptionResult into a standalone module so deepgram_transcription.py doesn't transitively import torch
- backend/app_controller.py: Made RealtimeTranscriptionEngine and DeviceManager imports lazy (RealtimeTranscriptionEngine is loaded only when remote.mode == "local"; DeviceManager is loaded in AppController.__init__ when it is importable, otherwise device_manager is None)
- local-transcription-cloud.spec: PyInstaller spec excluding all ML deps
- SidecarSetup.svelte: Added "Cloud Only (Deepgram)" variant option
- build-sidecar-cloud.yml: CI workflow building cloud sidecar for all 3 OS
- sidecar-release.yml: Dispatches cloud build alongside CPU/CUDA builds

Sidecar download options are now:
- Standard (CPU): ~500 MB - local Whisper on any computer
- GPU Accelerated (CUDA): ~2 GB - local Whisper with NVIDIA GPU
- Cloud Only (Deepgram): ~50 MB - requires API key, no local models

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-07 16:57:43 -07:00
parent bb039399fc
commit 3d3d7ec3c5
10 changed files with 469 additions and 42 deletions
--- a/backend/app_controller.py
+++ b/backend/app_controller.py
@@ -18,13 +18,18 @@ import sys
 sys.path.insert(0, str(Path(__file__).resolve().parent.parent))

 from client.config import Config
-from client.device_utils import DeviceManager
-from client.transcription_engine_realtime import RealtimeTranscriptionEngine, TranscriptionResult
+from client.models import TranscriptionResult
 from client.deepgram_transcription import DeepgramTranscriptionEngine
 from client.server_sync import ServerSyncClient
 from server.web_display import TranscriptionWebServer
 from version import __version__

+# Heavy imports (torch, RealtimeSTT, faster-whisper) are deferred so
+# the cloud-only sidecar build can exclude them entirely. DeviceManager is
+# tried in __init__(); RealtimeTranscriptionEngine in _initialize_engine() when remote.mode == "local".
+RealtimeTranscriptionEngine = None
+DeviceManager = None
+

 class AppState:
    """Enum-like class for application states."""
@@ -89,7 +94,18 @@ class AppController:

    def __init__(self, config: Optional[Config] = None):
        self.config = config or Config()
-        self.device_manager = DeviceManager()
+
+        # DeviceManager is only needed for local Whisper mode.
+        # Lazy-import to keep the cloud-only sidecar lightweight.
+        global DeviceManager
+        if DeviceManager is None:
+            try:
+                from client.device_utils import DeviceManager as _DM
+                DeviceManager = _DM
+            except ImportError:
+                DeviceManager = None
+
+        self.device_manager = DeviceManager() if DeviceManager else None

        # State
        self._state = AppState.INITIALIZING
@@ -243,15 +259,12 @@ class AppController:

    def _initialize_engine(self):
        """Initialize the transcription engine in a background thread."""
-        device_config = self.config.get('transcription.device', 'auto')
-        self.device_manager.set_device(device_config)
-
        audio_device_str = self.config.get('audio.input_device', 'default')
        audio_device = None if audio_device_str == 'default' else int(audio_device_str)

        model = self.config.get('transcription.model', 'base.en')
        language = self.config.get('transcription.language', 'en')
-        device = self.device_manager.get_device_for_whisper()
+        device_config = self.config.get('transcription.device', 'auto')
        compute_type = self.config.get('transcription.compute_type', 'default')

        self.current_model_size = model
@@ -284,6 +297,18 @@ class AppController:
            self.transcription_engine.set_error_callback(self._on_remote_error)
            self.transcription_engine.set_credits_low_callback(self._on_credits_low)
        else:
+            # Lazy-import heavy local transcription dependencies
+            global RealtimeTranscriptionEngine
+            if RealtimeTranscriptionEngine is None:
+                from client.transcription_engine_realtime import RealtimeTranscriptionEngine as _RTE
+                RealtimeTranscriptionEngine = _RTE
+
+            if self.device_manager:
+                self.device_manager.set_device(device_config)
+                device = self.device_manager.get_device_for_whisper()
+            else:
+                device = "cpu"
+
            self.transcription_engine = RealtimeTranscriptionEngine(
                model=model,
                device=device,
@@ -602,7 +627,7 @@ class AppController:
        host = self.config.get('web_server.host', '127.0.0.1')
        port = self.actual_web_port or self.config.get('web_server.port', 8080)

-        device_info = self.device_manager.get_device_info()
+        device_info = self.device_manager.get_device_info() if self.device_manager else []

        remote_mode = self.config.get('remote.mode', 'local')
        if remote_mode in ('managed', 'byok') and self.transcription_engine:
@@ -646,10 +671,13 @@ class AppController:

    def get_compute_devices(self) -> list[dict]:
        """List available compute devices."""
-        device_info = self.device_manager.get_device_info()
        devices = [{"id": "auto", "name": "Auto-detect"}]
-        for dev_id, dev_name in device_info:
-            devices.append({"id": dev_id, "name": dev_name})
+        if self.device_manager:
+            device_info = self.device_manager.get_device_info()
+            for dev_id, dev_name in device_info:
+                devices.append({"id": dev_id, "name": dev_name})
+        else:
+            devices.append({"id": "cloud", "name": "Cloud (Deepgram)"})
        return devices

    # ── Update Checking ────────────────────────────────────────────
--- a/backend/tests/test_api_server.py
+++ b/backend/tests/test_api_server.py
@@ -79,7 +79,7 @@ async def test_start_when_not_ready(api_client, controller):

@pytest.mark.asyncio
 async def test_clear(api_client, controller):
-    from client.transcription_engine_realtime import TranscriptionResult
+    from client.models import TranscriptionResult
    from datetime import datetime

    controller.transcriptions = [
--- a/backend/tests/test_app_controller.py
+++ b/backend/tests/test_app_controller.py
@@ -72,7 +72,7 @@ def test_double_start_rejected(controller):

 def test_clear_transcriptions(controller):
    """clear_transcriptions should empty the list and return the count."""
-    from client.transcription_engine_realtime import TranscriptionResult
+    from client.models import TranscriptionResult

    controller.transcriptions = [
        TranscriptionResult(text="Hello", is_final=True, timestamp=datetime.now(), user_name="Alice"),
@@ -85,7 +85,7 @@ def test_clear_transcriptions(controller):

 def test_get_transcriptions_text_with_timestamps(controller):
    """get_transcriptions_text should include [HH:MM:SS] prefixes when requested."""
-    from client.transcription_engine_realtime import TranscriptionResult
+    from client.models import TranscriptionResult

    ts = datetime(2025, 1, 15, 10, 30, 45)
    controller.transcriptions = [
@@ -141,7 +141,7 @@ def test_apply_settings_no_reload_when_same(controller):

 def test_on_final_transcription_callback_fires(controller):
    """_on_final_transcription should append and invoke on_transcription callback."""
-    from client.transcription_engine_realtime import TranscriptionResult
+    from client.models import TranscriptionResult

    received = []
    controller.on_transcription = lambda data: received.append(data)
@@ -166,7 +166,7 @@ def test_on_final_transcription_callback_fires(controller):

 def test_on_final_transcription_ignored_when_not_transcribing(controller):
    """If the controller is not in transcribing state the callback should be a no-op."""
-    from client.transcription_engine_realtime import TranscriptionResult
+    from client.models import TranscriptionResult

    controller.is_transcribing = False