Add cloud-only sidecar variant (~50MB vs 500MB-2GB)
All checks were successful
Tests / Python Backend Tests (push) Successful in 6s
Tests / Frontend Tests (push) Successful in 7s
Tests / Rust Sidecar Tests (push) Successful in 1m59s

Lightweight Deepgram-only sidecar that excludes PyTorch, faster-whisper,
RealtimeSTT, and CUDA. Only includes audio capture + WebSocket streaming
to Deepgram. Requires a Deepgram API key (BYOK or managed mode).

Changes:
- client/models.py: Extracted TranscriptionResult into standalone module
  so deepgram_transcription.py doesn't transitively import torch
- backend/app_controller.py: Made RealtimeTranscriptionEngine and
  DeviceManager imports lazy (only loaded when remote.mode == "local")
- local-transcription-cloud.spec: PyInstaller spec excluding all ML deps
- SidecarSetup.svelte: Added "Cloud Only (Deepgram)" variant option
- build-sidecar-cloud.yml: CI workflow building cloud sidecar for all 3 OS
- sidecar-release.yml: Dispatches cloud build alongside CPU/CUDA builds

Sidecar download options are now:
- Standard (CPU): ~500 MB - local Whisper on any computer
- GPU Accelerated (CUDA): ~2 GB - local Whisper with NVIDIA GPU
- Cloud Only (Deepgram): ~50 MB - requires API key, no local models

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Developer
2026-04-07 16:57:43 -07:00
parent bb039399fc
commit 3d3d7ec3c5
10 changed files with 469 additions and 42 deletions

View File

@@ -18,13 +18,18 @@ import sys
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
from client.config import Config
from client.device_utils import DeviceManager
from client.transcription_engine_realtime import RealtimeTranscriptionEngine, TranscriptionResult
from client.models import TranscriptionResult
from client.deepgram_transcription import DeepgramTranscriptionEngine
from client.server_sync import ServerSyncClient
from server.web_display import TranscriptionWebServer
from version import __version__
# Heavy imports (torch, RealtimeSTT, faster-whisper) are deferred so
# the cloud-only sidecar build can exclude them entirely.
# Imported lazily in _initialize_engine() when remote.mode == "local".
RealtimeTranscriptionEngine = None
DeviceManager = None
class AppState:
"""Enum-like class for application states."""
@@ -89,7 +94,18 @@ class AppController:
def __init__(self, config: Optional[Config] = None):
self.config = config or Config()
self.device_manager = DeviceManager()
# DeviceManager is only needed for local Whisper mode.
# Lazy-import to keep the cloud-only sidecar lightweight.
global DeviceManager
if DeviceManager is None:
try:
from client.device_utils import DeviceManager as _DM
DeviceManager = _DM
except ImportError:
DeviceManager = None
self.device_manager = DeviceManager() if DeviceManager else None
# State
self._state = AppState.INITIALIZING
@@ -243,15 +259,12 @@ class AppController:
def _initialize_engine(self):
"""Initialize the transcription engine in a background thread."""
device_config = self.config.get('transcription.device', 'auto')
self.device_manager.set_device(device_config)
audio_device_str = self.config.get('audio.input_device', 'default')
audio_device = None if audio_device_str == 'default' else int(audio_device_str)
model = self.config.get('transcription.model', 'base.en')
language = self.config.get('transcription.language', 'en')
device = self.device_manager.get_device_for_whisper()
device_config = self.config.get('transcription.device', 'auto')
compute_type = self.config.get('transcription.compute_type', 'default')
self.current_model_size = model
@@ -284,6 +297,18 @@ class AppController:
self.transcription_engine.set_error_callback(self._on_remote_error)
self.transcription_engine.set_credits_low_callback(self._on_credits_low)
else:
# Lazy-import heavy local transcription dependencies
global RealtimeTranscriptionEngine
if RealtimeTranscriptionEngine is None:
from client.transcription_engine_realtime import RealtimeTranscriptionEngine as _RTE
RealtimeTranscriptionEngine = _RTE
if self.device_manager:
self.device_manager.set_device(device_config)
device = self.device_manager.get_device_for_whisper()
else:
device = "cpu"
self.transcription_engine = RealtimeTranscriptionEngine(
model=model,
device=device,
@@ -602,7 +627,7 @@ class AppController:
host = self.config.get('web_server.host', '127.0.0.1')
port = self.actual_web_port or self.config.get('web_server.port', 8080)
device_info = self.device_manager.get_device_info()
device_info = self.device_manager.get_device_info() if self.device_manager else []
remote_mode = self.config.get('remote.mode', 'local')
if remote_mode in ('managed', 'byok') and self.transcription_engine:
@@ -646,10 +671,13 @@ class AppController:
def get_compute_devices(self) -> list[dict]:
"""List available compute devices."""
device_info = self.device_manager.get_device_info()
devices = [{"id": "auto", "name": "Auto-detect"}]
for dev_id, dev_name in device_info:
devices.append({"id": dev_id, "name": dev_name})
if self.device_manager:
device_info = self.device_manager.get_device_info()
for dev_id, dev_name in device_info:
devices.append({"id": dev_id, "name": dev_name})
else:
devices.append({"id": "cloud", "name": "Cloud (Deepgram)"})
return devices
# ── Update Checking ────────────────────────────────────────────