Add cloud-only sidecar variant (~50MB vs 500MB-2GB)
Lightweight Deepgram-only sidecar that excludes PyTorch, faster-whisper, RealtimeSTT, and CUDA. Only includes audio capture + WebSocket streaming to Deepgram. Requires a Deepgram API key (BYOK or managed mode).

Changes:
- client/models.py: Extracted TranscriptionResult into a standalone module so deepgram_transcription.py doesn't transitively import torch
- backend/app_controller.py: Made RealtimeTranscriptionEngine and DeviceManager imports lazy (only loaded when remote.mode == "local")
- local-transcription-cloud.spec: PyInstaller spec excluding all ML deps
- SidecarSetup.svelte: Added "Cloud Only (Deepgram)" variant option
- build-sidecar-cloud.yml: CI workflow building the cloud sidecar for all 3 OSes
- sidecar-release.yml: Dispatches the cloud build alongside the CPU/CUDA builds

Sidecar download options are now:
- Standard (CPU): ~500 MB - local Whisper on any computer
- GPU Accelerated (CUDA): ~2 GB - local Whisper with NVIDIA GPU
- Cloud Only (Deepgram): ~50 MB - requires API key, no local models

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -18,13 +18,18 @@ import sys
|
||||
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
|
||||
|
||||
from client.config import Config
|
||||
from client.device_utils import DeviceManager
|
||||
from client.transcription_engine_realtime import RealtimeTranscriptionEngine, TranscriptionResult
|
||||
from client.models import TranscriptionResult
|
||||
from client.deepgram_transcription import DeepgramTranscriptionEngine
|
||||
from client.server_sync import ServerSyncClient
|
||||
from server.web_display import TranscriptionWebServer
|
||||
from version import __version__
|
||||
|
||||
# Heavy imports (torch, RealtimeSTT, faster-whisper) are deferred so
|
||||
# the cloud-only sidecar build can exclude them entirely.
|
||||
# Imported lazily in _initialize_engine() when remote.mode == "local".
|
||||
RealtimeTranscriptionEngine = None
|
||||
DeviceManager = None
|
||||
|
||||
|
||||
class AppState:
|
||||
"""Enum-like class for application states."""
|
||||
@@ -89,7 +94,18 @@ class AppController:
|
||||
|
||||
def __init__(self, config: Optional[Config] = None):
|
||||
self.config = config or Config()
|
||||
self.device_manager = DeviceManager()
|
||||
|
||||
# DeviceManager is only needed for local Whisper mode.
|
||||
# Lazy-import to keep the cloud-only sidecar lightweight.
|
||||
global DeviceManager
|
||||
if DeviceManager is None:
|
||||
try:
|
||||
from client.device_utils import DeviceManager as _DM
|
||||
DeviceManager = _DM
|
||||
except ImportError:
|
||||
DeviceManager = None
|
||||
|
||||
self.device_manager = DeviceManager() if DeviceManager else None
|
||||
|
||||
# State
|
||||
self._state = AppState.INITIALIZING
|
||||
@@ -243,15 +259,12 @@ class AppController:
|
||||
|
||||
def _initialize_engine(self):
|
||||
"""Initialize the transcription engine in a background thread."""
|
||||
device_config = self.config.get('transcription.device', 'auto')
|
||||
self.device_manager.set_device(device_config)
|
||||
|
||||
audio_device_str = self.config.get('audio.input_device', 'default')
|
||||
audio_device = None if audio_device_str == 'default' else int(audio_device_str)
|
||||
|
||||
model = self.config.get('transcription.model', 'base.en')
|
||||
language = self.config.get('transcription.language', 'en')
|
||||
device = self.device_manager.get_device_for_whisper()
|
||||
device_config = self.config.get('transcription.device', 'auto')
|
||||
compute_type = self.config.get('transcription.compute_type', 'default')
|
||||
|
||||
self.current_model_size = model
|
||||
@@ -284,6 +297,18 @@ class AppController:
|
||||
self.transcription_engine.set_error_callback(self._on_remote_error)
|
||||
self.transcription_engine.set_credits_low_callback(self._on_credits_low)
|
||||
else:
|
||||
# Lazy-import heavy local transcription dependencies
|
||||
global RealtimeTranscriptionEngine
|
||||
if RealtimeTranscriptionEngine is None:
|
||||
from client.transcription_engine_realtime import RealtimeTranscriptionEngine as _RTE
|
||||
RealtimeTranscriptionEngine = _RTE
|
||||
|
||||
if self.device_manager:
|
||||
self.device_manager.set_device(device_config)
|
||||
device = self.device_manager.get_device_for_whisper()
|
||||
else:
|
||||
device = "cpu"
|
||||
|
||||
self.transcription_engine = RealtimeTranscriptionEngine(
|
||||
model=model,
|
||||
device=device,
|
||||
@@ -602,7 +627,7 @@ class AppController:
|
||||
host = self.config.get('web_server.host', '127.0.0.1')
|
||||
port = self.actual_web_port or self.config.get('web_server.port', 8080)
|
||||
|
||||
device_info = self.device_manager.get_device_info()
|
||||
device_info = self.device_manager.get_device_info() if self.device_manager else []
|
||||
|
||||
remote_mode = self.config.get('remote.mode', 'local')
|
||||
if remote_mode in ('managed', 'byok') and self.transcription_engine:
|
||||
@@ -646,10 +671,13 @@ class AppController:
|
||||
|
||||
def get_compute_devices(self) -> list[dict]:
|
||||
"""List available compute devices."""
|
||||
device_info = self.device_manager.get_device_info()
|
||||
devices = [{"id": "auto", "name": "Auto-detect"}]
|
||||
for dev_id, dev_name in device_info:
|
||||
devices.append({"id": dev_id, "name": dev_name})
|
||||
if self.device_manager:
|
||||
device_info = self.device_manager.get_device_info()
|
||||
for dev_id, dev_name in device_info:
|
||||
devices.append({"id": dev_id, "name": dev_name})
|
||||
else:
|
||||
devices.append({"id": "cloud", "name": "Cloud (Deepgram)"})
|
||||
return devices
|
||||
|
||||
# ── Update Checking ────────────────────────────────────────────
|
||||
|
||||
@@ -79,7 +79,7 @@ async def test_start_when_not_ready(api_client, controller):
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_clear(api_client, controller):
|
||||
from client.transcription_engine_realtime import TranscriptionResult
|
||||
from client.models import TranscriptionResult
|
||||
from datetime import datetime
|
||||
|
||||
controller.transcriptions = [
|
||||
|
||||
@@ -72,7 +72,7 @@ def test_double_start_rejected(controller):
|
||||
|
||||
def test_clear_transcriptions(controller):
|
||||
"""clear_transcriptions should empty the list and return the count."""
|
||||
from client.transcription_engine_realtime import TranscriptionResult
|
||||
from client.models import TranscriptionResult
|
||||
|
||||
controller.transcriptions = [
|
||||
TranscriptionResult(text="Hello", is_final=True, timestamp=datetime.now(), user_name="Alice"),
|
||||
@@ -85,7 +85,7 @@ def test_clear_transcriptions(controller):
|
||||
|
||||
def test_get_transcriptions_text_with_timestamps(controller):
|
||||
"""get_transcriptions_text should include [HH:MM:SS] prefixes when requested."""
|
||||
from client.transcription_engine_realtime import TranscriptionResult
|
||||
from client.models import TranscriptionResult
|
||||
|
||||
ts = datetime(2025, 1, 15, 10, 30, 45)
|
||||
controller.transcriptions = [
|
||||
@@ -141,7 +141,7 @@ def test_apply_settings_no_reload_when_same(controller):
|
||||
|
||||
def test_on_final_transcription_callback_fires(controller):
|
||||
"""_on_final_transcription should append and invoke on_transcription callback."""
|
||||
from client.transcription_engine_realtime import TranscriptionResult
|
||||
from client.models import TranscriptionResult
|
||||
|
||||
received = []
|
||||
controller.on_transcription = lambda data: received.append(data)
|
||||
@@ -166,7 +166,7 @@ def test_on_final_transcription_callback_fires(controller):
|
||||
|
||||
def test_on_final_transcription_ignored_when_not_transcribing(controller):
|
||||
"""If the controller is not in transcribing state the callback should be a no-op."""
|
||||
from client.transcription_engine_realtime import TranscriptionResult
|
||||
from client.models import TranscriptionResult
|
||||
|
||||
controller.is_transcribing = False
|
||||
|
||||
|
||||
Reference in New Issue
Block a user