Add cloud-only sidecar variant (~50MB vs 500MB-2GB)

Lightweight Deepgram-only sidecar that excludes PyTorch, faster-whisper, RealtimeSTT, and CUDA. Only includes audio capture + WebSocket streaming to Deepgram. Requires a Deepgram API key (BYOK or managed mode).

Changes:
- client/models.py: Extracted TranscriptionResult into standalone module so deepgram_transcription.py doesn't transitively import torch
- backend/app_controller.py: Made RealtimeTranscriptionEngine and DeviceManager imports lazy (only loaded when remote.mode == "local")
- local-transcription-cloud.spec: PyInstaller spec excluding all ML deps
- SidecarSetup.svelte: Added "Cloud Only (Deepgram)" variant option
- build-sidecar-cloud.yml: CI workflow building cloud sidecar for all 3 OS
- sidecar-release.yml: Dispatches cloud build alongside CPU/CUDA builds

Sidecar download options are now:
- Standard (CPU): ~500 MB - local Whisper on any computer
- GPU Accelerated (CUDA): ~2 GB - local Whisper with NVIDIA GPU
- Cloud Only (Deepgram): ~50 MB - requires API key, no local models

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-07 16:57:43 -07:00
parent bb039399fc
commit 3d3d7ec3c5
10 changed files with 469 additions and 42 deletions
--- a/local-transcription-cloud.spec
+++ b/local-transcription-cloud.spec
@@ -0,0 +1,152 @@
+# -*- mode: python ; coding: utf-8 -*-
+"""PyInstaller spec file for cloud-only Local Transcription backend.
+
+This builds a lightweight sidecar (~50MB) that only supports Deepgram
+cloud transcription (managed + BYOK). No local Whisper models, no
+PyTorch, no CUDA -- just audio capture and WebSocket streaming.
+"""
+
+import sys
+import os
+
+block_cipher = None  # no bytecode cipher; forwarded to Analysis() and PYZ() below
+is_windows = sys.platform == 'win32'  # used below to pick the Windows-only .ico icon
+
+from PyInstaller.utils.hooks import collect_submodules, collect_data_files
+
+# Data files to bundle, as (source path, destination directory in the bundle) pairs
+datas = [
+    ('config/default_config.yaml', 'config'),
+]
+
+# Hidden imports -- only the lightweight deps needed for Deepgram streaming (no ML packages)
+hiddenimports = [
+    'sounddevice',
+    'numpy',
+    # FastAPI and its dependencies (Starlette, Pydantic, AnyIO, sniffio)
+    'fastapi',
+    'fastapi.routing',
+    'fastapi.responses',
+    'starlette',
+    'starlette.applications',
+    'starlette.routing',
+    'starlette.responses',
+    'starlette.websockets',
+    'starlette.middleware',
+    'starlette.middleware.cors',
+    'pydantic',
+    'pydantic.fields',
+    'pydantic.main',
+    'anyio',
+    'anyio._backends',
+    'anyio._backends._asyncio',
+    'sniffio',
+    # Uvicorn server modules, plus the h11 and websockets packages
+    'uvicorn',
+    'uvicorn.logging',
+    'uvicorn.loops',
+    'uvicorn.loops.auto',
+    'uvicorn.protocols',
+    'uvicorn.protocols.http',
+    'uvicorn.protocols.http.auto',
+    'uvicorn.protocols.http.h11_impl',
+    'uvicorn.protocols.websockets',
+    'uvicorn.protocols.websockets.auto',
+    'uvicorn.protocols.websockets.wsproto_impl',
+    'uvicorn.lifespan',
+    'uvicorn.lifespan.on',
+    'h11',
+    'websockets',
+    'websockets.legacy',
+    'websockets.legacy.server',
+    # HTTP client (requests) and the packages it depends on
+    'requests',
+    'urllib3',
+    'certifi',
+    'charset_normalizer',
+]
+
+# Collect submodules for key packages
+print("Collecting submodules for cloud backend packages...")
+for package in ['fastapi', 'starlette', 'pydantic', 'pydantic_core', 'anyio', 'uvicorn', 'websockets', 'h11']:
+    try:
+        submodules = collect_submodules(package)
+        hiddenimports += submodules
+        print(f"  + Collected {len(submodules)} submodules from {package}")
+    except Exception as e:
+        print(f"  - Warning: Could not collect {package}: {e}")
+
+# Collect data files
+for package in ['fastapi', 'starlette', 'pydantic', 'uvicorn']:
+    try:
+        data_files = collect_data_files(package)
+        if data_files:
+            datas += data_files
+    except Exception:
+        pass
+
+# Pydantic critical deps
+hiddenimports += [
+    'colorsys', 'decimal', 'json', 'ipaddress', 'pathlib', 'uuid',
+    'email.message', 'typing_extensions',
+]
+
+a = Analysis(  # dependency analysis rooted at the headless backend entry script
+    ['backend/main_headless.py'],
+    pathex=[],
+    binaries=[],
+    datas=datas,
+    hiddenimports=hiddenimports,
+    hookspath=['hooks'],  # also look for PyInstaller hooks in the local 'hooks' directory
+    hooksconfig={},
+    runtime_hooks=[],
+    excludes=[
+        # Exclude all heavy ML/local transcription deps (this build is cloud-only)
+        'torch', 'torchaudio', 'torchvision',
+        'faster_whisper', 'ctranslate2',
+        'RealtimeSTT', 'webrtcvad', 'webrtcvad_wheels',
+        'silero_vad', 'onnxruntime',
+        'openwakeword', 'pvporcupine', 'pyaudio',
+        'noisereduce', 'scipy',
+        # Exclude GUI frameworks
+        'PySide6', 'PyQt5', 'PyQt6', 'tkinter',
+        # Exclude other unnecessary heavy packages
+        'matplotlib', 'PIL', 'cv2',
+    ],
+    win_no_prefer_redirects=False,
+    win_private_assemblies=False,
+    cipher=block_cipher,
+    noarchive=False,
+)
+
+pyz = PYZ(a.pure, a.zipped_data, cipher=block_cipher)  # archive built from the Analysis results (a.pure, a.zipped_data)
+
+exe = EXE(  # the backend executable, built from the PYZ archive and the analysed scripts
+    pyz,
+    a.scripts,
+    [],
+    exclude_binaries=True,  # binaries stay out of the exe; COLLECT below receives a.binaries
+    name='local-transcription-backend',  # same name is passed to COLLECT below
+    debug=False,
+    bootloader_ignore_signals=False,
+    strip=False,
+    upx=True,
+    console=True,
+    disable_windowed_traceback=False,
+    argv_emulation=False,
+    target_arch=None,
+    codesign_identity=None,
+    entitlements_file=None,
+    icon='LocalTranscription.ico' if is_windows else None,  # icon is set on Windows only
+)
+
+coll = COLLECT(  # gathers the exe with the binaries, zipfiles and data files from Analysis
+    exe,
+    a.binaries,
+    a.zipfiles,
+    a.datas,
+    strip=False,
+    upx=True,
+    upx_exclude=[],
+    name='local-transcription-backend',
+)