Add cloud-only sidecar variant (~50MB vs 500MB-2GB)
All checks were successful
Tests / Python Backend Tests (push) Successful in 6s
Tests / Frontend Tests (push) Successful in 7s
Tests / Rust Sidecar Tests (push) Successful in 1m59s

Lightweight Deepgram-only sidecar that excludes PyTorch, faster-whisper,
RealtimeSTT, and CUDA. Only includes audio capture + WebSocket streaming
to Deepgram. Requires a Deepgram API key (BYOK or managed mode).

Changes:
- client/models.py: Extracted TranscriptionResult into standalone module
  so deepgram_transcription.py doesn't transitively import torch
- backend/app_controller.py: Made RealtimeTranscriptionEngine and
  DeviceManager imports lazy (only loaded when remote.mode == "local")
- local-transcription-cloud.spec: PyInstaller spec excluding all ML deps
- SidecarSetup.svelte: Added "Cloud Only (Deepgram)" variant option
- build-sidecar-cloud.yml: CI workflow building cloud sidecar for all 3 OS
- sidecar-release.yml: Dispatches cloud build alongside CPU/CUDA builds

Sidecar download options are now:
- Standard (CPU): ~500 MB - local Whisper on any computer
- GPU Accelerated (CUDA): ~2 GB - local Whisper with NVIDIA GPU
- Cloud Only (Deepgram): ~50 MB - requires API key, no local models

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Developer
2026-04-07 16:57:43 -07:00
parent bb039399fc
commit 3d3d7ec3c5
10 changed files with 469 additions and 42 deletions

View File

@@ -0,0 +1,227 @@
# Builds the lightweight cloud-only (Deepgram) sidecar for Linux, Windows and
# macOS, then uploads the zipped artifacts to the Gitea release for the given
# tag. Dispatched by sidecar-release.yml alongside the CPU/CUDA builds.
name: Build Sidecar (Cloud)

on:
  workflow_dispatch:
    inputs:
      tag:
        description: 'Sidecar release tag to build (e.g. sidecar-v1.0.5)'
        required: true

jobs:
  build-cloud-linux:
    name: Build Cloud Sidecar (Linux)
    runs-on: ubuntu-latest
    env:
      PYTHON_VERSION: "3.11"
      RELEASE_TAG: ${{ inputs.tag }}
    steps:
      - name: Show tag
        run: echo "Building cloud sidecar for tag ${RELEASE_TAG}"
      - uses: actions/checkout@v4
        with:
          ref: ${{ inputs.tag }}
      - name: Install uv
        run: |
          curl -LsSf https://astral.sh/uv/install.sh | sh
          echo "$HOME/.local/bin" >> $GITHUB_PATH
      - name: Set up Python
        run: uv python install ${{ env.PYTHON_VERSION }}
      - name: Install system dependencies
        run: |
          sudo apt-get update
          sudo apt-get install -y portaudio19-dev
      - name: Build cloud sidecar
        env:
          # Force registry wheels; no local path/workspace sources.
          UV_NO_SOURCES: "1"
        run: |
          uv venv
          uv pip install pyinstaller numpy sounddevice fastapi uvicorn websockets pydantic requests pyyaml packaging
          .venv/bin/pyinstaller local-transcription-cloud.spec
      - name: Package
        run: |
          cd dist/local-transcription-backend && zip -r ../../sidecar-linux-x86_64-cloud.zip .
      - name: Upload to release
        env:
          BUILD_TOKEN: ${{ secrets.BUILD_TOKEN }}
        run: |
          sudo apt-get install -y jq
          REPO_API="${GITHUB_SERVER_URL}/api/v1/repos/${GITHUB_REPOSITORY}"
          TAG="${RELEASE_TAG}"
          # The release is created by the dispatching workflow; poll up to
          # 5 minutes (30 x 10s) for it to appear before uploading.
          for i in $(seq 1 30); do
            RELEASE_ID=$(curl -s -H "Authorization: token ${BUILD_TOKEN}" \
              "${REPO_API}/releases/tags/${TAG}" | jq -r '.id // empty')
            if [ -n "${RELEASE_ID}" ] && [ "${RELEASE_ID}" != "null" ]; then
              echo "Found release ${TAG} (ID: ${RELEASE_ID})"
              break
            fi
            echo "Attempt ${i}/30: waiting for release..."
            sleep 10
          done
          if [ -z "${RELEASE_ID}" ] || [ "${RELEASE_ID}" = "null" ]; then
            echo "ERROR: Release not found"; exit 1
          fi
          for file in sidecar-*-cloud.zip; do
            filename=$(basename "$file")
            # Delete any existing asset with the same name so re-runs replace it.
            ASSET_ID=$(curl -s -H "Authorization: token ${BUILD_TOKEN}" \
              "${REPO_API}/releases/${RELEASE_ID}/assets" | jq -r ".[] | select(.name == \"${filename}\") | .id // empty")
            [ -n "${ASSET_ID}" ] && curl -s -X DELETE -H "Authorization: token ${BUILD_TOKEN}" "${REPO_API}/releases/${RELEASE_ID}/assets/${ASSET_ID}"
            curl -s -o /dev/null -w "Upload ${filename}: HTTP %{http_code}\n" -X POST \
              -H "Authorization: token ${BUILD_TOKEN}" -H "Content-Type: application/octet-stream" \
              -T "$file" "${REPO_API}/releases/${RELEASE_ID}/assets?name=${filename}"
          done

  build-cloud-windows:
    name: Build Cloud Sidecar (Windows)
    runs-on: windows-latest
    env:
      PYTHON_VERSION: "3.11"
      RELEASE_TAG: ${{ inputs.tag }}
    steps:
      - name: Show tag
        shell: powershell
        run: Write-Host "Building cloud sidecar for tag $env:RELEASE_TAG"
      - uses: actions/checkout@v4
        with:
          ref: ${{ inputs.tag }}
      - name: Install uv
        shell: powershell
        run: |
          if (Get-Command uv -ErrorAction SilentlyContinue) {
            Write-Host "uv already installed"
          } else {
            irm https://astral.sh/uv/install.ps1 | iex
            # Installer location varies by uv version; add whichever exists.
            $uvPaths = @("$env:USERPROFILE\.local\bin", "$env:USERPROFILE\.cargo\bin", "$env:LOCALAPPDATA\uv\bin")
            foreach ($p in $uvPaths) { if (Test-Path $p) { echo $p | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append } }
          }
      - name: Set up Python
        shell: powershell
        run: uv python install ${{ env.PYTHON_VERSION }}
      - name: Build cloud sidecar
        shell: powershell
        env:
          UV_NO_SOURCES: "1"
        run: |
          uv venv
          uv pip install pyinstaller numpy sounddevice fastapi uvicorn websockets pydantic requests pyyaml packaging
          .venv\Scripts\pyinstaller.exe local-transcription-cloud.spec
      - name: Package
        shell: powershell
        run: |
          if (-not (Get-Command 7z -ErrorAction SilentlyContinue)) { choco install 7zip -y }
          7z a -tzip -mx=5 sidecar-windows-x86_64-cloud.zip .\dist\local-transcription-backend\*
      - name: Upload to release
        shell: powershell
        env:
          BUILD_TOKEN: ${{ secrets.BUILD_TOKEN }}
        run: |
          $REPO_API = "${{ github.server_url }}/api/v1/repos/${{ github.repository }}"
          $Headers = @{ "Authorization" = "token $env:BUILD_TOKEN" }
          $TAG = $env:RELEASE_TAG
          $RELEASE_ID = $null
          # Poll up to 5 minutes for the release created by the dispatcher.
          for ($i = 1; $i -le 30; $i++) {
            try {
              $release = Invoke-RestMethod -Uri "$REPO_API/releases/tags/$TAG" -Headers $Headers -ErrorAction Stop
              $RELEASE_ID = $release.id
              if ($RELEASE_ID) { Write-Host "Found release $TAG (ID: $RELEASE_ID)"; break }
            } catch {}
            Write-Host "Attempt ${i}/30: waiting..."; Start-Sleep -Seconds 10
          }
          if (-not $RELEASE_ID) { Write-Host "ERROR: Release not found"; exit 1 }
          Get-ChildItem -Path . -Filter "sidecar-*-cloud.zip" | ForEach-Object {
            $fn = $_.Name; $enc = [System.Uri]::EscapeDataString($fn)
            try {
              # Replace any existing asset of the same name so re-runs are idempotent.
              $assets = Invoke-RestMethod -Uri "$REPO_API/releases/$RELEASE_ID/assets" -Headers $Headers
              $existing = $assets | Where-Object { $_.name -eq $fn }
              if ($existing) { Invoke-RestMethod -Uri "$REPO_API/releases/$RELEASE_ID/assets/$($existing.id)" -Method Delete -Headers $Headers }
            } catch {}
            curl.exe --fail -s -X POST -H "Authorization: token $env:BUILD_TOKEN" -H "Content-Type: application/octet-stream" -T "$($_.FullName)" "$REPO_API/releases/$RELEASE_ID/assets?name=$enc"
            Write-Host "Uploaded $fn"
          }

  build-cloud-macos:
    name: Build Cloud Sidecar (macOS)
    runs-on: macos-latest
    env:
      PYTHON_VERSION: "3.11"
      RELEASE_TAG: ${{ inputs.tag }}
    steps:
      - name: Show tag
        run: echo "Building cloud sidecar for tag ${RELEASE_TAG}"
      - uses: actions/checkout@v4
        with:
          ref: ${{ inputs.tag }}
      - name: Install uv
        run: |
          curl -LsSf https://astral.sh/uv/install.sh | sh
          echo "$HOME/.local/bin" >> $GITHUB_PATH
      - name: Set up Python
        run: uv python install ${{ env.PYTHON_VERSION }}
      - name: Install system dependencies
        run: brew install portaudio
      - name: Build cloud sidecar
        env:
          UV_NO_SOURCES: "1"
        run: |
          uv venv
          uv pip install pyinstaller numpy sounddevice fastapi uvicorn websockets pydantic requests pyyaml packaging
          .venv/bin/pyinstaller local-transcription-cloud.spec
      - name: Package
        run: |
          cd dist/local-transcription-backend && zip -r ../../sidecar-macos-aarch64-cloud.zip .
      - name: Upload to release
        env:
          BUILD_TOKEN: ${{ secrets.BUILD_TOKEN }}
        run: |
          which jq || brew install jq
          REPO_API="${GITHUB_SERVER_URL}/api/v1/repos/${GITHUB_REPOSITORY}"
          TAG="${RELEASE_TAG}"
          # Poll up to 5 minutes for the release created by the dispatcher.
          for i in $(seq 1 30); do
            RELEASE_ID=$(curl -s -H "Authorization: token ${BUILD_TOKEN}" \
              "${REPO_API}/releases/tags/${TAG}" | jq -r '.id // empty')
            if [ -n "${RELEASE_ID}" ] && [ "${RELEASE_ID}" != "null" ]; then
              echo "Found release ${TAG} (ID: ${RELEASE_ID})"
              break
            fi
            echo "Attempt ${i}/30: waiting for release..."
            sleep 10
          done
          if [ -z "${RELEASE_ID}" ] || [ "${RELEASE_ID}" = "null" ]; then
            echo "ERROR: Release not found"; exit 1
          fi
          for file in sidecar-*-cloud.zip; do
            filename=$(basename "$file")
            # Delete any existing asset with the same name so re-runs replace it.
            ASSET_ID=$(curl -s -H "Authorization: token ${BUILD_TOKEN}" \
              "${REPO_API}/releases/${RELEASE_ID}/assets" | jq -r ".[] | select(.name == \"${filename}\") | .id // empty")
            [ -n "${ASSET_ID}" ] && curl -s -X DELETE -H "Authorization: token ${BUILD_TOKEN}" "${REPO_API}/releases/${RELEASE_ID}/assets/${ASSET_ID}"
            curl -s -o /dev/null -w "Upload ${filename}: HTTP %{http_code}\n" -X POST \
              -H "Authorization: token ${BUILD_TOKEN}" -H "Content-Type: application/octet-stream" \
              -T "$file" "${REPO_API}/releases/${RELEASE_ID}/assets?name=${filename}"
          done

View File

@@ -118,7 +118,7 @@ jobs:
REPO_API="${GITHUB_SERVER_URL}/api/v1/repos/${GITHUB_REPOSITORY}" REPO_API="${GITHUB_SERVER_URL}/api/v1/repos/${GITHUB_REPOSITORY}"
TAG="${{ steps.bump.outputs.tag }}" TAG="${{ steps.bump.outputs.tag }}"
for workflow in build-sidecar-linux.yml build-sidecar-windows.yml build-sidecar-macos.yml; do for workflow in build-sidecar-linux.yml build-sidecar-windows.yml build-sidecar-macos.yml build-sidecar-cloud.yml; do
echo "Dispatching ${workflow} for ${TAG}..." echo "Dispatching ${workflow} for ${TAG}..."
HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" -X POST \ HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" -X POST \
-H "Authorization: token ${BUILD_TOKEN}" \ -H "Authorization: token ${BUILD_TOKEN}" \

View File

@@ -18,13 +18,18 @@ import sys
sys.path.insert(0, str(Path(__file__).resolve().parent.parent)) sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
from client.config import Config from client.config import Config
from client.device_utils import DeviceManager from client.models import TranscriptionResult
from client.transcription_engine_realtime import RealtimeTranscriptionEngine, TranscriptionResult
from client.deepgram_transcription import DeepgramTranscriptionEngine from client.deepgram_transcription import DeepgramTranscriptionEngine
from client.server_sync import ServerSyncClient from client.server_sync import ServerSyncClient
from server.web_display import TranscriptionWebServer from server.web_display import TranscriptionWebServer
from version import __version__ from version import __version__
# Heavy imports (torch, RealtimeSTT, faster-whisper) are deferred so
# the cloud-only sidecar build can exclude them entirely.
# Imported lazily in _initialize_engine() when remote.mode == "local".
RealtimeTranscriptionEngine = None
DeviceManager = None
class AppState: class AppState:
"""Enum-like class for application states.""" """Enum-like class for application states."""
@@ -89,7 +94,18 @@ class AppController:
def __init__(self, config: Optional[Config] = None): def __init__(self, config: Optional[Config] = None):
self.config = config or Config() self.config = config or Config()
self.device_manager = DeviceManager()
# DeviceManager is only needed for local Whisper mode.
# Lazy-import to keep the cloud-only sidecar lightweight.
global DeviceManager
if DeviceManager is None:
try:
from client.device_utils import DeviceManager as _DM
DeviceManager = _DM
except ImportError:
DeviceManager = None
self.device_manager = DeviceManager() if DeviceManager else None
# State # State
self._state = AppState.INITIALIZING self._state = AppState.INITIALIZING
@@ -243,15 +259,12 @@ class AppController:
def _initialize_engine(self): def _initialize_engine(self):
"""Initialize the transcription engine in a background thread.""" """Initialize the transcription engine in a background thread."""
device_config = self.config.get('transcription.device', 'auto')
self.device_manager.set_device(device_config)
audio_device_str = self.config.get('audio.input_device', 'default') audio_device_str = self.config.get('audio.input_device', 'default')
audio_device = None if audio_device_str == 'default' else int(audio_device_str) audio_device = None if audio_device_str == 'default' else int(audio_device_str)
model = self.config.get('transcription.model', 'base.en') model = self.config.get('transcription.model', 'base.en')
language = self.config.get('transcription.language', 'en') language = self.config.get('transcription.language', 'en')
device = self.device_manager.get_device_for_whisper() device_config = self.config.get('transcription.device', 'auto')
compute_type = self.config.get('transcription.compute_type', 'default') compute_type = self.config.get('transcription.compute_type', 'default')
self.current_model_size = model self.current_model_size = model
@@ -284,6 +297,18 @@ class AppController:
self.transcription_engine.set_error_callback(self._on_remote_error) self.transcription_engine.set_error_callback(self._on_remote_error)
self.transcription_engine.set_credits_low_callback(self._on_credits_low) self.transcription_engine.set_credits_low_callback(self._on_credits_low)
else: else:
# Lazy-import heavy local transcription dependencies
global RealtimeTranscriptionEngine
if RealtimeTranscriptionEngine is None:
from client.transcription_engine_realtime import RealtimeTranscriptionEngine as _RTE
RealtimeTranscriptionEngine = _RTE
if self.device_manager:
self.device_manager.set_device(device_config)
device = self.device_manager.get_device_for_whisper()
else:
device = "cpu"
self.transcription_engine = RealtimeTranscriptionEngine( self.transcription_engine = RealtimeTranscriptionEngine(
model=model, model=model,
device=device, device=device,
@@ -602,7 +627,7 @@ class AppController:
host = self.config.get('web_server.host', '127.0.0.1') host = self.config.get('web_server.host', '127.0.0.1')
port = self.actual_web_port or self.config.get('web_server.port', 8080) port = self.actual_web_port or self.config.get('web_server.port', 8080)
device_info = self.device_manager.get_device_info() device_info = self.device_manager.get_device_info() if self.device_manager else []
remote_mode = self.config.get('remote.mode', 'local') remote_mode = self.config.get('remote.mode', 'local')
if remote_mode in ('managed', 'byok') and self.transcription_engine: if remote_mode in ('managed', 'byok') and self.transcription_engine:
@@ -646,10 +671,13 @@ class AppController:
def get_compute_devices(self) -> list[dict]: def get_compute_devices(self) -> list[dict]:
"""List available compute devices.""" """List available compute devices."""
device_info = self.device_manager.get_device_info()
devices = [{"id": "auto", "name": "Auto-detect"}] devices = [{"id": "auto", "name": "Auto-detect"}]
if self.device_manager:
device_info = self.device_manager.get_device_info()
for dev_id, dev_name in device_info: for dev_id, dev_name in device_info:
devices.append({"id": dev_id, "name": dev_name}) devices.append({"id": dev_id, "name": dev_name})
else:
devices.append({"id": "cloud", "name": "Cloud (Deepgram)"})
return devices return devices
# ── Update Checking ──────────────────────────────────────────── # ── Update Checking ────────────────────────────────────────────

View File

@@ -79,7 +79,7 @@ async def test_start_when_not_ready(api_client, controller):
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_clear(api_client, controller): async def test_clear(api_client, controller):
from client.transcription_engine_realtime import TranscriptionResult from client.models import TranscriptionResult
from datetime import datetime from datetime import datetime
controller.transcriptions = [ controller.transcriptions = [

View File

@@ -72,7 +72,7 @@ def test_double_start_rejected(controller):
def test_clear_transcriptions(controller): def test_clear_transcriptions(controller):
"""clear_transcriptions should empty the list and return the count.""" """clear_transcriptions should empty the list and return the count."""
from client.transcription_engine_realtime import TranscriptionResult from client.models import TranscriptionResult
controller.transcriptions = [ controller.transcriptions = [
TranscriptionResult(text="Hello", is_final=True, timestamp=datetime.now(), user_name="Alice"), TranscriptionResult(text="Hello", is_final=True, timestamp=datetime.now(), user_name="Alice"),
@@ -85,7 +85,7 @@ def test_clear_transcriptions(controller):
def test_get_transcriptions_text_with_timestamps(controller): def test_get_transcriptions_text_with_timestamps(controller):
"""get_transcriptions_text should include [HH:MM:SS] prefixes when requested.""" """get_transcriptions_text should include [HH:MM:SS] prefixes when requested."""
from client.transcription_engine_realtime import TranscriptionResult from client.models import TranscriptionResult
ts = datetime(2025, 1, 15, 10, 30, 45) ts = datetime(2025, 1, 15, 10, 30, 45)
controller.transcriptions = [ controller.transcriptions = [
@@ -141,7 +141,7 @@ def test_apply_settings_no_reload_when_same(controller):
def test_on_final_transcription_callback_fires(controller): def test_on_final_transcription_callback_fires(controller):
"""_on_final_transcription should append and invoke on_transcription callback.""" """_on_final_transcription should append and invoke on_transcription callback."""
from client.transcription_engine_realtime import TranscriptionResult from client.models import TranscriptionResult
received = [] received = []
controller.on_transcription = lambda data: received.append(data) controller.on_transcription = lambda data: received.append(data)
@@ -166,7 +166,7 @@ def test_on_final_transcription_callback_fires(controller):
def test_on_final_transcription_ignored_when_not_transcribing(controller): def test_on_final_transcription_ignored_when_not_transcribing(controller):
"""If the controller is not in transcribing state the callback should be a no-op.""" """If the controller is not in transcribing state the callback should be a no-op."""
from client.transcription_engine_realtime import TranscriptionResult from client.models import TranscriptionResult
controller.is_transcribing = False controller.is_transcribing = False

View File

@@ -17,7 +17,7 @@ from datetime import datetime
from queue import Queue, Empty from queue import Queue, Empty
from typing import Optional, Callable from typing import Optional, Callable
from client.transcription_engine_realtime import TranscriptionResult from client.models import TranscriptionResult
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)

29
client/models.py Normal file
View File

@@ -0,0 +1,29 @@
"""Shared data models used across transcription engines.

Kept free of heavy dependencies (torch, RealtimeSTT, faster-whisper) so the
cloud-only sidecar can import it without pulling in local ML packages.
"""

from datetime import datetime


class TranscriptionResult:
    """A single transcription result, either final or a realtime preview."""

    def __init__(self, text: str, is_final: bool, timestamp: datetime, user_name: str = ""):
        """
        Initialize transcription result.

        Args:
            text: Transcribed text (surrounding whitespace is stripped).
            is_final: Whether this is a final transcription or realtime preview.
            timestamp: Timestamp of transcription.
            user_name: Name of the user/speaker ("" when unknown).
        """
        # Leading/trailing whitespace is never meaningful in a transcript line.
        self.text = text.strip()
        self.is_final = is_final
        self.timestamp = timestamp
        self.user_name = user_name

    def __repr__(self) -> str:
        # Format: "[FINAL|PREVIEW] [HH:MM:SS] <speaker>: <text>";
        # the speaker segment is omitted when user_name is empty/blank.
        prefix = "[FINAL]" if self.is_final else "[PREVIEW]"
        stamp = self.timestamp.strftime("%H:%M:%S")
        if self.user_name and self.user_name.strip():
            return f"{prefix} [{stamp}] {self.user_name}: {self.text}"
        return f"{prefix} [{stamp}] {self.text}"

View File

@@ -8,30 +8,8 @@ from threading import Lock
import logging import logging
class TranscriptionResult: # Re-export TranscriptionResult from the shared models module for backward compatibility
"""Represents a transcription result.""" from client.models import TranscriptionResult # noqa: F401
def __init__(self, text: str, is_final: bool, timestamp: datetime, user_name: str = ""):
"""
Initialize transcription result.
Args:
text: Transcribed text
is_final: Whether this is a final transcription or realtime preview
timestamp: Timestamp of transcription
user_name: Name of the user/speaker
"""
self.text = text.strip()
self.is_final = is_final
self.timestamp = timestamp
self.user_name = user_name
def __repr__(self) -> str:
time_str = self.timestamp.strftime("%H:%M:%S")
prefix = "[FINAL]" if self.is_final else "[PREVIEW]"
if self.user_name and self.user_name.strip():
return f"{prefix} [{time_str}] {self.user_name}: {self.text}"
return f"{prefix} [{time_str}] {self.text}"
def to_dict(self) -> dict: def to_dict(self) -> dict:
"""Convert to dictionary.""" """Convert to dictionary."""

View File

@@ -0,0 +1,152 @@
# -*- mode: python ; coding: utf-8 -*-
"""PyInstaller spec file for cloud-only Local Transcription backend.

This builds a lightweight sidecar (~50MB) that only supports Deepgram
cloud transcription (managed + BYOK). No local Whisper models, no
PyTorch, no CUDA -- just audio capture and WebSocket streaming.
"""
import sys
import os

block_cipher = None
is_windows = sys.platform == 'win32'

from PyInstaller.utils.hooks import collect_submodules, collect_data_files

# Data files bundled alongside the executable.
datas = [
    ('config/default_config.yaml', 'config'),
]

# Hidden imports -- only lightweight deps needed for Deepgram streaming.
hiddenimports = [
    'sounddevice',
    'numpy',
    # FastAPI and dependencies
    'fastapi',
    'fastapi.routing',
    'fastapi.responses',
    'starlette',
    'starlette.applications',
    'starlette.routing',
    'starlette.responses',
    'starlette.websockets',
    'starlette.middleware',
    'starlette.middleware.cors',
    'pydantic',
    'pydantic.fields',
    'pydantic.main',
    'anyio',
    'anyio._backends',
    'anyio._backends._asyncio',
    'sniffio',
    # Uvicorn
    'uvicorn',
    'uvicorn.logging',
    'uvicorn.loops',
    'uvicorn.loops.auto',
    'uvicorn.protocols',
    'uvicorn.protocols.http',
    'uvicorn.protocols.http.auto',
    'uvicorn.protocols.http.h11_impl',
    'uvicorn.protocols.websockets',
    'uvicorn.protocols.websockets.auto',
    'uvicorn.protocols.websockets.wsproto_impl',
    'uvicorn.lifespan',
    'uvicorn.lifespan.on',
    'h11',
    'websockets',
    'websockets.legacy',
    'websockets.legacy.server',
    # HTTP client
    'requests',
    'urllib3',
    'certifi',
    'charset_normalizer',
]

# Collect every submodule of the key packages; the explicit list above is a
# safety net in case collection fails on a given platform.
print("Collecting submodules for cloud backend packages...")
for package in ['fastapi', 'starlette', 'pydantic', 'pydantic_core', 'anyio', 'uvicorn', 'websockets', 'h11']:
    try:
        submodules = collect_submodules(package)
        hiddenimports += submodules
        print(f"  + Collected {len(submodules)} submodules from {package}")
    except Exception as e:
        print(f"  - Warning: Could not collect {package}: {e}")

# Collect non-Python data files shipped inside these packages.
for package in ['fastapi', 'starlette', 'pydantic', 'uvicorn']:
    try:
        data_files = collect_data_files(package)
        if data_files:
            datas += data_files
    except Exception:
        pass

# Stdlib modules pydantic imports dynamically; make them explicit so
# PyInstaller's static analysis does not miss them.
hiddenimports += [
    'colorsys', 'decimal', 'json', 'ipaddress', 'pathlib', 'uuid',
    'email.message', 'typing_extensions',
]

a = Analysis(
    ['backend/main_headless.py'],
    pathex=[],
    binaries=[],
    datas=datas,
    hiddenimports=hiddenimports,
    hookspath=['hooks'],
    hooksconfig={},
    runtime_hooks=[],
    excludes=[
        # Exclude all heavy ML/local transcription deps
        'torch', 'torchaudio', 'torchvision',
        'faster_whisper', 'ctranslate2',
        'RealtimeSTT', 'webrtcvad', 'webrtcvad_wheels',
        'silero_vad', 'onnxruntime',
        'openwakeword', 'pvporcupine', 'pyaudio',
        'noisereduce', 'scipy',
        # Exclude GUI frameworks
        'PySide6', 'PyQt5', 'PyQt6', 'tkinter',
        # Exclude other unnecessary heavy packages
        'matplotlib', 'PIL', 'cv2',
    ],
    win_no_prefer_redirects=False,
    win_private_assemblies=False,
    cipher=block_cipher,
    noarchive=False,
)

pyz = PYZ(a.pure, a.zipped_data, cipher=block_cipher)

exe = EXE(
    pyz,
    a.scripts,
    [],
    exclude_binaries=True,
    name='local-transcription-backend',
    debug=False,
    bootloader_ignore_signals=False,
    strip=False,
    upx=True,
    console=True,
    disable_windowed_traceback=False,
    argv_emulation=False,
    target_arch=None,
    codesign_identity=None,
    entitlements_file=None,
    icon='LocalTranscription.ico' if is_windows else None,
)

coll = COLLECT(
    exe,
    a.binaries,
    a.zipfiles,
    a.datas,
    strip=False,
    upx=True,
    upx_exclude=[],
    name='local-transcription-backend',
)

View File

@@ -114,6 +114,19 @@
<span class="variant-desc">Faster transcription with NVIDIA GPU (~2 GB download)</span> <span class="variant-desc">Faster transcription with NVIDIA GPU (~2 GB download)</span>
</div> </div>
</label> </label>
<label class="variant-option" class:selected={variant === "cloud"}>
<input
type="radio"
name="variant"
value="cloud"
bind:group={variant}
/>
<div class="variant-info">
<span class="variant-name">Cloud Only (Deepgram)</span>
<span class="variant-desc">Lightweight, requires Deepgram API key (~50 MB download)</span>
</div>
</label>
</div> </div>
<button class="download-btn" onclick={startDownload}> <button class="download-btn" onclick={startDownload}>