# local-transcription/local-transcription-cloud.spec

# -*- mode: python ; coding: utf-8 -*-
"""PyInstaller spec file for cloud-only Local Transcription backend.

This builds a lightweight sidecar (~50MB) that only supports Deepgram
cloud transcription (managed + BYOK). No local Whisper models, no
PyTorch, no CUDA -- just audio capture and WebSocket streaming.
"""

import sys
import os

block_cipher = None
is_windows = sys.platform == 'win32'

from PyInstaller.utils.hooks import collect_submodules, collect_data_files

# Static resources to bundle, given as (source path, destination folder) pairs.
# Later sections append to this list before it is passed to Analysis.
datas = [('config/default_config.yaml', 'config')]

# Bundle the PortAudio binaries that sit next to sounddevice (_sounddevice_data).
# If sounddevice cannot be imported in the build environment, only a warning
# is printed and the build continues.
try:
    import sounddevice

    portaudio_dir = os.path.join(
        os.path.dirname(sounddevice.__file__), '_sounddevice_data'
    )
    if os.path.exists(portaudio_dir):
        datas.append((portaudio_dir, '_sounddevice_data'))
        print(f"  + Collected sounddevice PortAudio data from {portaudio_dir}")

    # Also add whatever data files PyInstaller's hook utilities report for
    # sounddevice (this collects data files, not the module's code).
    extra_sd_files = collect_data_files('sounddevice')
    if extra_sd_files:
        datas.extend(extra_sd_files)
        print(f"  + Collected {len(extra_sd_files)} sounddevice data files")
except ImportError:
    print("  - Warning: sounddevice not found")

# Modules forced into the bundle. Kept deliberately small: only the
# lightweight dependencies needed for Deepgram streaming are listed.
hiddenimports = [
    # Audio capture
    'sounddevice', '_sounddevice_data', 'numpy',
    # FastAPI
    'fastapi', 'fastapi.routing', 'fastapi.responses',
    # Starlette
    'starlette', 'starlette.applications', 'starlette.routing',
    'starlette.responses', 'starlette.websockets',
    'starlette.middleware', 'starlette.middleware.cors',
    # Pydantic
    'pydantic', 'pydantic.fields', 'pydantic.main',
    # Async runtime
    'anyio', 'anyio._backends', 'anyio._backends._asyncio',
    'sniffio',
    # Uvicorn server, including its loop / protocol / lifespan submodules
    'uvicorn', 'uvicorn.logging',
    'uvicorn.loops', 'uvicorn.loops.auto',
    'uvicorn.protocols',
    'uvicorn.protocols.http',
    'uvicorn.protocols.http.auto',
    'uvicorn.protocols.http.h11_impl',
    'uvicorn.protocols.websockets',
    'uvicorn.protocols.websockets.auto',
    'uvicorn.protocols.websockets.wsproto_impl',
    'uvicorn.lifespan', 'uvicorn.lifespan.on',
    # HTTP/1.1 and WebSocket protocol libraries
    'h11',
    'websockets', 'websockets.legacy', 'websockets.legacy.server',
    # HTTP client
    'requests', 'urllib3', 'certifi', 'charset_normalizer',
]

# Extend the hidden-import list with every submodule of the core server
# packages. A failure for one package is reported and skipped so the
# remaining packages are still processed.
print("Collecting submodules for cloud backend packages...")
submodule_packages = (
    'fastapi', 'starlette', 'pydantic', 'pydantic_core',
    'anyio', 'uvicorn', 'websockets', 'h11',
)
for pkg_name in submodule_packages:
    try:
        found = collect_submodules(pkg_name)
        hiddenimports.extend(found)
        print(f"  + Collected {len(found)} submodules from {pkg_name}")
    except Exception as exc:
        print(f"  - Warning: Could not collect {pkg_name}: {exc}")

# Collect non-Python data files shipped inside the web-stack packages.
# Collection is best-effort: a failure for one package must not abort the
# build, but it is reported (like the submodule collection above) instead of
# being silently swallowed, so missing resources can be diagnosed from the
# build log.
for package in ['fastapi', 'starlette', 'pydantic', 'uvicorn']:
    try:
        data_files = collect_data_files(package)
    except Exception as e:
        print(f"  - Warning: Could not collect data files for {package}: {e}")
    else:
        if data_files:
            datas += data_files
            print(f"  + Collected {len(data_files)} data files from {package}")

# Extra modules listed explicitly as critical dependencies of Pydantic
hiddenimports.extend([
    'colorsys',
    'decimal',
    'json',
    'ipaddress',
    'pathlib',
    'uuid',
    'email.message',
    'typing_extensions',
])

# Modules that must stay out of the cloud-only bundle.
excluded_modules = [
    # Heavy ML / local transcription dependencies
    'torch', 'torchaudio', 'torchvision',
    'faster_whisper', 'ctranslate2',
    'RealtimeSTT', 'webrtcvad', 'webrtcvad_wheels',
    'silero_vad', 'onnxruntime',
    'openwakeword', 'pvporcupine', 'pyaudio',
    'noisereduce', 'scipy',
    # GUI frameworks
    'PySide6', 'PyQt5', 'PyQt6', 'tkinter',
    # Other unnecessary heavy packages
    'matplotlib', 'PIL', 'cv2',
]

# Dependency analysis rooted at the headless backend entry point, using the
# data files and hidden imports assembled above plus the local 'hooks' folder.
a = Analysis(
    ['backend/main_headless.py'],
    pathex=[],
    binaries=[],
    datas=datas,
    hiddenimports=hiddenimports,
    hookspath=['hooks'],
    hooksconfig={},
    runtime_hooks=[],
    excludes=excluded_modules,
    win_no_prefer_redirects=False,
    win_private_assemblies=False,
    cipher=block_cipher,
    noarchive=False,
)

# Python module archive built from the analysis results
pyz = PYZ(
    a.pure,
    a.zipped_data,
    cipher=block_cipher,
)

# The .ico icon is only applied when building on Windows.
exe_icon = 'LocalTranscription.ico' if is_windows else None

# Console executable for the backend. Binaries are excluded here because
# they are passed to COLLECT below.
exe = EXE(
    pyz,
    a.scripts,
    [],
    name='local-transcription-backend',
    icon=exe_icon,
    exclude_binaries=True,
    console=True,
    debug=False,
    strip=False,
    upx=True,
    bootloader_ignore_signals=False,
    disable_windowed_traceback=False,
    argv_emulation=False,
    target_arch=None,
    codesign_identity=None,
    entitlements_file=None,
)

# Gather the executable together with the analysed binaries, zip files and
# data files under the 'local-transcription-backend' output name.
coll = COLLECT(
    exe,
    a.binaries,
    a.zipfiles,
    a.datas,
    name='local-transcription-backend',
    strip=False,
    upx=True,
    upx_exclude=[],
)