Cross-platform distribution, UI improvements, and performance optimizations

- PyInstaller frozen sidecar: spec file, build script, and ffmpeg path resolver for self-contained distribution without Python prerequisites - Dual-mode sidecar launcher: frozen binary (production) with dev mode fallback - Parallel transcription + diarization pipeline (~30-40% faster) - GPU auto-detection for diarization (CUDA when available) - Async run_pipeline command for real-time progress event delivery - Web Audio API backend for instant playback and seeking - OpenAI-compatible provider replacing LiteLLM client-side routing - Cross-platform RAM detection (Linux/macOS/Windows) - Settings: speaker count hint, token reveal toggles, dark dropdown styling - Loading splash screen, flexbox layout fix for viewport overflow - Gitea Actions CI/CD pipeline (Linux, Windows, macOS ARM) - Updated README and CLAUDE.md documentation Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-20 21:33:43 -07:00
parent 42ccd3e21d
commit 58faa83cb3
27 changed files with 1301 additions and 283 deletions
@@ -0,0 +1,136 @@
 # Builds the frozen Python sidecar and the Tauri desktop app for every
 # supported platform.
 name: Build & Release
 # Runs for pushes to main, version tags (v*), and pull requests against main.
 on:
  push:
    branches:
      - main
    tags:
      - 'v*'
  pull_request:
    branches:
      - main
 # Tool versions shared by the jobs; quoted so they stay strings.
 env:
  PYTHON_VERSION: '3.11'
  NODE_VERSION: '20'
 jobs:
  # Freezes the Python sidecar once per target platform and publishes the
  # resulting directory as a short-lived artifact for the build-tauri job.
  build-sidecar:
    name: Build sidecar (${{ matrix.target }})
    runs-on: ${{ matrix.runner }}
    strategy:
      # Keep building the other platforms when one matrix entry fails.
      fail-fast: false
      matrix:
        # `target` is the target triple used in the artifact name below;
        # `platform` is not referenced by any step in this job.
        include:
          - runner: ubuntu-latest
            target: x86_64-unknown-linux-gnu
            platform: linux
          - runner: windows-latest
            target: x86_64-pc-windows-msvc
            platform: windows
          - runner: macos-latest
            target: aarch64-apple-darwin
            platform: macos
    steps:
      - uses: actions/checkout@v4
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: ${{ env.PYTHON_VERSION }}
      - name: Build sidecar
        # The build script lives in python/ and is run from there.
        working-directory: python
        run: python build_sidecar.py --cpu-only
      - name: Upload sidecar artifact
        uses: actions/upload-artifact@v4
        with:
          # build-tauri downloads the artifact under this exact name.
          name: sidecar-${{ matrix.target }}
          # The whole output directory is uploaded, not just the main binary.
          path: python/dist/voice-to-notes-sidecar/
          # Intermediate artifact: kept only briefly.
          retention-days: 7
  # Builds the Tauri desktop app per platform, embedding the sidecar that
  # build-sidecar produced for the same target triple.
  build-tauri:
    name: Build app (${{ matrix.target }})
    # Waits for the whole build-sidecar matrix, not just the matching entry.
    needs: build-sidecar
    runs-on: ${{ matrix.runner }}
    strategy:
      fail-fast: false
      matrix:
        # Mirrors the build-sidecar matrix; `target` must match so that the
        # artifact name resolves, `platform` drives the `if:` conditions below.
        include:
          - runner: ubuntu-latest
            target: x86_64-unknown-linux-gnu
            platform: linux
          - runner: windows-latest
            target: x86_64-pc-windows-msvc
            platform: windows
          - runner: macos-latest
            target: aarch64-apple-darwin
            platform: macos
    steps:
      - uses: actions/checkout@v4
      - name: Set up Node.js
        uses: actions/setup-node@v4
        with:
          node-version: ${{ env.NODE_VERSION }}
          # Note: 'cache: npm' requires the Gitea instance to have
          # Actions cache configured. Remove this if caching is unavailable.
          cache: npm
      - name: Install Rust stable
        uses: dtolnay/rust-toolchain@stable
      - name: Install system dependencies (Linux)
        if: matrix.platform == 'linux'
        # GTK/WebKit development packages plus bundling helpers for the Linux build.
        run: |
          sudo apt-get update
          sudo apt-get install -y libgtk-3-dev libwebkit2gtk-4.1-dev libappindicator3-dev librsvg2-dev patchelf
      - name: Download sidecar artifact
        uses: actions/download-artifact@v4
        with:
          name: sidecar-${{ matrix.target }}
          # The artifact's contents are placed directly in this directory.
          path: src-tauri/binaries/
      - name: Make sidecar executable (Unix)
        if: matrix.platform != 'windows'
        # Artifact upload/download does not keep the executable bit, so restore it.
        # NOTE(review): only the main binary is chmod-ed; any other executables
        # shipped in the artifact (e.g. ffmpeg/ffprobe) stay non-executable.
        run: chmod +x src-tauri/binaries/voice-to-notes-sidecar-${{ matrix.target }}
      - name: Install npm dependencies
        run: npm ci
      - name: Build Tauri app
        run: npm run tauri build
        env:
          TAURI_SIGNING_PRIVATE_KEY: ${{ secrets.TAURI_SIGNING_PRIVATE_KEY }}
          # Presumably merged into the Tauri config by the CLI so that the
          # sidecar is bundled as an external binary — confirm against Tauri docs.
          TAURI_CONFIG: '{"bundle":{"externalBin":["binaries/voice-to-notes-sidecar"]}}'
      # One upload step per platform because the bundle formats differ.
      - name: Upload app artifacts (Linux)
        if: matrix.platform == 'linux'
        uses: actions/upload-artifact@v4
        with:
          name: app-${{ matrix.target }}
          path: |
            src-tauri/target/release/bundle/deb/*.deb
            src-tauri/target/release/bundle/appimage/*.AppImage
          retention-days: 30
      - name: Upload app artifacts (Windows)
        if: matrix.platform == 'windows'
        uses: actions/upload-artifact@v4
        with:
          name: app-${{ matrix.target }}
          path: |
            src-tauri/target/release/bundle/msi/*.msi
            src-tauri/target/release/bundle/nsis/*.exe
          retention-days: 30
      - name: Upload app artifacts (macOS)
        if: matrix.platform == 'macos'
        uses: actions/upload-artifact@v4
        with:
          name: app-${{ matrix.target }}
          path: |
            src-tauri/target/release/bundle/dmg/*.dmg
            src-tauri/target/release/bundle/macos/*.app
          retention-days: 30
@@ -0,0 +1,141 @@
 # Builds the frozen Python sidecar and the Tauri desktop app for every
 # supported platform.
 name: Build & Release
 # Runs for pushes to main, version tags (v*), pull requests against main,
 # and manual dispatch.
 on:
  push:
    branches:
      - main
    tags:
      - 'v*'
  pull_request:
    branches:
      - main
  workflow_dispatch:
 # Tool versions shared by the jobs; quoted so they stay strings.
 env:
  PYTHON_VERSION: '3.11'
  NODE_VERSION: '20'
 jobs:
  # Freezes the Python sidecar once per target platform and publishes the
  # resulting directory as a short-lived artifact for the build-tauri job.
  build-sidecar:
    name: Build sidecar (${{ matrix.target }})
    runs-on: ${{ matrix.runner }}
    strategy:
      # Keep building the other platforms when one matrix entry fails.
      fail-fast: false
      matrix:
        # `target` is the target triple used in the artifact name below.
        include:
          # Ubuntu 22.04 rather than 20.04: the 20.04 hosted image has been
          # retired, and the app itself already needs a 22.04-era system
          # (libwebkit2gtk-4.1), so an older glibc baseline buys nothing.
          - runner: ubuntu-22.04
            target: x86_64-unknown-linux-gnu
            platform: linux
          - runner: windows-latest
            target: x86_64-pc-windows-msvc
            platform: windows
          # NOTE(review): confirm the macos-13 (Intel) image is still offered
          # by the runner provider; older macOS images get retired over time.
          - runner: macos-13
            target: x86_64-apple-darwin
            platform: macos-intel
          - runner: macos-14
            target: aarch64-apple-darwin
            platform: macos-arm
    steps:
      - uses: actions/checkout@v4
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: ${{ env.PYTHON_VERSION }}
      - name: Build sidecar
        # The build script lives in python/ and is run from there.
        working-directory: python
        run: python build_sidecar.py --cpu-only
      - name: Upload sidecar artifact
        uses: actions/upload-artifact@v4
        with:
          # build-tauri downloads the artifact under this exact name.
          name: sidecar-${{ matrix.target }}
          path: python/dist/voice-to-notes-sidecar/
          # Intermediate artifact: kept only briefly.
          retention-days: 7
  # Builds the Tauri desktop app per platform, embedding the sidecar that
  # build-sidecar produced for the same target triple.
  build-tauri:
    name: Build app (${{ matrix.target }})
    # Waits for the whole build-sidecar matrix, not just the matching entry.
    needs: build-sidecar
    runs-on: ${{ matrix.runner }}
    strategy:
      fail-fast: false
      matrix:
        # `target` must match a build-sidecar entry so the artifact name
        # resolves; `platform` drives the `if:` conditions below.
        include:
          # Must be Ubuntu 22.04 or newer: libwebkit2gtk-4.1-dev (installed
          # below) is not packaged for 20.04, so apt-get would fail there.
          - runner: ubuntu-22.04
            target: x86_64-unknown-linux-gnu
            platform: linux
          - runner: windows-latest
            target: x86_64-pc-windows-msvc
            platform: windows
          # NOTE(review): confirm the macos-13 (Intel) image is still offered
          # by the runner provider; older macOS images get retired over time.
          - runner: macos-13
            target: x86_64-apple-darwin
            platform: macos-intel
          - runner: macos-14
            target: aarch64-apple-darwin
            platform: macos-arm
    steps:
      - uses: actions/checkout@v4
      - name: Set up Node.js
        uses: actions/setup-node@v4
        with:
          node-version: ${{ env.NODE_VERSION }}
          cache: npm
      - name: Install Rust stable
        uses: dtolnay/rust-toolchain@stable
      - name: Install system dependencies (Linux)
        if: matrix.platform == 'linux'
        # GTK/WebKit development packages plus bundling helpers for the Linux build.
        run: |
          sudo apt-get update
          sudo apt-get install -y libgtk-3-dev libwebkit2gtk-4.1-dev libappindicator3-dev librsvg2-dev patchelf
      - name: Download sidecar artifact
        uses: actions/download-artifact@v4
        with:
          name: sidecar-${{ matrix.target }}
          # The artifact's contents are placed directly in this directory.
          path: src-tauri/binaries/
      - name: Make sidecar executable (Unix)
        if: matrix.platform != 'windows'
        # Artifact upload/download does not keep the executable bit, so restore it.
        run: chmod +x src-tauri/binaries/voice-to-notes-sidecar-${{ matrix.target }}
      - name: Install npm dependencies
        run: npm ci
      - name: Build Tauri app
        run: npm run tauri build
        env:
          TAURI_SIGNING_PRIVATE_KEY: ${{ secrets.TAURI_SIGNING_PRIVATE_KEY }}
          # Presumably merged into the Tauri config by the CLI so that the
          # sidecar is bundled as an external binary — confirm against Tauri docs.
          TAURI_CONFIG: '{"bundle":{"externalBin":["binaries/voice-to-notes-sidecar"]}}'
      # One upload step per platform family because the bundle formats differ.
      - name: Upload app artifacts (Linux)
        if: matrix.platform == 'linux'
        uses: actions/upload-artifact@v4
        with:
          name: app-${{ matrix.target }}
          path: |
            src-tauri/target/release/bundle/deb/*.deb
            src-tauri/target/release/bundle/appimage/*.AppImage
          retention-days: 30
      - name: Upload app artifacts (Windows)
        if: matrix.platform == 'windows'
        uses: actions/upload-artifact@v4
        with:
          name: app-${{ matrix.target }}
          path: |
            src-tauri/target/release/bundle/msi/*.msi
            src-tauri/target/release/bundle/nsis/*.exe
          retention-days: 30
      - name: Upload app artifacts (macOS)
        # Covers both macos-intel and macos-arm matrix entries.
        if: startsWith(matrix.platform, 'macos')
        uses: actions/upload-artifact@v4
        with:
          name: app-${{ matrix.target }}
          path: |
            src-tauri/target/release/bundle/dmg/*.dmg
            src-tauri/target/release/bundle/macos/*.app
          retention-days: 30
@@ -46,3 +46,9 @@ Thumbs.db
 *.ogg
 *.flac
 !test/fixtures/*
 # Sidecar build artifacts
 src-tauri/binaries/*
 !src-tauri/binaries/.gitkeep
 python/dist/
 python/build/
@@ -8,7 +8,7 @@ Desktop app for transcribing audio/video with speaker identification. Runs local
 - **ML pipeline:** Python sidecar process (faster-whisper, pyannote.audio, wav2vec2)
 - **Database:** SQLite (via rusqlite in Rust)
 - **Local AI:** Bundled llama-server (llama.cpp) — default, no install needed
- **Cloud AI providers:** LiteLLM, OpenAI, Anthropic (optional, user-configured)
+- **Cloud AI providers:** OpenAI, Anthropic, OpenAI-compatible endpoints (optional, user-configured)
 - **Caption export:** pysubs2 (Python)
 - **Audio UI:** wavesurfer.js
 - **Transcript editor:** TipTap (ProseMirror)
@@ -40,7 +40,13 @@ docs/                   # Architecture and design documents
 - Database: UUIDs as primary keys (TEXT type in SQLite)
 - All timestamps in milliseconds (integer) relative to media file start
 ## Distribution
 - Python sidecar is frozen via PyInstaller into a standalone binary for distribution
 - Tauri bundles the sidecar via `externalBin` — no Python required for end users
 - CI/CD builds on Gitea Actions (Linux, Windows, macOS ARM)
 - Dev mode uses system Python (`VOICE_TO_NOTES_DEV=1` or debug builds)
 ## Platform Targets
- Linux (primary development target)
+- Linux x86_64 (primary development target)
- Windows (must work, tested before release)
+- Windows x86_64
- macOS (future, not yet targeted)
+- macOS aarch64 (Apple Silicon)
@@ -2,28 +2,90 @@
 A desktop application that transcribes audio/video recordings with speaker identification, producing editable transcriptions with synchronized audio playback.
-## Goals
+## Features
- **Speech-to-Text Transcription** — Accurately convert spoken audio from recordings into text
+- **Speech-to-Text Transcription** — Accurate transcription via faster-whisper (Whisper models) with word-level timestamps
- **Speaker Identification (Diarization)** — Detect and distinguish between different speakers in a conversation
+- **Speaker Identification (Diarization)** — Detect and distinguish between speakers using pyannote.audio
- **Speaker Naming** — Assign and persist speaker names/IDs across the transcription
+- **Synchronized Playback** — Click any word to seek to that point in the audio (Web Audio API for instant playback)
- **Synchronized Playback** — Click any transcribed text segment to play back the corresponding audio for review and correction
+- **AI Integration** — Ask questions about your transcript via OpenAI, Anthropic, or any OpenAI-compatible API (LiteLLM proxies, Ollama, vLLM)
- **Export Formats**
+- **Export Formats** — SRT, WebVTT, ASS captions, plain text, and Markdown with speaker labels
-  - Closed captioning files (SRT, VTT) for video
+- **Cross-Platform** — Builds for Linux, Windows, and macOS (Apple Silicon)
  - Plain text documents with speaker labels
 - **AI Integration** — Connect to AI providers to ask questions about the conversation and generate condensed notes/summaries
 ## Platform Support
-| Platform | Status |
+| Platform | Architecture | Status |
-|----------|--------|
+|----------|-------------|--------|
-| Linux    | Planned (initial target) |
+| Linux    | x86_64      | Supported |
-| Windows  | Planned (initial target) |
+| Windows  | x86_64      | Supported |
-| macOS    | Future (pending hardware) |
+| macOS    | ARM (Apple Silicon) | Supported |
-## Project Status
+## Tech Stack
-**Early planning phase** — Architecture and technology decisions in progress.
+- **Desktop shell:** Tauri v2 (Rust backend + Svelte 5 / TypeScript frontend)
 - **ML pipeline:** Python sidecar (faster-whisper, pyannote.audio) — frozen via PyInstaller for distribution
 - **Audio playback:** wavesurfer.js with Web Audio API backend
 - **AI providers:** OpenAI, Anthropic, OpenAI-compatible endpoints (local or remote)
 - **Local AI:** Bundled llama-server (llama.cpp)
 - **Caption export:** pysubs2
 ## Development
 ### Prerequisites
 - Node.js 20+
 - Rust (stable)
 - Python 3.11+ with ML dependencies
 - System: `libgtk-3-dev`, `libwebkit2gtk-4.1-dev`, `libappindicator3-dev`, `librsvg2-dev`, `patchelf` (Linux)
 ### Getting Started
 ```bash
 # Install frontend dependencies
 npm install
 # Install Python sidecar dependencies
 cd python && pip install -e . && cd ..
 # Run in dev mode (uses system Python for the sidecar)
 npm run tauri:dev
 ```
 ### Building for Distribution
 ```bash
 # Build the frozen Python sidecar
 npm run sidecar:build
 # Build the Tauri app (requires sidecar in src-tauri/binaries/)
 npm run tauri build
 ```
 ### CI/CD
 Gitea Actions workflows are in `.gitea/workflows/`. The build pipeline:
 1. **Build sidecar** — PyInstaller-frozen Python binary per platform (CPU-only PyTorch)
 2. **Build Tauri app** — Bundles the sidecar via `externalBin`, produces .deb/.AppImage (Linux), .msi/.exe (Windows), .dmg/.app (macOS)
 #### Required Secrets
 | Secret | Purpose | Required? |
 |--------|---------|-----------|
 | `TAURI_SIGNING_PRIVATE_KEY` | Signs Tauri update bundles | Optional (for auto-updates) |
 No other secrets are needed for building. AI provider API keys and HuggingFace tokens are configured by end users in the app's Settings.
 ### Project Structure
 ```
 src/                    # Svelte 5 frontend
 src-tauri/              # Rust backend (Tauri commands, sidecar manager, SQLite)
 python/                 # Python sidecar (transcription, diarization, AI)
  voice_to_notes/       # Python package
  build_sidecar.py      # PyInstaller build script
  voice_to_notes.spec   # PyInstaller spec
 .gitea/workflows/       # Gitea Actions CI/CD
 ```
 ## License
@@ -11,7 +11,9 @@
    "check:watch": "svelte-kit sync && svelte-check --tsconfig ./tsconfig.json --watch",
    "lint": "eslint .",
    "test": "vitest",
-    "tauri": "tauri"
+    "tauri": "tauri",
    "tauri:dev": "VOICE_TO_NOTES_DEV=1 tauri dev",
    "sidecar:build": "cd python && python3 build_sidecar.py"
  },
  "license": "MIT",
  "dependencies": {
@@ -0,0 +1,215 @@
 #!/usr/bin/env python3
 """Build the Voice to Notes sidecar as a standalone binary using PyInstaller.
 Usage:
    python build_sidecar.py [--cpu-only]
 Produces a directory `dist/voice-to-notes-sidecar/` containing the frozen
 sidecar binary and all dependencies. The main binary is renamed to include
 the Tauri target triple for externalBin resolution.
 """
 from __future__ import annotations
 import argparse
 import os
 import platform
 import shutil
 import stat
 import subprocess
 import sys
 import urllib.request
 import zipfile
 from pathlib import Path
 # Directory containing this build script; every other path is derived from it.
 SCRIPT_DIR = Path(__file__).resolve().parent
 # PyInstaller output root (the frozen sidecar lands in a sub-directory of it).
 DIST_DIR = SCRIPT_DIR / "dist"
 # Scratch directory; also hosts the throw-away build venv.
 BUILD_DIR = SCRIPT_DIR / "build"
 # PyInstaller spec describing how the sidecar is frozen.
 SPEC_FILE = SCRIPT_DIR / "voice_to_notes.spec"
 # Static ffmpeg download URLs (GPL-licensed builds)
 # Keys are "<sys.platform>-<arch>" with arch normalised to "x86_64" or "arm64",
 # matching the key that download_ffmpeg() builds.
 # Both darwin entries point at the same URL.
 # NOTE(review): confirm that URL serves a build usable on Apple Silicon and
 # whether ffprobe has to be fetched separately.
 FFMPEG_URLS: dict[str, str] = {
    "linux-x86_64": "https://johnvansickle.com/ffmpeg/releases/ffmpeg-release-amd64-static.tar.xz",
    "darwin-x86_64": "https://evermeet.cx/ffmpeg/getrelease/zip",
    "darwin-arm64": "https://evermeet.cx/ffmpeg/getrelease/zip",
    "win32-x86_64": "https://www.gyan.dev/ffmpeg/builds/ffmpeg-release-essentials.zip",
 }
 def get_target_triple() -> str:
    """Return the Tauri-style target triple of the machine running this script.

    The CPU name reported by ``platform.machine()`` is normalised to
    ``x86_64`` or ``aarch64`` when it is a known alias and passed through
    unchanged otherwise. Linux, macOS and Windows get their conventional
    vendor/OS suffix; any other system yields ``<arch>-unknown-<system>``.
    """
    raw_arch = platform.machine().lower()
    os_name = platform.system().lower()
    # Aliases different operating systems use for the same CPU family.
    canonical_arch = {
        "x86_64": "x86_64",
        "amd64": "x86_64",
        "aarch64": "aarch64",
        "arm64": "aarch64",
    }
    cpu = canonical_arch.get(raw_arch, raw_arch)
    # Vendor/OS part of the triple for the systems known by name.
    known_suffixes = {
        "linux": "unknown-linux-gnu",
        "darwin": "apple-darwin",
        "windows": "pc-windows-msvc",
    }
    suffix = known_suffixes.get(os_name, f"unknown-{os_name}")
    return f"{cpu}-{suffix}"
 def create_venv_and_install(cpu_only: bool) -> Path:
    """Create a fresh build venv and install everything needed to freeze the sidecar.

    Any existing venv at ``BUILD_DIR / "sidecar-venv"`` is deleted first.

    Args:
        cpu_only: When true, install torch/torchaudio from the CPU-only wheel
            index; otherwise install the default wheels (may include CUDA).

    Returns:
        Path to the Python interpreter inside the new venv.

    Raises:
        subprocess.CalledProcessError: If venv creation or any install step fails.
    """
    venv_dir = BUILD_DIR / "sidecar-venv"
    if venv_dir.exists():
        shutil.rmtree(venv_dir)
    print(f"[build] Creating venv at {venv_dir}")
    subprocess.run([sys.executable, "-m", "venv", str(venv_dir)], check=True)
    # Locate the interpreter inside the venv (layout differs on Windows).
    bin_dir = venv_dir / ("Scripts" if sys.platform == "win32" else "bin")
    python = str(bin_dir / "python")
    # Always drive pip as "python -m pip": on Windows the pip launcher cannot
    # replace itself, so "pip install --upgrade pip" fails there.
    pip = [python, "-m", "pip"]
    # Upgrade pip
    subprocess.run([*pip, "install", "--upgrade", "pip"], check=True)
    # Install torch (CPU-only to avoid bundling ~2GB of CUDA libs)
    if cpu_only:
        print("[build] Installing PyTorch (CPU-only)")
        subprocess.run(
            [*pip, "install", "torch", "torchaudio",
             "--index-url", "https://download.pytorch.org/whl/cpu"],
            check=True,
        )
    else:
        print("[build] Installing PyTorch (default, may include CUDA)")
        subprocess.run([*pip, "install", "torch", "torchaudio"], check=True)
    # Install project and dev deps (includes pyinstaller)
    print("[build] Installing project dependencies")
    subprocess.run([*pip, "install", "-e", f"{SCRIPT_DIR}[dev]"], check=True)
    return Path(python)
 def run_pyinstaller(python: Path) -> Path:
    """Freeze the sidecar by running PyInstaller on the spec file.

    Args:
        python: Interpreter used to launch ``-m PyInstaller``.

    Returns:
        The ``voice-to-notes-sidecar`` directory under ``DIST_DIR``.

    Raises:
        subprocess.CalledProcessError: If PyInstaller exits with an error.
        RuntimeError: If the expected output directory does not exist afterwards.
    """
    print("[build] Running PyInstaller")
    command = [str(python), "-m", "PyInstaller", "--clean", "--noconfirm", str(SPEC_FILE)]
    # Run from the script directory so relative paths in the spec resolve.
    subprocess.run(command, cwd=str(SCRIPT_DIR), check=True)
    frozen_dir = DIST_DIR / "voice-to-notes-sidecar"
    if frozen_dir.exists():
        return frozen_dir
    raise RuntimeError(f"PyInstaller output not found at {frozen_dir}")
 def download_ffmpeg(output_dir: Path) -> None:
    """Download static ffmpeg/ffprobe binaries for the current platform.

    The archive listed in ``FFMPEG_URLS`` for this platform is fetched into
    ``output_dir`` and any ``ffmpeg``/``ffprobe`` executables it contains are
    written next to the sidecar. This is best effort: a missing URL, a failed
    download or an unusable archive only prints a warning.

    The archive type is detected from the downloaded file itself rather than
    from the URL, because some download URLs carry no file extension.

    Args:
        output_dir: Directory that receives the extracted binaries.
    """
    import tarfile

    machine = platform.machine().lower()
    if machine in ("amd64", "x86_64"):
        machine = "x86_64"
    elif machine in ("aarch64", "arm64"):
        machine = "arm64"
    key = f"{sys.platform}-{machine}"
    url = FFMPEG_URLS.get(key)
    if not url:
        print(f"[build] Warning: No ffmpeg download URL for platform {key}, skipping")
        return
    print(f"[build] Downloading ffmpeg for {key}")
    tmp_path = output_dir / "ffmpeg_download"
    exec_bits = stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH
    extracted: set[str] = set()
    try:
        urllib.request.urlretrieve(url, str(tmp_path))
        if zipfile.is_zipfile(str(tmp_path)):
            with zipfile.ZipFile(str(tmp_path), "r") as zf:
                for name in zf.namelist():
                    basename = os.path.basename(name)
                    if basename in ("ffmpeg", "ffprobe", "ffmpeg.exe", "ffprobe.exe"):
                        dest = output_dir / basename
                        dest.write_bytes(zf.read(name))
                        if sys.platform != "win32":
                            dest.chmod(dest.stat().st_mode | exec_bits)
                        extracted.add(basename)
        elif tarfile.is_tarfile(str(tmp_path)):
            # "r:*" lets tarfile pick the compression (the Linux build is .tar.xz).
            with tarfile.open(str(tmp_path), "r:*") as tar:
                for member in tar.getmembers():
                    basename = os.path.basename(member.name)
                    if not member.isfile() or basename not in ("ffmpeg", "ffprobe"):
                        continue
                    # Copy through a file object so nothing is ever written
                    # outside output_dir, whatever paths the archive contains.
                    source = tar.extractfile(member)
                    if source is None:
                        continue
                    dest = output_dir / basename
                    with source, open(dest, "wb") as sink:
                        shutil.copyfileobj(source, sink)
                    dest.chmod(dest.stat().st_mode | exec_bits)
                    extracted.add(basename)
        else:
            print(f"[build] Warning: Unrecognized ffmpeg archive format from {url}")
        if extracted:
            print("[build] ffmpeg downloaded successfully")
        # Report tools the archive did not provide instead of claiming success.
        found_tools = {os.path.splitext(name)[0] for name in extracted}
        missing = sorted({"ffmpeg", "ffprobe"} - found_tools)
        if missing:
            print(f"[build] Warning: ffmpeg archive did not provide: {', '.join(missing)}")
    except Exception as e:
        # Deliberately non-fatal: the sidecar can still be built without ffmpeg.
        print(f"[build] Warning: Failed to download ffmpeg: {e}")
    finally:
        if tmp_path.exists():
            tmp_path.unlink()
 def rename_binary(output_dir: Path, target_triple: str) -> None:
    """Give the frozen executable the ``-<target triple>`` suffix.

    The module docstring states the triple is needed for Tauri's externalBin
    resolution. On Windows the ``.exe`` extension is kept after the triple.
    A missing executable only produces a warning.

    Args:
        output_dir: Directory holding the frozen sidecar.
        target_triple: Triple to append to the binary name.
    """
    extension = ".exe" if sys.platform == "win32" else ""
    src = output_dir / f"voice-to-notes-sidecar{extension}"
    dst = output_dir / f"voice-to-notes-sidecar-{target_triple}{extension}"
    if not src.exists():
        print(f"[build] Warning: Expected binary not found at {src}")
        return
    print(f"[build] Renaming {src.name} -> {dst.name}")
    src.rename(dst)
 def main() -> None:
    """Parse command-line flags and run the full sidecar build.

    Steps: create the build venv, run PyInstaller, download ffmpeg into the
    output directory, then rename the main binary with the target triple.

    CPU-only PyTorch is the default. ``--cpu-only`` is accepted to make that
    choice explicit, ``--with-cuda`` switches to the default PyTorch wheels,
    and passing both is rejected as contradictory.
    """
    parser = argparse.ArgumentParser(description="Build the Voice to Notes sidecar binary")
    # The two flags select opposite builds, so argparse enforces that at most
    # one of them is given.
    torch_flavor = parser.add_mutually_exclusive_group()
    torch_flavor.add_argument(
        "--cpu-only",
        action="store_true",
        help="Install CPU-only PyTorch (this is the default; avoids bundling CUDA)",
    )
    torch_flavor.add_argument(
        "--with-cuda",
        action="store_true",
        help="Install PyTorch with CUDA support",
    )
    args = parser.parse_args()
    # CPU-only unless CUDA was requested explicitly.
    cpu_only = not args.with_cuda
    target_triple = get_target_triple()
    print(f"[build] Target triple: {target_triple}")
    print(f"[build] CPU-only: {cpu_only}")
    python = create_venv_and_install(cpu_only)
    output_dir = run_pyinstaller(python)
    download_ffmpeg(output_dir)
    rename_binary(output_dir, target_triple)
    print(f"\n[build] Done! Sidecar built at: {output_dir}")
    print("[build] Copy contents to src-tauri/binaries/ for Tauri bundling")
 if __name__ == "__main__":
    main()
@@ -13,6 +13,8 @@ dependencies = [
    "faster-whisper>=1.1.0",
    "pyannote.audio>=3.1.0",
    "pysubs2>=1.7.0",
    "openai>=1.0.0",
    "anthropic>=0.20.0",
 ]
 [project.optional-dependencies]
@@ -20,6 +22,7 @@ dev = [
    "ruff>=0.8.0",
    "pytest>=8.0.0",
    "pytest-asyncio>=0.24.0",
    "pyinstaller>=6.0",
 ]
 [tool.ruff]
@@ -0,0 +1,67 @@
 # -*- mode: python ; coding: utf-8 -*-
 """PyInstaller spec for the Voice to Notes sidecar binary."""
 # Analysis, PYZ, EXE and COLLECT are not imported here; they are presumably
 # injected by PyInstaller when it executes the spec — confirm.
 from PyInstaller.utils.hooks import collect_all
 # NOTE(review): always None here; confirm the `cipher=` arguments below are
 # still wanted for the PyInstaller version in use.
 block_cipher = None
 # Collect all files for packages that have shared libraries / data files
 # PyInstaller often misses these for ML packages
 # Each collect_all() result is unpacked as (datas, binaries, hiddenimports).
 ctranslate2_datas, ctranslate2_binaries, ctranslate2_hiddenimports = collect_all("ctranslate2")
 faster_whisper_datas, faster_whisper_binaries, faster_whisper_hiddenimports = collect_all(
    "faster_whisper"
 )
 pyannote_datas, pyannote_binaries, pyannote_hiddenimports = collect_all("pyannote")
 # Entry point is given relative to the directory PyInstaller is run from.
 a = Analysis(
    ["voice_to_notes/main.py"],
    pathex=[],
    binaries=ctranslate2_binaries + faster_whisper_binaries + pyannote_binaries,
    datas=ctranslate2_datas + faster_whisper_datas + pyannote_datas,
    # Modules named explicitly, in addition to those reported by collect_all().
    # NOTE(review): confirm "litellm" is still imported by the sidecar; if it
    # is no longer a dependency this entry can be dropped.
    hiddenimports=[
        "torch",
        "torchaudio",
        "huggingface_hub",
        "pysubs2",
        "openai",
        "anthropic",
        "litellm",
    ]
    + ctranslate2_hiddenimports
    + faster_whisper_hiddenimports
    + pyannote_hiddenimports,
    hookspath=[],
    hooksconfig={},
    runtime_hooks=[],
    # Modules left out of the frozen bundle.
    excludes=["tkinter", "test", "unittest", "pip", "setuptools"],
    win_no_prefer_redirects=False,
    win_private_assemblies=False,
    cipher=block_cipher,
    noarchive=False,
 )
 pyz = PYZ(a.pure, a.zipped_data, cipher=block_cipher)
 # exclude_binaries=True: binaries are gathered by COLLECT below instead of
 # being placed in the executable (a directory-style build).
 exe = EXE(
    pyz,
    a.scripts,
    [],
    exclude_binaries=True,
    name="voice-to-notes-sidecar",
    debug=False,
    bootloader_ignore_signals=False,
    strip=False,
    upx=True,
    console=True,
 )
 # Gathers the executable with its binaries and data files under the same name.
 coll = COLLECT(
    exe,
    a.binaries,
    a.zipfiles,
    a.datas,
    strip=False,
    upx=True,
    upx_exclude=[],
    name="voice-to-notes-sidecar",
 )
@@ -2,7 +2,10 @@
 from __future__ import annotations
 import ctypes
 import os
 import platform
 import subprocess
 import sys
 from dataclasses import dataclass
@@ -21,6 +24,77 @@ class HardwareInfo:
    recommended_compute_type: str = "int8"
 def _detect_ram_mb() -> int:
    """Detect total system RAM in MB (cross-platform).
    Tries platform-specific methods in order:
    1. Linux: read /proc/meminfo
    2. macOS: sysctl hw.memsize
    3. Windows: GlobalMemoryStatusEx via ctypes
    4. Fallback: os.sysconf (most Unix systems)
    Returns 0 if all methods fail.
    """
    # Linux: read /proc/meminfo
    if sys.platform == "linux":
        try:
            with open("/proc/meminfo") as f:
                for line in f:
                    if line.startswith("MemTotal:"):
                        # Value is in kB
                        return int(line.split()[1]) // 1024
        except (FileNotFoundError, ValueError, OSError):
            pass
    # macOS: sysctl hw.memsize (returns bytes)
    if sys.platform == "darwin":
        try:
            result = subprocess.run(
                ["sysctl", "-n", "hw.memsize"],
                capture_output=True,
                text=True,
                check=True,
            )
            return int(result.stdout.strip()) // (1024 * 1024)
        except (subprocess.SubprocessError, ValueError, OSError):
            pass
    # Windows: GlobalMemoryStatusEx via ctypes
    if sys.platform == "win32":
        try:
            class MEMORYSTATUSEX(ctypes.Structure):
                _fields_ = [
                    ("dwLength", ctypes.c_ulong),
                    ("dwMemoryLoad", ctypes.c_ulong),
                    ("ullTotalPhys", ctypes.c_ulonglong),
                    ("ullAvailPhys", ctypes.c_ulonglong),
                    ("ullTotalPageFile", ctypes.c_ulonglong),
                    ("ullAvailPageFile", ctypes.c_ulonglong),
                    ("ullTotalVirtual", ctypes.c_ulonglong),
                    ("ullAvailVirtual", ctypes.c_ulonglong),
                    ("ullAvailExtendedVirtual", ctypes.c_ulonglong),
                ]
            mem_status = MEMORYSTATUSEX()
            mem_status.dwLength = ctypes.sizeof(MEMORYSTATUSEX)
            if ctypes.windll.kernel32.GlobalMemoryStatusEx(ctypes.byref(mem_status)):
                return int(mem_status.ullTotalPhys) // (1024 * 1024)
        except (AttributeError, OSError):
            pass
    # Fallback: os.sysconf (works on most Unix systems)
    try:
        page_size = os.sysconf("SC_PAGE_SIZE")
        phys_pages = os.sysconf("SC_PHYS_PAGES")
        if page_size > 0 and phys_pages > 0:
            return (page_size * phys_pages) // (1024 * 1024)
    except (ValueError, OSError, AttributeError):
        pass
    return 0
 def detect_hardware() -> HardwareInfo:
    """Detect available hardware and recommend model configuration."""
    info = HardwareInfo()
@@ -28,16 +102,8 @@ def detect_hardware() -> HardwareInfo:
    # CPU info
    info.cpu_cores = os.cpu_count() or 1
-    # RAM info
+    # RAM info (cross-platform)
-    try:
+    info.ram_mb = _detect_ram_mb()
        with open("/proc/meminfo") as f:
            for line in f:
                if line.startswith("MemTotal:"):
                    # Value is in kB
                    info.ram_mb = int(line.split()[1]) // 1024
                    break
    except (FileNotFoundError, ValueError):
        pass
    # CUDA detection
    try:
@@ -260,10 +260,12 @@ def make_ai_chat_handler() -> HandlerFunc:
                    model=config.get("model", "claude-sonnet-4-6"),
                ))
            elif provider_name == "litellm":
-                from voice_to_notes.providers.litellm_provider import LiteLLMProvider
+                from voice_to_notes.providers.litellm_provider import OpenAICompatibleProvider
-                service.register_provider("litellm", LiteLLMProvider(
+                service.register_provider("litellm", OpenAICompatibleProvider(
                    model=config.get("model", "gpt-4o-mini"),
                    api_key=config.get("api_key"),
                    api_base=config.get("api_base"),
                ))
            return IPCMessage(
                id=msg.id,
@@ -1,4 +1,4 @@
-"""LiteLLM provider — multi-provider gateway."""
+"""OpenAI-compatible provider — works with any OpenAI-compatible API endpoint."""
 from __future__ import annotations
@@ -7,36 +7,44 @@ from typing import Any
 from voice_to_notes.providers.base import AIProvider
-class LiteLLMProvider(AIProvider):
+class OpenAICompatibleProvider(AIProvider):
-    """Routes through LiteLLM for access to 100+ LLM providers."""
+    """Connects to any OpenAI-compatible API (LiteLLM proxy, Ollama, vLLM, etc.)."""
-    def __init__(self, model: str = "gpt-4o-mini", **kwargs: Any) -> None:
+    def __init__(
        self,
        api_key: str | None = None,
        api_base: str | None = None,
        model: str = "gpt-4o-mini",
        **kwargs: Any,
    ) -> None:
        self._api_key = api_key or "sk-no-key"
        self._api_base = api_base
        self._model = model
        self._extra_kwargs = kwargs
    def chat(self, messages: list[dict[str, str]], **kwargs: Any) -> str:
-        try:
+        from openai import OpenAI
            import litellm
        except ImportError:
            raise RuntimeError("litellm package is required. Install with: pip install litellm")
-        merged_kwargs = {**self._extra_kwargs, **kwargs}
+        client_kwargs: dict[str, Any] = {"api_key": self._api_key}
-        response = litellm.completion(
+        if self._api_base:
-            model=merged_kwargs.get("model", self._model),
+            client_kwargs["base_url"] = self._api_base
        client = OpenAI(**client_kwargs)
        response = client.chat.completions.create(
            model=kwargs.get("model", self._model),
            messages=messages,
-            temperature=merged_kwargs.get("temperature", 0.7),
+            temperature=kwargs.get("temperature", 0.7),
-            max_tokens=merged_kwargs.get("max_tokens", 2048),
+            max_tokens=kwargs.get("max_tokens", 2048),
        )
        return response.choices[0].message.content or ""
    def is_available(self) -> bool:
        try:
-            import litellm  # noqa: F401
+            import openai  # noqa: F401
-
+            return bool(self._api_key and self._api_base)
            return True
        except ImportError:
            return False
    @property
    def name(self) -> str:
-        return "LiteLLM"
+        return "OpenAI Compatible"
@@ -92,7 +92,7 @@ class AIProviderService:
 def create_default_service() -> AIProviderService:
    """Create an AIProviderService with all supported providers registered."""
    from voice_to_notes.providers.anthropic_provider import AnthropicProvider
-    from voice_to_notes.providers.litellm_provider import LiteLLMProvider
+    from voice_to_notes.providers.litellm_provider import OpenAICompatibleProvider
    from voice_to_notes.providers.local_provider import LocalProvider
    from voice_to_notes.providers.openai_provider import OpenAIProvider
@@ -100,5 +100,5 @@ def create_default_service() -> AIProviderService:
    service.register_provider("local", LocalProvider())
    service.register_provider("openai", OpenAIProvider())
    service.register_provider("anthropic", AnthropicProvider())
-    service.register_provider("litellm", LiteLLMProvider())
+    service.register_provider("litellm", OpenAICompatibleProvider())
    return service
@@ -16,6 +16,7 @@ from typing import Any
 # np.isfinite(None) crashes when max_speakers is not set.
 os.environ.setdefault("PYANNOTE_METRICS_ENABLED", "false")
 from voice_to_notes.utils.ffmpeg import get_ffmpeg_path
 from voice_to_notes.ipc.messages import progress_message
 from voice_to_notes.ipc.protocol import write_message
@@ -40,7 +41,7 @@ def _ensure_wav(file_path: str) -> tuple[str, str | None]:
    try:
        subprocess.run(
            [
-                "ffmpeg", "-y", "-i", file_path,
+                get_ffmpeg_path(), "-y", "-i", file_path,
                "-ar", "16000", "-ac", "1", "-c:a", "pcm_s16le",
                tmp.name,
            ],
@@ -118,6 +119,14 @@ class DiarizeService:
                self._pipeline = Pipeline.from_pretrained(model_name, token=hf_token)
                print(f"[sidecar] Loaded diarization model: {model_name}", file=sys.stderr, flush=True)
                # Move pipeline to GPU if available
                try:
                    import torch
                    if torch.cuda.is_available():
                        self._pipeline = self._pipeline.to(torch.device("cuda"))
                        print(f"[sidecar] Diarization pipeline moved to GPU", file=sys.stderr, flush=True)
                except Exception as e:
                    print(f"[sidecar] GPU not available for diarization: {e}", file=sys.stderr, flush=True)
                return self._pipeline
            except Exception as e:
                last_error = e
@@ -2,6 +2,7 @@
 from __future__ import annotations
 import concurrent.futures
 import sys
 import time
 from dataclasses import dataclass, field
@@ -13,6 +14,7 @@ from voice_to_notes.ipc.messages import (
    speaker_update_message,
 )
 from voice_to_notes.ipc.protocol import write_message
 from voice_to_notes.utils.ffmpeg import get_ffprobe_path
 from voice_to_notes.services.diarize import DiarizeService, SpeakerSegment
 from voice_to_notes.services.transcribe import (
    SegmentResult,
@@ -82,7 +84,7 @@ class PipelineService:
        """
        start_time = time.time()
-        # Step 1: Transcribe
+        # Step 0: Probe audio duration for conditional chunked transcription
        write_message(
            progress_message(request_id, 0, "pipeline", "Starting transcription pipeline...")
        )
@@ -96,12 +98,11 @@ class PipelineService:
                "words": [{"word": w.word, "start_ms": w.start_ms, "end_ms": w.end_ms, "confidence": w.confidence} for w in seg.words],
            }))
        # Probe audio duration for conditional chunked transcription
        audio_duration_sec = None
        try:
            import subprocess
            probe_result = subprocess.run(
-                ["ffprobe", "-v", "quiet", "-show_entries", "format=duration",
+                [get_ffprobe_path(), "-v", "quiet", "-show_entries", "format=duration",
                 "-of", "default=noprint_wrappers=1:nokey=1", file_path],
                capture_output=True, text=True, check=True,
            )
@@ -109,9 +110,11 @@ class PipelineService:
        except (subprocess.CalledProcessError, FileNotFoundError, ValueError):
            pass
        def _run_transcription() -> TranscriptionResult:
            """Run transcription (chunked or standard based on duration)."""
            from voice_to_notes.services.transcribe import LARGE_FILE_THRESHOLD_SEC
            if audio_duration_sec and audio_duration_sec > LARGE_FILE_THRESHOLD_SEC:
-            transcription = self._transcribe_service.transcribe_chunked(
+                return self._transcribe_service.transcribe_chunked(
                    request_id=request_id,
                    file_path=file_path,
                    model_name=model_name,
@@ -121,7 +124,7 @@ class PipelineService:
                    on_segment=_emit_segment,
                )
            else:
-            transcription = self._transcribe_service.transcribe(
+                return self._transcribe_service.transcribe(
                    request_id=request_id,
                    file_path=file_path,
                    model_name=model_name,
@@ -132,7 +135,8 @@ class PipelineService:
                )
        if skip_diarization:
-            # Convert transcription directly without speaker labels
+            # Sequential: transcribe only, no diarization needed
            transcription = _run_transcription()
            result = PipelineResult(
                language=transcription.language,
                language_probability=transcription.language_probability,
@@ -150,24 +154,46 @@ class PipelineService:
                )
            return result
-        # Step 2: Diarize (with graceful fallback)
+        # Parallel execution: run transcription (0-45%) and diarization (45-90%)
        # concurrently, then merge (90-100%).
        write_message(
-            progress_message(request_id, 50, "pipeline", "Starting speaker diarization...")
+            progress_message(
                request_id, 0, "pipeline",
                "Starting transcription and diarization in parallel..."
            )
        )
        diarization = None
-        try:
+        diarization_error = None
-            diarization = self._diarize_service.diarize(
+
        with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
            transcription_future = executor.submit(_run_transcription)
            # Use probed audio_duration_sec for diarization progress estimation
            # (transcription hasn't finished yet, so we can't use transcription.duration_ms)
            diarization_future = executor.submit(
                self._diarize_service.diarize,
                request_id=request_id,
                file_path=file_path,
                num_speakers=num_speakers,
                min_speakers=min_speakers,
                max_speakers=max_speakers,
                hf_token=hf_token,
-                audio_duration_sec=transcription.duration_ms / 1000.0,
+                audio_duration_sec=audio_duration_sec,
            )
            # Wait for both futures. We need the transcription result regardless,
            # but diarization may fail gracefully.
            transcription = transcription_future.result()
            write_message(
                progress_message(request_id, 45, "pipeline", "Transcription complete")
            )
            try:
                diarization = diarization_future.result()
            except Exception as e:
                import traceback
                diarization_error = e
                print(
                    f"[sidecar] Diarization failed, falling back to transcription-only: {e}",
                    file=sys.stderr,
@@ -12,6 +12,7 @@ from faster_whisper import WhisperModel
 from voice_to_notes.ipc.messages import progress_message
 from voice_to_notes.ipc.protocol import write_message
 from voice_to_notes.utils.ffmpeg import get_ffmpeg_path, get_ffprobe_path
 CHUNK_REPORT_SIZE = 10
 LARGE_FILE_THRESHOLD_SEC = 3600  # 1 hour
@@ -202,7 +203,7 @@ class TranscribeService:
        # Get total duration via ffprobe
        try:
            probe_result = subprocess.run(
-                ["ffprobe", "-v", "quiet", "-show_entries", "format=duration",
+                [get_ffprobe_path(), "-v", "quiet", "-show_entries", "format=duration",
                 "-of", "default=noprint_wrappers=1:nokey=1", file_path],
                capture_output=True, text=True, check=True,
            )
@@ -235,7 +236,7 @@ class TranscribeService:
            tmp.close()
            try:
                subprocess.run(
-                    ["ffmpeg", "-y", "-ss", str(chunk_start),
+                    [get_ffmpeg_path(), "-y", "-ss", str(chunk_start),
                     "-t", str(chunk_duration_sec),
                     "-i", file_path,
                     "-ar", "16000", "-ac", "1", "-c:a", "pcm_s16le",
@@ -0,0 +1,43 @@
 """Resolve ffmpeg/ffprobe paths for both frozen and development builds."""
 from __future__ import annotations
 import os
 import sys
 def _resolve_bundled_binary(name: str) -> str:
    """Locate a helper binary shipped with a frozen build.
    Args:
        name: Bare command name without extension, e.g. ``"ffmpeg"``.
    Returns:
        The absolute path of the first matching regular file when running as a
        frozen PyInstaller bundle, otherwise ``name`` unchanged so that the
        caller resolves it through the system PATH.
    """
    if not getattr(sys, "frozen", False):
        return name
    # On Windows prefer the ".exe" name but still accept the bare name; on
    # other platforms there is only one name to try (no duplicate probing).
    filenames = [name]
    if sys.platform == "win32":
        filenames.insert(0, f"{name}.exe")
    # First look next to the frozen executable, then inside PyInstaller's
    # bundle directory (sys._MEIPASS), which is where binaries collected by
    # the spec file are placed and which may differ from the executable's dir.
    search_dirs = [os.path.dirname(sys.executable)]
    bundle_dir = getattr(sys, "_MEIPASS", None)
    if bundle_dir and bundle_dir not in search_dirs:
        search_dirs.append(bundle_dir)
    for directory in search_dirs:
        for filename in filenames:
            candidate = os.path.join(directory, filename)
            if os.path.isfile(candidate):
                return candidate
    # Nothing bundled: let the OS resolve the command via PATH.
    return name
 def get_ffmpeg_path() -> str:
    """Return the path to the ffmpeg binary.
    When running as a frozen PyInstaller bundle, looks next to sys.executable
    and then in the PyInstaller bundle directory.
    Otherwise (or when no bundled copy is found) falls back to the system PATH
    by returning the bare command name ``"ffmpeg"``.
    """
    return _resolve_bundled_binary("ffmpeg")
 def get_ffprobe_path() -> str:
    """Return the path to the ffprobe binary.
    When running as a frozen PyInstaller bundle, looks next to sys.executable
    and then in the PyInstaller bundle directory.
    Otherwise (or when no bundled copy is found) falls back to the system PATH
    by returning the bare command name ``"ffprobe"``.
    """
    return _resolve_bundled_binary("ffprobe")
@@ -73,7 +73,7 @@ pub fn download_diarize_model(hf_token: String) -> Result<Value, String> {
 /// Run the full transcription + diarization pipeline via the Python sidecar.
 #[tauri::command]
-pub fn run_pipeline(
+pub async fn run_pipeline(
    app: AppHandle,
    file_path: String,
    model: Option<String>,
@@ -106,13 +106,19 @@ pub fn run_pipeline(
        }),
    );
    // Run the blocking sidecar I/O on a separate thread so the async runtime
    // can deliver emitted events to the webview while processing is ongoing.
    let app_handle = app.clone();
    tauri::async_runtime::spawn_blocking(move || {
        let response = manager.send_and_receive_with_progress(&msg, |msg| {
            let event_name = match msg.msg_type.as_str() {
                "pipeline.segment" => "pipeline-segment",
                "pipeline.speaker_update" => "pipeline-speaker-update",
                _ => "pipeline-progress",
            };
-        let _ = app.emit(event_name, &msg.payload);
+            if let Err(e) = app_handle.emit(event_name, &msg.payload) {
                eprintln!("[sidecar-rs] Failed to emit {event_name}: {e}");
            }
        })?;
        if response.msg_type == "error" {
@@ -127,4 +133,7 @@ pub fn run_pipeline(
        }
        Ok(response.payload)
    })
    .await
    .map_err(|e| format!("Pipeline task failed: {e}"))?
 }
@@ -13,8 +13,13 @@ pub fn sidecar() -> &'static SidecarManager {
    INSTANCE.get_or_init(SidecarManager::new)
 }
-/// Manages the Python sidecar process lifecycle.
+/// Manages the sidecar process lifecycle.
-/// Uses separated stdin/stdout ownership to avoid BufReader conflicts.
+///
 /// Supports two modes:
 /// - **Production**: spawns a frozen PyInstaller binary (no Python required)
 /// - **Dev mode**: spawns system Python with `-m voice_to_notes.main`
 ///
 /// Dev mode is active when compiled in debug mode or when `VOICE_TO_NOTES_DEV=1`.
 pub struct SidecarManager {
    process: Mutex<Option<Child>>,
    stdin: Mutex<Option<ChildStdin>>,
@@ -30,38 +35,141 @@ impl SidecarManager {
        }
    }
    /// Decide whether the sidecar should be launched through system Python.
    ///
    /// Returns `true` for debug builds, and for any build in which the
    /// `VOICE_TO_NOTES_DEV` environment variable can be read. Only the
    /// variable's presence is checked; its value is not inspected.
    fn is_dev_mode() -> bool {
        if cfg!(debug_assertions) {
            return true;
        }
        std::env::var("VOICE_TO_NOTES_DEV").is_ok()
    }
    /// Resolve the frozen sidecar binary path (production mode).
    ///
    /// Two locations are probed, both relative to the directory that contains
    /// the currently running executable:
    /// 1. `<exe_dir>/<binary_name>`
    /// 2. `<exe_dir>/voice-to-notes-sidecar/<binary_name>` (onedir PyInstaller output)
    ///
    /// Returns the first candidate that is a regular file, or an error message
    /// listing both paths that were checked.
    fn resolve_sidecar_path() -> Result<std::path::PathBuf, String> {
        let exe = std::env::current_exe().map_err(|e| format!("Cannot get current exe: {e}"))?;
        let exe_dir = exe
            .parent()
            .ok_or_else(|| "Cannot get exe parent directory".to_string())?;
        let binary_name = if cfg!(target_os = "windows") {
            "voice-to-notes-sidecar.exe"
        } else {
            "voice-to-notes-sidecar"
        };
        // Tauri places externalBin next to the app binary.
        // `is_file()` rather than `exists()`: outside Windows the binary name is
        // identical to the onedir folder name, so a plain existence check would
        // accept the *directory* here and never reach the subdirectory probe.
        let path = exe_dir.join(binary_name);
        if path.is_file() {
            return Ok(path);
        }
        // Also check inside a subdirectory (onedir PyInstaller output)
        let subdir_path = exe_dir.join("voice-to-notes-sidecar").join(binary_name);
        if subdir_path.is_file() {
            return Ok(subdir_path);
        }
        Err(format!(
            "Sidecar binary not found. Looked for:\n  {}\n  {}",
            path.display(),
            subdir_path.display(),
        ))
    }
    /// Pick the Python launcher name for the platform this binary was built for.
    ///
    /// Returns `"python"` on Windows targets and `"python3"` everywhere else.
    /// The choice is made at compile time; the command is not probed for existence.
    fn find_python_command() -> &'static str {
        if !cfg!(target_os = "windows") {
            return "python3";
        }
        "python"
    }
    /// Resolve the Python sidecar directory for dev mode.
    ///
    /// Tries, in order:
    /// 1. `<CARGO_MANIFEST_DIR>/../python` (path baked in at compile time)
    /// 2. `<directory of the running executable>/../python`
    ///
    /// Returns the canonicalized path of the first location that exists, or an
    /// error string if neither can be resolved.
    fn resolve_python_dir() -> Result<std::path::PathBuf, String> {
        let manifest_dir = env!("CARGO_MANIFEST_DIR");
        // `canonicalize` fails for paths that do not exist, so a successful
        // result is already proof of existence. A failure here must NOT abort:
        // it only means we have to try the executable-relative fallback.
        if let Ok(python_dir) = std::path::Path::new(manifest_dir)
            .join("../python")
            .canonicalize()
        {
            return Ok(python_dir);
        }
        // Fallback: relative to current exe
        let exe = std::env::current_exe().map_err(|e| e.to_string())?;
        exe.parent()
            .ok_or_else(|| "No parent dir".to_string())?
            .join("../python")
            .canonicalize()
            .map_err(|e| format!("Cannot find python directory: {e}"))
    }
    /// Ensure the sidecar is running, starting it if needed.
    pub fn ensure_running(&self) -> Result<(), String> {
        if self.is_running() {
            return Ok(());
        }
-        let python_path = std::env::current_dir()
+        if Self::is_dev_mode() {
-            .map_err(|e| e.to_string())?
+            self.start_python_dev()
-            .join("../python")
+        } else {
-            .canonicalize()
+            match Self::resolve_sidecar_path() {
-            .map_err(|e| format!("Cannot find python directory: {e}"))?;
+                Ok(path) => self.start_binary(&path),
-
+                Err(e) => {
-        self.start(&python_path.to_string_lossy())
+                    eprintln!(
                        "[sidecar-rs] Frozen binary not found ({e}), falling back to dev mode"
                    );
                    self.start_python_dev()
                }
            }
        }
    }
-    /// Spawn the Python sidecar process.
+    /// Spawn the frozen sidecar binary (production mode).
-    pub fn start(&self, python_path: &str) -> Result<(), String> {
+    fn start_binary(&self, path: &std::path::Path) -> Result<(), String> {
        // Stop existing process if any
        self.stop().ok();
        eprintln!("[sidecar-rs] Starting frozen sidecar: {}", path.display());
-        let mut child = Command::new("python3")
+        let child = Command::new(path)
            .arg("-m")
            .arg("voice_to_notes.main")
            .current_dir(python_path)
            .env("PYTHONPATH", python_path)
            .stdin(Stdio::piped())
            .stdout(Stdio::piped())
            .stderr(Stdio::inherit())
            .spawn()
-            .map_err(|e| format!("Failed to start sidecar: {e}"))?;
+            .map_err(|e| format!("Failed to start sidecar binary: {e}"))?;
-        // Take ownership of stdin and stdout separately
+        self.attach(child)?;
        self.wait_for_ready()
    }
    /// Launch the sidecar through the system Python interpreter (dev mode).
    ///
    /// Stops any previously tracked process (errors from stopping are ignored),
    /// resolves the Python source directory, then runs
    /// `<python> -m voice_to_notes.main` with that directory as both the working
    /// directory and `PYTHONPATH`. stdin/stdout are piped; stderr is inherited.
    /// The spawned child is handed to `attach`, after which this waits for the
    /// sidecar's ready signal.
    ///
    /// Returns an error string if the directory cannot be resolved, the process
    /// cannot be spawned, or attaching / waiting for readiness fails.
    fn start_python_dev(&self) -> Result<(), String> {
        self.stop().ok();
        let python_dir = Self::resolve_python_dir()?;
        let python_cmd = Self::find_python_command();
        eprintln!(
            "[sidecar-rs] Starting dev sidecar: {} -m voice_to_notes.main ({})",
            python_cmd,
            python_dir.display()
        );
        // Build the command step by step, then spawn it.
        let mut command = Command::new(python_cmd);
        command
            .args(["-m", "voice_to_notes.main"])
            .current_dir(&python_dir)
            .env("PYTHONPATH", &python_dir)
            .stdin(Stdio::piped())
            .stdout(Stdio::piped())
            .stderr(Stdio::inherit());
        let child = match command.spawn() {
            Ok(spawned) => spawned,
            Err(e) => return Err(format!("Failed to start Python sidecar: {e}")),
        };
        self.attach(child)?;
        self.wait_for_ready()
    }
    /// Take ownership of a spawned child's stdin/stdout and store the process handle.
    fn attach(&self, mut child: Child) -> Result<(), String> {
        let stdin = child.stdin.take().ok_or("Failed to get sidecar stdin")?;
        let stdout = child.stdout.take().ok_or("Failed to get sidecar stdout")?;
        let buf_reader = BufReader::new(stdout);
@@ -78,10 +186,6 @@ impl SidecarManager {
            let mut r = self.reader.lock().map_err(|e| e.to_string())?;
            *r = Some(buf_reader);
        }
        // Wait for the "ready" message
        self.wait_for_ready()?;
        Ok(())
    }
@@ -124,70 +228,6 @@ impl SidecarManager {
        self.send_and_receive_with_progress(msg, |_| {})
    }
    /// Send a message and read the response, calling on_progress for each progress message.
    pub fn send_and_receive_with_progress(
        &self,
        msg: &IPCMessage,
        on_progress: impl Fn(&IPCMessage),
    ) -> Result<IPCMessage, String> {
        // Write to stdin
        {
            let mut stdin_guard = self.stdin.lock().map_err(|e| e.to_string())?;
            if let Some(ref mut stdin) = *stdin_guard {
                let json = serde_json::to_string(msg).map_err(|e| e.to_string())?;
                stdin
                    .write_all(json.as_bytes())
                    .map_err(|e| format!("Write error: {e}"))?;
                stdin
                    .write_all(b"\n")
                    .map_err(|e| format!("Write error: {e}"))?;
                stdin.flush().map_err(|e| format!("Flush error: {e}"))?;
            } else {
                return Err("Sidecar stdin not available".to_string());
            }
        }
        // Read from stdout
        {
            let mut reader_guard = self.reader.lock().map_err(|e| e.to_string())?;
            if let Some(ref mut reader) = *reader_guard {
                let mut line = String::new();
                loop {
                    line.clear();
                    let bytes_read = reader
                        .read_line(&mut line)
                        .map_err(|e| format!("Read error: {e}"))?;
                    if bytes_read == 0 {
                        return Err("Sidecar closed stdout".to_string());
                    }
                    let trimmed = line.trim();
                    if trimmed.is_empty() {
                        continue;
                    }
                    // Skip non-JSON lines (library output that leaked to stdout)
                    let response: IPCMessage = match serde_json::from_str(trimmed) {
                        Ok(msg) => msg,
                        Err(_) => {
                            eprintln!(
                                "[sidecar-rs] Skipping non-JSON line: {}",
                                &trimmed[..trimmed.len().min(200)]
                            );
                            continue;
                        }
                    };
                    if response.msg_type == "progress" {
                        on_progress(&response);
                        continue;
                    }
                    return Ok(response);
                }
            } else {
                Err("Sidecar stdout not available".to_string())
            }
        }
    }
    /// Send a message and receive the response, calling a callback for intermediate messages.
    /// Intermediate messages include progress, pipeline.segment, and pipeline.speaker_update.
    pub fn send_and_receive_with_progress<F>(
@@ -46,7 +46,7 @@
    "license": "MIT",
    "linux": {
      "deb": {
-        "depends": ["python3", "python3-pip"]
+        "depends": []
      },
      "appimage": {
        "bundleMediaFramework": true
@@ -1,6 +1,7 @@
 <script lang="ts">
  import { invoke } from '@tauri-apps/api/core';
  import { segments, speakers } from '$lib/stores/transcript';
  import { settings } from '$lib/stores/settings';
  interface ChatMessage {
    role: 'user' | 'assistant';
@@ -43,9 +44,23 @@
        content: m.content,
      }));
      // Ensure the provider is configured with current credentials before chatting
      const s = $settings;
      const configMap: Record<string, Record<string, string>> = {
        openai: { api_key: s.openai_api_key, model: s.openai_model },
        anthropic: { api_key: s.anthropic_api_key, model: s.anthropic_model },
        litellm: { api_key: s.litellm_api_key, api_base: s.litellm_api_base, model: s.litellm_model },
        local: { model: s.local_model_path, base_url: 'http://localhost:8080' },
      };
      const config = configMap[s.ai_provider];
      if (config) {
        await invoke('ai_configure', { provider: s.ai_provider, config });
      }
      const result = await invoke<{ response: string }>('ai_chat', {
        messages: chatMessages,
        transcriptContext: getTranscriptContext(),
        provider: s.ai_provider,
      });
      messages = [...messages, { role: 'assistant', content: result.response }];
@@ -8,17 +8,7 @@
  let { visible = false, percent = 0, stage = '', message = '' }: Props = $props();
-  // Map internal stage names to user-friendly labels
+  // Pipeline steps in order
  const stageLabels: Record<string, string> = {
    'pipeline': 'Pipeline',
    'loading_model': 'Loading Model',
    'transcribing': 'Transcribing',
    'loading_diarization': 'Loading Diarization',
    'diarizing': 'Speaker Detection',
    'done': 'Complete',
  };
  // Pipeline steps for the task list
  const pipelineSteps = [
    { key: 'loading_model', label: 'Load transcription model' },
    { key: 'transcribing', label: 'Transcribe audio' },
@@ -27,17 +17,47 @@
    { key: 'merging', label: 'Merge results' },
  ];
  function getStepStatus(stepKey: string, currentStage: string): 'pending' | 'active' | 'done' {
  const stepOrder = pipelineSteps.map(s => s.key);
    const currentIdx = stepOrder.indexOf(currentStage);
    const stepIdx = stepOrder.indexOf(stepKey);
-    if (currentStage === 'done') return 'done';
+  // Track the highest step index we've reached (never goes backward)
-    if (stepIdx < currentIdx) return 'done';
+  let highestStepIdx = $state(-1);
-    if (stepIdx === currentIdx) return 'active';
+
  // Translate a stage name into its position in `stepOrder`.
  // Stages that are not task-list steps (e.g. 'pipeline') yield -1, which the
  // caller treats as "leave the high-water mark alone".
  function stageToStepIdx(s: string): number {
    // indexOf already reports -1 for names that are not in the list.
    return stepOrder.indexOf(s);
  }
  // Keep `highestStepIdx` in sync with the incoming `stage`:
  // reset it while the overlay is hidden, otherwise only ever move it forward.
  $effect(() => {
    if (visible) {
      const reachedIdx = stageToStepIdx(stage);
      if (reachedIdx > highestStepIdx) {
        highestStepIdx = reachedIdx;
      }
    } else {
      highestStepIdx = -1;
    }
  });
  // Classify a task-list step relative to the furthest step reached so far:
  // the step at the high-water mark is 'active', earlier ones are 'done',
  // everything else is 'pending'.
  function getStepStatus(stepIdx: number): 'pending' | 'active' | 'done' {
    if (stepIdx === highestStepIdx) {
      return 'active';
    }
    return stepIdx < highestStepIdx ? 'done' : 'pending';
  }
  // User-friendly display of current stage
  const stageLabels: Record<string, string> = {
    'pipeline': 'Initializing...',
    'loading_model': 'Loading Model',
    'transcribing': 'Transcribing',
    'loading_diarization': 'Loading Diarization',
    'diarizing': 'Speaker Detection',
    'merging': 'Merging Results',
    'done': 'Complete',
  };
  let displayStage = $derived(stageLabels[stage] || stage || 'Processing...');
 </script>
@@ -50,8 +70,8 @@
      </div>
      <div class="steps">
-        {#each pipelineSteps as step}
+        {#each pipelineSteps as step, idx}
-          {@const status = getStepStatus(step.key, stage)}
+          {@const status = getStepStatus(idx)}
          <div class="step" class:step-done={status === 'done'} class:step-active={status === 'active'}>
            <span class="step-icon">
              {#if status === 'done'}
@@ -14,6 +14,7 @@
  let activeTab = $state<'transcription' | 'speakers' | 'ai' | 'local'>('transcription');
  let modelStatus = $state<'idle' | 'downloading' | 'success' | 'error'>('idle');
  let modelError = $state('');
  let revealedFields = $state<Set<string>>(new Set());
  async function testAndDownloadModel() {
    if (!localSettings.hf_token) {
@@ -111,7 +112,10 @@
        {:else if activeTab === 'speakers'}
          <div class="field">
            <label for="hf-token">HuggingFace Token</label>
-            <input id="hf-token" type="password" bind:value={localSettings.hf_token} placeholder="hf_..." />
+            <div class="input-reveal">
              <input id="hf-token" type={revealedFields.has('hf-token') ? 'text' : 'password'} bind:value={localSettings.hf_token} placeholder="hf_..." />
              <button type="button" class="reveal-btn" onclick={() => { const s = new Set(revealedFields); s.has('hf-token') ? s.delete('hf-token') : s.add('hf-token'); revealedFields = s; }}>{revealedFields.has('hf-token') ? 'Hide' : 'Show'}</button>
            </div>
          </div>
          <div class="info-box">
            <p class="info-title">Setup (one-time)</p>
@@ -150,6 +154,23 @@
          {#if modelStatus === 'error'}
            <p class="status-error">{modelError}</p>
          {/if}
          <div class="field" style="margin-top: 1rem;">
            <label for="num-speakers">Number of speakers</label>
            <select
              id="num-speakers"
              value={localSettings.num_speakers === null || localSettings.num_speakers === 0 ? '0' : String(localSettings.num_speakers)}
              onchange={(e) => {
                const v = parseInt((e.target as HTMLSelectElement).value, 10);
                localSettings.num_speakers = v === 0 ? null : v;
              }}
            >
              <option value="0">Auto-detect</option>
              {#each Array.from({ length: 20 }, (_, i) => i + 1) as n}
                <option value={String(n)}>{n}</option>
              {/each}
            </select>
            <p class="hint">Hint the expected number of speakers to speed up diarization clustering.</p>
          </div>
          <div class="field checkbox" style="margin-top: 1rem;">
            <label>
              <input type="checkbox" bind:checked={localSettings.skip_diarization} />
@@ -163,14 +184,17 @@
              <option value="local">Local (llama-server)</option>
              <option value="openai">OpenAI</option>
              <option value="anthropic">Anthropic</option>
-              <option value="litellm">LiteLLM</option>
+              <option value="litellm">OpenAI Compatible</option>
            </select>
          </div>
          {#if localSettings.ai_provider === 'openai'}
            <div class="field">
              <label for="openai-key">OpenAI API Key</label>
-              <input id="openai-key" type="password" bind:value={localSettings.openai_api_key} placeholder="sk-..." />
+              <div class="input-reveal">
                <input id="openai-key" type={revealedFields.has('openai-key') ? 'text' : 'password'} bind:value={localSettings.openai_api_key} placeholder="sk-..." />
                <button type="button" class="reveal-btn" onclick={() => { const s = new Set(revealedFields); s.has('openai-key') ? s.delete('openai-key') : s.add('openai-key'); revealedFields = s; }}>{revealedFields.has('openai-key') ? 'Hide' : 'Show'}</button>
              </div>
            </div>
            <div class="field">
              <label for="openai-model">Model</label>
@@ -179,13 +203,27 @@
          {:else if localSettings.ai_provider === 'anthropic'}
            <div class="field">
              <label for="anthropic-key">Anthropic API Key</label>
-              <input id="anthropic-key" type="password" bind:value={localSettings.anthropic_api_key} placeholder="sk-ant-..." />
+              <div class="input-reveal">
                <input id="anthropic-key" type={revealedFields.has('anthropic-key') ? 'text' : 'password'} bind:value={localSettings.anthropic_api_key} placeholder="sk-ant-..." />
                <button type="button" class="reveal-btn" onclick={() => { const s = new Set(revealedFields); s.has('anthropic-key') ? s.delete('anthropic-key') : s.add('anthropic-key'); revealedFields = s; }}>{revealedFields.has('anthropic-key') ? 'Hide' : 'Show'}</button>
              </div>
            </div>
            <div class="field">
              <label for="anthropic-model">Model</label>
              <input id="anthropic-model" type="text" bind:value={localSettings.anthropic_model} />
            </div>
          {:else if localSettings.ai_provider === 'litellm'}
            <div class="field">
              <label for="litellm-base">API Base URL</label>
              <input id="litellm-base" type="text" bind:value={localSettings.litellm_api_base} placeholder="https://your-litellm-proxy.example.com" />
            </div>
            <div class="field">
              <label for="litellm-key">API Key</label>
              <div class="input-reveal">
                <input id="litellm-key" type={revealedFields.has('litellm-key') ? 'text' : 'password'} bind:value={localSettings.litellm_api_key} placeholder="sk-..." />
                <button type="button" class="reveal-btn" onclick={() => { const s = new Set(revealedFields); s.has('litellm-key') ? s.delete('litellm-key') : s.add('litellm-key'); revealedFields = s; }}>{revealedFields.has('litellm-key') ? 'Hide' : 'Show'}</button>
              </div>
            </div>
            <div class="field">
              <label for="litellm-model">Model</label>
              <input id="litellm-model" type="text" bind:value={localSettings.litellm_model} placeholder="provider/model-name" />
@@ -293,11 +331,36 @@
    color: #aaa;
    margin-bottom: 0.3rem;
  }
  .input-reveal {
    display: flex;
    gap: 0;
  }
  .input-reveal input {
    flex: 1;
    border-top-right-radius: 0;
    border-bottom-right-radius: 0;
  }
  .reveal-btn {
    background: #0f3460;
    border: 1px solid #4a5568;
    border-left: none;
    color: #aaa;
    padding: 0.5rem 0.6rem;
    border-radius: 0 4px 4px 0;
    cursor: pointer;
    font-size: 0.75rem;
    white-space: nowrap;
  }
  .reveal-btn:hover {
    color: #e0e0e0;
    background: #1a4a7a;
  }
  .field input,
  .field select {
    width: 100%;
    background: #1a1a2e;
    color: #e0e0e0;
    color-scheme: dark;
    border: 1px solid #4a5568;
    border-radius: 4px;
    padding: 0.5rem;
@@ -13,6 +13,7 @@
  let container: HTMLDivElement;
  let wavesurfer: WaveSurfer | null = $state(null);
  let isReady = $state(false);
  let isLoading = $state(false);
  let currentTime = $state('0:00');
  let totalTime = $state('0:00');
@@ -32,6 +33,7 @@
      barWidth: 2,
      barGap: 1,
      barRadius: 2,
      backend: 'WebAudio',
    });
    wavesurfer.on('timeupdate', (time: number) => {
@@ -41,6 +43,7 @@
    wavesurfer.on('ready', () => {
      isReady = true;
      isLoading = false;
      const dur = wavesurfer!.getDuration();
      durationMs.set(Math.round(dur * 1000));
      totalTime = formatTime(dur);
@@ -55,7 +58,7 @@
    });
    if (audioUrl) {
-      wavesurfer.load(audioUrl);
+      loadAudio(audioUrl);
    }
  });
@@ -89,16 +92,13 @@
      console.warn('[voice-to-notes] seekTo ignored — audio not ready yet');
      return;
    }
-    const timeSec = timeMs / 1000;
+    wavesurfer.setTime(timeMs / 1000);
    wavesurfer.setTime(timeSec);
    if (!wavesurfer.isPlaying()) {
      wavesurfer.play();
    }
  }
  /**
   * Load a new audio file.
   *
   * Marks the player as loading and not ready, then asks the wavesurfer
   * instance (if one has been created) to load `url`. The flags are flipped
   * back by the 'ready' event handler.
   */
  export function loadAudio(url: string) {
    isLoading = true;
    isReady = false;
    if (wavesurfer) {
      wavesurfer.load(url);
    }
  }
 </script>
@@ -8,6 +8,8 @@ export interface AppSettings {
  openai_model: string;
  anthropic_model: string;
  litellm_model: string;
  litellm_api_key: string;
  litellm_api_base: string;
  local_model_path: string;
  local_binary_path: string;
  transcription_model: string;
@@ -15,6 +17,7 @@ export interface AppSettings {
  transcription_language: string;
  skip_diarization: boolean;
  hf_token: string;
  num_speakers: number | null;
 }
 const defaults: AppSettings = {
@@ -24,6 +27,8 @@ const defaults: AppSettings = {
  openai_model: 'gpt-4o-mini',
  anthropic_model: 'claude-sonnet-4-6',
  litellm_model: 'gpt-4o-mini',
  litellm_api_key: '',
  litellm_api_base: '',
  local_model_path: '',
  local_binary_path: 'llama-server',
  transcription_model: 'base',
@@ -31,6 +36,7 @@ const defaults: AppSettings = {
  transcription_language: '',
  skip_diarization: false,
  hf_token: '',
  num_speakers: null,
 };
 export const settings = writable<AppSettings>({ ...defaults });
@@ -47,4 +53,20 @@ export async function loadSettings(): Promise<void> {
 /**
  * Persist the given settings and push the active AI provider's
  * configuration to the Python sidecar.
  *
  * The store is updated first, then `save_settings` is invoked; a failure
  * there rejects the returned promise. Configuring the provider is
  * best-effort: a failure is logged and does not reject.
  *
  * @param s Complete settings object to store and save.
  */
 export async function saveSettings(s: AppSettings): Promise<void> {
  settings.set(s);
  await invoke('save_settings', { settings: s });
  // Configure the AI provider in the Python sidecar
  const configMap: Record<string, Record<string, string>> = {
    openai: { api_key: s.openai_api_key, model: s.openai_model },
    anthropic: { api_key: s.anthropic_api_key, model: s.anthropic_model },
    litellm: { api_key: s.litellm_api_key, api_base: s.litellm_api_base, model: s.litellm_model },
    local: { model: s.local_model_path, base_url: 'http://localhost:8080' },
  };
  // Providers without an entry above are skipped.
  const config = configMap[s.ai_provider];
  if (config) {
    try {
      await invoke('ai_configure', { provider: s.ai_provider, config });
    } catch (err) {
      // Sidecar may not be running yet — provider will be configured on first use.
      // Log instead of swallowing silently so real configuration errors stay
      // visible; only the error is logged, never the config (it holds API keys).
      console.warn('[voice-to-notes] ai_configure failed, provider not configured yet', err);
    }
  }
 }
@@ -13,6 +13,7 @@
  import type { Segment, Speaker } from '$lib/types/transcript';
  import { onMount, tick } from 'svelte';
  let appReady = $state(false);
  let waveformPlayer: WaveformPlayer;
  let audioUrl = $state('');
  let showSettings = $state(false);
@@ -54,6 +55,8 @@
    document.addEventListener('keydown', handleKeyDown);
    document.addEventListener('click', handleClickOutside);
    appReady = true;
    return () => {
      document.removeEventListener('keydown', handleKeyDown);
      document.removeEventListener('click', handleClickOutside);
@@ -200,6 +203,7 @@
        language: $settings.transcription_language || undefined,
        skipDiarization: $settings.skip_diarization || undefined,
        hfToken: $settings.hf_token || undefined,
        numSpeakers: $settings.num_speakers && $settings.num_speakers > 0 ? $settings.num_speakers : undefined,
      });
      // Create speaker entries from pipeline result
@@ -303,7 +307,15 @@
  }
 </script>
-<div class="app-header">
+{#if !appReady}
  <div class="splash-screen">
    <h1 class="splash-title">Voice to Notes</h1>
    <p class="splash-subtitle">Loading...</p>
    <div class="splash-spinner"></div>
  </div>
 {:else}
  <div class="app-shell">
  <div class="app-header">
    <h1>Voice to Notes</h1>
    <div class="header-actions">
      <button class="import-btn" onclick={handleFileImport} disabled={isTranscribing}>
@@ -333,9 +345,9 @@
        </div>
      {/if}
    </div>
-</div>
+  </div>
-<div class="workspace">
+  <div class="workspace">
    <div class="main-content">
      <WaveformPlayer bind:this={waveformPlayer} {audioUrl} />
      <TranscriptEditor onWordClick={handleWordClick} />
@@ -344,19 +356,21 @@
      <SpeakerManager />
      <AIChatPanel />
    </div>
-</div>
+  </div>
  </div>
-<ProgressOverlay
+  <ProgressOverlay
    visible={isTranscribing}
    percent={transcriptionProgress}
    stage={transcriptionStage}
    message={transcriptionMessage}
-/>
+  />
-<SettingsModal
+  <SettingsModal
    visible={showSettings}
    onClose={() => showSettings = false}
-/>
+  />
 {/if}
 <style>
  .app-header {
@@ -453,11 +467,18 @@
  /* Hover highlight: 20%-opacity tint of rgb(233, 69, 96), i.e. #e94560. */
  .export-option:hover {
    background: rgba(233, 69, 96, 0.2);
  }
  /*
   * Column container pinned to exactly one viewport height. Overflow is
   * clipped here, so content that does not fit is not scrolled at this level.
   */
  .app-shell {
    height: 100vh;
    overflow: hidden;
    flex-direction: column;
    display: flex;
  }
  .workspace {
    display: flex;
    gap: 1rem;
    padding: 1rem;
-    height: calc(100vh - 3rem);
+    flex: 1;
    min-height: 0;
    overflow: hidden;
    background: #0a0a23;
  }
@@ -467,6 +488,8 @@
    flex-direction: column;
    gap: 1rem;
    min-width: 0;
    min-height: 0;
    overflow-y: auto;
  }
  .sidebar-right {
    width: 300px;
@@ -474,5 +497,38 @@
    flex-direction: column;
    gap: 1rem;
    flex-shrink: 0;
    min-height: 0;
    overflow-y: auto;
  }
  /* Full-viewport splash: children stacked vertically and centred on both axes. */
  .splash-screen {
    height: 100vh;
    display: flex;
    flex-direction: column;
    justify-content: center;
    align-items: center;
    gap: 1rem;
    color: #e0e0e0;
    background: #0a0a23;
  }
  /* Title in the #e94560 accent colour. */
  .splash-title {
    color: #e94560;
    margin: 0;
    font-size: 2rem;
  }
  /* Muted status line. */
  .splash-subtitle {
    margin: 0;
    color: #888;
    font-size: 1rem;
  }
  /* 32px ring whose top edge uses the accent colour; spun by `spin`. */
  .splash-spinner {
    height: 32px;
    width: 32px;
    border-radius: 50%;
    border: 3px solid #2a3a5e;
    /* Must stay after the `border` shorthand, which would otherwise reset it. */
    border-top-color: #e94560;
    animation: spin 0.8s linear infinite;
  }
  /* One full clockwise turn per animation cycle. */
  @keyframes spin {
    100% { transform: rotate(360deg); }
  }
 </style>