Cross-platform distribution, UI improvements, and performance optimizations
- PyInstaller frozen sidecar: spec file, build script, and ffmpeg path resolver for self-contained distribution without Python prerequisites
- Dual-mode sidecar launcher: frozen binary (production) with dev-mode fallback
- Parallel transcription + diarization pipeline (~30-40% faster)
- GPU auto-detection for diarization (CUDA when available)
- Async run_pipeline command for real-time progress event delivery
- Web Audio API backend for instant playback and seeking
- OpenAI-compatible provider replacing LiteLLM client-side routing
- Cross-platform RAM detection (Linux/macOS/Windows)
- Settings: speaker count hint, token reveal toggles, dark dropdown styling
- Loading splash screen, flexbox layout fix for viewport overflow
- Gitea Actions CI/CD pipeline (Linux, Windows, macOS ARM)
- Updated README and CLAUDE.md documentation

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
215
python/build_sidecar.py
Normal file
215
python/build_sidecar.py
Normal file
@@ -0,0 +1,215 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Build the Voice to Notes sidecar as a standalone binary using PyInstaller.
|
||||
|
||||
Usage:
|
||||
python build_sidecar.py [--cpu-only]
|
||||
|
||||
Produces a directory `dist/voice-to-notes-sidecar/` containing the frozen
|
||||
sidecar binary and all dependencies. The main binary is renamed to include
|
||||
the Tauri target triple for externalBin resolution.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import os
|
||||
import platform
|
||||
import shutil
|
||||
import stat
|
||||
import subprocess
|
||||
import sys
|
||||
import urllib.request
|
||||
import zipfile
|
||||
from pathlib import Path
|
||||
|
||||
SCRIPT_DIR = Path(__file__).resolve().parent
|
||||
DIST_DIR = SCRIPT_DIR / "dist"
|
||||
BUILD_DIR = SCRIPT_DIR / "build"
|
||||
SPEC_FILE = SCRIPT_DIR / "voice_to_notes.spec"
|
||||
|
||||
# Static ffmpeg download URLs (GPL-licensed builds)
|
||||
FFMPEG_URLS: dict[str, str] = {
|
||||
"linux-x86_64": "https://johnvansickle.com/ffmpeg/releases/ffmpeg-release-amd64-static.tar.xz",
|
||||
"darwin-x86_64": "https://evermeet.cx/ffmpeg/getrelease/zip",
|
||||
"darwin-arm64": "https://evermeet.cx/ffmpeg/getrelease/zip",
|
||||
"win32-x86_64": "https://www.gyan.dev/ffmpeg/builds/ffmpeg-release-essentials.zip",
|
||||
}
|
||||
|
||||
|
||||
def get_target_triple() -> str:
    """Determine the Tauri-compatible target triple for the current platform.

    Combines a normalized CPU architecture (from ``platform.machine``) with
    the Rust-style vendor/OS suffix Tauri expects for externalBin names.
    """
    raw_machine = platform.machine().lower()
    system = platform.system().lower()

    # Normalize vendor-specific architecture spellings to Rust names.
    arch = {
        "x86_64": "x86_64",
        "amd64": "x86_64",
        "aarch64": "aarch64",
        "arm64": "aarch64",
    }.get(raw_machine, raw_machine)

    # Map the OS to its conventional vendor/OS triple suffix.
    if system == "linux":
        suffix = "unknown-linux-gnu"
    elif system == "darwin":
        suffix = "apple-darwin"
    elif system == "windows":
        suffix = "pc-windows-msvc"
    else:
        # Unrecognized OS: fall back to a generic "unknown" vendor.
        suffix = f"unknown-{system}"

    return f"{arch}-{suffix}"
|
||||
|
||||
|
||||
def create_venv_and_install(cpu_only: bool) -> Path:
    """Create a fresh build venv and install all sidecar dependencies.

    Args:
        cpu_only: When True, install CPU-only PyTorch wheels to avoid
            bundling ~2GB of CUDA libraries into the frozen sidecar.

    Returns:
        Path to the Python interpreter inside the new venv.

    Raises:
        subprocess.CalledProcessError: If any install step fails.
    """
    venv_dir = BUILD_DIR / "sidecar-venv"
    if venv_dir.exists():
        # Always start clean so stale packages never leak into the bundle.
        shutil.rmtree(venv_dir)

    print(f"[build] Creating venv at {venv_dir}")
    subprocess.run([sys.executable, "-m", "venv", str(venv_dir)], check=True)

    # Interpreter location differs between Windows and POSIX venv layouts.
    bin_dir = venv_dir / ("Scripts" if sys.platform == "win32" else "bin")
    python = str(bin_dir / "python")

    # Fix: invoke pip via `python -m pip` instead of the pip executable.
    # On Windows, pip.exe cannot upgrade itself while it is running; the
    # module invocation is the documented-safe form on every platform.
    pip = [python, "-m", "pip"]
    subprocess.run([*pip, "install", "--upgrade", "pip"], check=True)

    # Install torch first so the chosen index takes precedence over the
    # project's transitive dependency resolution.
    if cpu_only:
        print("[build] Installing PyTorch (CPU-only)")
        subprocess.run(
            [*pip, "install", "torch", "torchaudio",
             "--index-url", "https://download.pytorch.org/whl/cpu"],
            check=True,
        )
    else:
        print("[build] Installing PyTorch (default, may include CUDA)")
        subprocess.run([*pip, "install", "torch", "torchaudio"], check=True)

    # Install the project and its dev extras (includes pyinstaller).
    print("[build] Installing project dependencies")
    subprocess.run([*pip, "install", "-e", f"{SCRIPT_DIR}[dev]"], check=True)

    return Path(python)
|
||||
|
||||
|
||||
def run_pyinstaller(python: Path) -> Path:
    """Freeze the sidecar with PyInstaller and return the output directory.

    Args:
        python: Interpreter inside the build venv (has PyInstaller installed).

    Raises:
        RuntimeError: If PyInstaller exited successfully but produced no
            output directory.
    """
    print("[build] Running PyInstaller")
    cmd = [str(python), "-m", "PyInstaller", "--clean", "--noconfirm", str(SPEC_FILE)]
    # Run from the script directory so relative paths in the spec resolve.
    subprocess.run(cmd, cwd=str(SCRIPT_DIR), check=True)

    bundle_dir = DIST_DIR / "voice-to-notes-sidecar"
    if not bundle_dir.exists():
        raise RuntimeError(f"PyInstaller output not found at {bundle_dir}")
    return bundle_dir
|
||||
|
||||
|
||||
def download_ffmpeg(output_dir: Path) -> None:
    """Download a static ffmpeg/ffprobe binary for the current platform.

    Best-effort: on any failure (no URL for this platform, network error,
    unexpected archive layout) a warning is printed and the build continues
    without bundled ffmpeg — the runtime resolver then falls back to PATH.

    Args:
        output_dir: The frozen-bundle directory; binaries are placed directly
            inside it, next to the sidecar executable.
    """
    # Build a "system-arch" key matching the FFMPEG_URLS table.
    system = sys.platform
    machine = platform.machine().lower()
    if machine in ("amd64", "x86_64"):
        machine = "x86_64"
    elif machine in ("aarch64", "arm64"):
        machine = "arm64"

    key = f"{system}-{machine}"
    # These reassignments produce the same value as above; kept for clarity
    # of which platforms are explicitly supported.
    if system == "win32":
        key = f"win32-{machine}"
    elif system == "linux":
        key = f"linux-{machine}"

    url = FFMPEG_URLS.get(key)
    if not url:
        print(f"[build] Warning: No ffmpeg download URL for platform {key}, skipping")
        return

    print(f"[build] Downloading ffmpeg for {key}")
    tmp_path = output_dir / "ffmpeg_download"
    try:
        urllib.request.urlretrieve(url, str(tmp_path))

        if url.endswith(".tar.xz"):
            # Linux static build
            import tarfile
            with tarfile.open(str(tmp_path), "r:xz") as tar:
                for member in tar.getmembers():
                    basename = os.path.basename(member.name)
                    if basename in ("ffmpeg", "ffprobe"):
                        # Rewrite the member name so extraction flattens the
                        # archive's internal directory structure into
                        # output_dir (also prevents path traversal).
                        member.name = basename
                        tar.extract(member, path=str(output_dir))
                        dest = output_dir / basename
                        # Ensure the extracted binary is executable.
                        dest.chmod(dest.stat().st_mode | stat.S_IEXEC)
        elif url.endswith(".zip"):
            # macOS (evermeet) and Windows (gyan.dev) ship zip archives.
            # NOTE(review): the evermeet release zip appears to contain only
            # ffmpeg; ffprobe may require a separate download — verify.
            with zipfile.ZipFile(str(tmp_path), "r") as zf:
                for name in zf.namelist():
                    basename = os.path.basename(name)
                    if basename in ("ffmpeg", "ffprobe", "ffmpeg.exe", "ffprobe.exe"):
                        data = zf.read(name)
                        dest = output_dir / basename
                        dest.write_bytes(data)
                        if sys.platform != "win32":
                            # write_bytes does not set the exec bit.
                            dest.chmod(dest.stat().st_mode | stat.S_IEXEC)
        print("[build] ffmpeg downloaded successfully")
    except Exception as e:
        # Deliberate best-effort: a missing bundled ffmpeg is non-fatal.
        print(f"[build] Warning: Failed to download ffmpeg: {e}")
    finally:
        # Always remove the downloaded archive, even on failure.
        if tmp_path.exists():
            tmp_path.unlink()
|
||||
|
||||
|
||||
def rename_binary(output_dir: Path, target_triple: str) -> None:
    """Suffix the frozen sidecar binary with the Tauri target triple.

    Tauri's externalBin lookup expects binaries named
    ``<name>-<target-triple>[.exe]``, so the PyInstaller output is renamed
    in place. A missing source binary is reported but not fatal.
    """
    ext = ".exe" if sys.platform == "win32" else ""
    src = output_dir / f"voice-to-notes-sidecar{ext}"
    dst = output_dir / f"voice-to-notes-sidecar-{target_triple}{ext}"

    if not src.exists():
        print(f"[build] Warning: Expected binary not found at {src}")
        return

    print(f"[build] Renaming {src.name} -> {dst.name}")
    src.rename(dst)
|
||||
|
||||
|
||||
def _build_arg_parser() -> argparse.ArgumentParser:
    """Construct the command-line parser for the sidecar build script."""
    parser = argparse.ArgumentParser(description="Build the Voice to Notes sidecar binary")
    parser.add_argument(
        "--cpu-only",
        action="store_true",
        default=True,
        help="Install CPU-only PyTorch (default: True, avoids bundling CUDA)",
    )
    parser.add_argument(
        "--with-cuda",
        action="store_true",
        help="Install PyTorch with CUDA support",
    )
    return parser


def main() -> None:
    """Build the sidecar: venv setup, PyInstaller freeze, ffmpeg, rename."""
    args = _build_arg_parser().parse_args()
    # --with-cuda wins over the (default-on) --cpu-only flag.
    cpu_only = not args.with_cuda

    target_triple = get_target_triple()
    print(f"[build] Target triple: {target_triple}")
    print(f"[build] CPU-only: {cpu_only}")

    python = create_venv_and_install(cpu_only)
    output_dir = run_pyinstaller(python)
    download_ffmpeg(output_dir)
    rename_binary(output_dir, target_triple)

    print(f"\n[build] Done! Sidecar built at: {output_dir}")
    print("[build] Copy contents to src-tauri/binaries/ for Tauri bundling")


if __name__ == "__main__":
    main()
|
||||
@@ -13,6 +13,8 @@ dependencies = [
|
||||
"faster-whisper>=1.1.0",
|
||||
"pyannote.audio>=3.1.0",
|
||||
"pysubs2>=1.7.0",
|
||||
"openai>=1.0.0",
|
||||
"anthropic>=0.20.0",
|
||||
]
|
||||
|
||||
[project.optional-dependencies]
|
||||
@@ -20,6 +22,7 @@ dev = [
|
||||
"ruff>=0.8.0",
|
||||
"pytest>=8.0.0",
|
||||
"pytest-asyncio>=0.24.0",
|
||||
"pyinstaller>=6.0",
|
||||
]
|
||||
|
||||
[tool.ruff]
|
||||
|
||||
67
python/voice_to_notes.spec
Normal file
67
python/voice_to_notes.spec
Normal file
@@ -0,0 +1,67 @@
|
||||
# -*- mode: python ; coding: utf-8 -*-
"""PyInstaller spec for the Voice to Notes sidecar binary.

Targets PyInstaller >= 6.0 (as pinned in pyproject's dev extras): the
``cipher``/``block_cipher`` bytecode-encryption options and the
``win_no_prefer_redirects``/``win_private_assemblies`` Analysis options
were removed in 6.0 and are no longer passed.
"""

from PyInstaller.utils.hooks import collect_all

# Collect data files, shared libraries, and hidden imports for ML packages
# whose native components PyInstaller's static analysis routinely misses.
ctranslate2_datas, ctranslate2_binaries, ctranslate2_hiddenimports = collect_all("ctranslate2")
faster_whisper_datas, faster_whisper_binaries, faster_whisper_hiddenimports = collect_all(
    "faster_whisper"
)
# NOTE(review): "pyannote" is a namespace package; collect_all("pyannote.audio")
# may be needed instead — verify the collected file list is non-empty.
pyannote_datas, pyannote_binaries, pyannote_hiddenimports = collect_all("pyannote")

a = Analysis(
    ["voice_to_notes/main.py"],
    pathex=[],
    binaries=ctranslate2_binaries + faster_whisper_binaries + pyannote_binaries,
    datas=ctranslate2_datas + faster_whisper_datas + pyannote_datas,
    hiddenimports=[
        "torch",
        "torchaudio",
        "huggingface_hub",
        "pysubs2",
        "openai",
        "anthropic",
        # "litellm" removed: the LiteLLM client provider was replaced by the
        # OpenAI-compatible provider and litellm is not a declared dependency.
    ]
    + ctranslate2_hiddenimports
    + faster_whisper_hiddenimports
    + pyannote_hiddenimports,
    hookspath=[],
    hooksconfig={},
    runtime_hooks=[],
    # Trim packages the frozen sidecar never needs.
    excludes=["tkinter", "test", "unittest", "pip", "setuptools"],
    noarchive=False,
)

pyz = PYZ(a.pure)

exe = EXE(
    pyz,
    a.scripts,
    [],
    exclude_binaries=True,
    name="voice-to-notes-sidecar",
    debug=False,
    bootloader_ignore_signals=False,
    strip=False,
    upx=True,
    # Console mode: the sidecar communicates over stdio with the Tauri host.
    console=True,
)

coll = COLLECT(
    exe,
    a.binaries,
    a.zipfiles,
    a.datas,
    strip=False,
    upx=True,
    upx_exclude=[],
    name="voice-to-notes-sidecar",
)
|
||||
@@ -2,7 +2,10 @@
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import ctypes
|
||||
import os
|
||||
import platform
|
||||
import subprocess
|
||||
import sys
|
||||
from dataclasses import dataclass
|
||||
|
||||
@@ -21,6 +24,77 @@ class HardwareInfo:
|
||||
recommended_compute_type: str = "int8"
|
||||
|
||||
|
||||
def _detect_ram_mb() -> int:
|
||||
"""Detect total system RAM in MB (cross-platform).
|
||||
|
||||
Tries platform-specific methods in order:
|
||||
1. Linux: read /proc/meminfo
|
||||
2. macOS: sysctl hw.memsize
|
||||
3. Windows: GlobalMemoryStatusEx via ctypes
|
||||
4. Fallback: os.sysconf (most Unix systems)
|
||||
|
||||
Returns 0 if all methods fail.
|
||||
"""
|
||||
# Linux: read /proc/meminfo
|
||||
if sys.platform == "linux":
|
||||
try:
|
||||
with open("/proc/meminfo") as f:
|
||||
for line in f:
|
||||
if line.startswith("MemTotal:"):
|
||||
# Value is in kB
|
||||
return int(line.split()[1]) // 1024
|
||||
except (FileNotFoundError, ValueError, OSError):
|
||||
pass
|
||||
|
||||
# macOS: sysctl hw.memsize (returns bytes)
|
||||
if sys.platform == "darwin":
|
||||
try:
|
||||
result = subprocess.run(
|
||||
["sysctl", "-n", "hw.memsize"],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
check=True,
|
||||
)
|
||||
return int(result.stdout.strip()) // (1024 * 1024)
|
||||
except (subprocess.SubprocessError, ValueError, OSError):
|
||||
pass
|
||||
|
||||
# Windows: GlobalMemoryStatusEx via ctypes
|
||||
if sys.platform == "win32":
|
||||
try:
|
||||
|
||||
class MEMORYSTATUSEX(ctypes.Structure):
|
||||
_fields_ = [
|
||||
("dwLength", ctypes.c_ulong),
|
||||
("dwMemoryLoad", ctypes.c_ulong),
|
||||
("ullTotalPhys", ctypes.c_ulonglong),
|
||||
("ullAvailPhys", ctypes.c_ulonglong),
|
||||
("ullTotalPageFile", ctypes.c_ulonglong),
|
||||
("ullAvailPageFile", ctypes.c_ulonglong),
|
||||
("ullTotalVirtual", ctypes.c_ulonglong),
|
||||
("ullAvailVirtual", ctypes.c_ulonglong),
|
||||
("ullAvailExtendedVirtual", ctypes.c_ulonglong),
|
||||
]
|
||||
|
||||
mem_status = MEMORYSTATUSEX()
|
||||
mem_status.dwLength = ctypes.sizeof(MEMORYSTATUSEX)
|
||||
if ctypes.windll.kernel32.GlobalMemoryStatusEx(ctypes.byref(mem_status)):
|
||||
return int(mem_status.ullTotalPhys) // (1024 * 1024)
|
||||
except (AttributeError, OSError):
|
||||
pass
|
||||
|
||||
# Fallback: os.sysconf (works on most Unix systems)
|
||||
try:
|
||||
page_size = os.sysconf("SC_PAGE_SIZE")
|
||||
phys_pages = os.sysconf("SC_PHYS_PAGES")
|
||||
if page_size > 0 and phys_pages > 0:
|
||||
return (page_size * phys_pages) // (1024 * 1024)
|
||||
except (ValueError, OSError, AttributeError):
|
||||
pass
|
||||
|
||||
return 0
|
||||
|
||||
|
||||
def detect_hardware() -> HardwareInfo:
|
||||
"""Detect available hardware and recommend model configuration."""
|
||||
info = HardwareInfo()
|
||||
@@ -28,16 +102,8 @@ def detect_hardware() -> HardwareInfo:
|
||||
# CPU info
|
||||
info.cpu_cores = os.cpu_count() or 1
|
||||
|
||||
# RAM info
|
||||
try:
|
||||
with open("/proc/meminfo") as f:
|
||||
for line in f:
|
||||
if line.startswith("MemTotal:"):
|
||||
# Value is in kB
|
||||
info.ram_mb = int(line.split()[1]) // 1024
|
||||
break
|
||||
except (FileNotFoundError, ValueError):
|
||||
pass
|
||||
# RAM info (cross-platform)
|
||||
info.ram_mb = _detect_ram_mb()
|
||||
|
||||
# CUDA detection
|
||||
try:
|
||||
|
||||
@@ -260,10 +260,12 @@ def make_ai_chat_handler() -> HandlerFunc:
|
||||
model=config.get("model", "claude-sonnet-4-6"),
|
||||
))
|
||||
elif provider_name == "litellm":
|
||||
from voice_to_notes.providers.litellm_provider import LiteLLMProvider
|
||||
from voice_to_notes.providers.litellm_provider import OpenAICompatibleProvider
|
||||
|
||||
service.register_provider("litellm", LiteLLMProvider(
|
||||
service.register_provider("litellm", OpenAICompatibleProvider(
|
||||
model=config.get("model", "gpt-4o-mini"),
|
||||
api_key=config.get("api_key"),
|
||||
api_base=config.get("api_base"),
|
||||
))
|
||||
return IPCMessage(
|
||||
id=msg.id,
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
"""LiteLLM provider — multi-provider gateway."""
|
||||
"""OpenAI-compatible provider — works with any OpenAI-compatible API endpoint."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
@@ -7,36 +7,44 @@ from typing import Any
|
||||
from voice_to_notes.providers.base import AIProvider
|
||||
|
||||
|
||||
class OpenAICompatibleProvider(AIProvider):
    """Connects to any OpenAI-compatible API (LiteLLM proxy, Ollama, vLLM, etc.)."""

    def __init__(
        self,
        api_key: str | None = None,
        api_base: str | None = None,
        model: str = "gpt-4o-mini",
        **kwargs: Any,
    ) -> None:
        """Configure the endpoint.

        Args:
            api_key: API key; local servers usually ignore it, but the client
                requires a non-empty value, so a placeholder is substituted.
            api_base: Base URL of the OpenAI-compatible endpoint.
            model: Default model name used when a call does not override it.
            **kwargs: Default chat parameters (e.g. ``temperature``,
                ``max_tokens``) applied to every call; per-call kwargs win.
        """
        self._api_key = api_key or "sk-no-key"
        self._api_base = api_base
        self._model = model
        self._extra_kwargs = kwargs

    def chat(self, messages: list[dict[str, str]], **kwargs: Any) -> str:
        """Send a chat completion request and return the assistant's text."""
        try:
            from openai import OpenAI
        except ImportError as exc:
            raise RuntimeError(
                "openai package is required. Install with: pip install openai"
            ) from exc

        client_kwargs: dict[str, Any] = {"api_key": self._api_key}
        if self._api_base:
            client_kwargs["base_url"] = self._api_base

        # Fix: constructor kwargs were stored in _extra_kwargs but never
        # applied. Merge them as call defaults (per-call kwargs take
        # precedence), restoring the previous merged-kwargs behavior.
        merged_kwargs = {**self._extra_kwargs, **kwargs}

        client = OpenAI(**client_kwargs)
        response = client.chat.completions.create(
            model=merged_kwargs.get("model", self._model),
            messages=messages,
            temperature=merged_kwargs.get("temperature", 0.7),
            max_tokens=merged_kwargs.get("max_tokens", 2048),
        )
        return response.choices[0].message.content or ""

    def is_available(self) -> bool:
        """Report whether this provider can be used.

        NOTE(review): ``_api_key`` defaults to a truthy placeholder, so this
        effectively gates on ``api_base`` only — confirm that is intended
        (the official OpenAI endpoint needs no api_base).
        """
        try:
            import openai  # noqa: F401
            return bool(self._api_key and self._api_base)
        except ImportError:
            return False

    @property
    def name(self) -> str:
        return "OpenAI Compatible"
|
||||
|
||||
@@ -92,7 +92,7 @@ class AIProviderService:
|
||||
def create_default_service() -> AIProviderService:
|
||||
"""Create an AIProviderService with all supported providers registered."""
|
||||
from voice_to_notes.providers.anthropic_provider import AnthropicProvider
|
||||
from voice_to_notes.providers.litellm_provider import LiteLLMProvider
|
||||
from voice_to_notes.providers.litellm_provider import OpenAICompatibleProvider
|
||||
from voice_to_notes.providers.local_provider import LocalProvider
|
||||
from voice_to_notes.providers.openai_provider import OpenAIProvider
|
||||
|
||||
@@ -100,5 +100,5 @@ def create_default_service() -> AIProviderService:
|
||||
service.register_provider("local", LocalProvider())
|
||||
service.register_provider("openai", OpenAIProvider())
|
||||
service.register_provider("anthropic", AnthropicProvider())
|
||||
service.register_provider("litellm", LiteLLMProvider())
|
||||
service.register_provider("litellm", OpenAICompatibleProvider())
|
||||
return service
|
||||
|
||||
@@ -16,6 +16,7 @@ from typing import Any
|
||||
# np.isfinite(None) crashes when max_speakers is not set.
|
||||
os.environ.setdefault("PYANNOTE_METRICS_ENABLED", "false")
|
||||
|
||||
from voice_to_notes.utils.ffmpeg import get_ffmpeg_path
|
||||
from voice_to_notes.ipc.messages import progress_message
|
||||
from voice_to_notes.ipc.protocol import write_message
|
||||
|
||||
@@ -40,7 +41,7 @@ def _ensure_wav(file_path: str) -> tuple[str, str | None]:
|
||||
try:
|
||||
subprocess.run(
|
||||
[
|
||||
"ffmpeg", "-y", "-i", file_path,
|
||||
get_ffmpeg_path(), "-y", "-i", file_path,
|
||||
"-ar", "16000", "-ac", "1", "-c:a", "pcm_s16le",
|
||||
tmp.name,
|
||||
],
|
||||
@@ -118,6 +119,14 @@ class DiarizeService:
|
||||
|
||||
self._pipeline = Pipeline.from_pretrained(model_name, token=hf_token)
|
||||
print(f"[sidecar] Loaded diarization model: {model_name}", file=sys.stderr, flush=True)
|
||||
# Move pipeline to GPU if available
|
||||
try:
|
||||
import torch
|
||||
if torch.cuda.is_available():
|
||||
self._pipeline = self._pipeline.to(torch.device("cuda"))
|
||||
print(f"[sidecar] Diarization pipeline moved to GPU", file=sys.stderr, flush=True)
|
||||
except Exception as e:
|
||||
print(f"[sidecar] GPU not available for diarization: {e}", file=sys.stderr, flush=True)
|
||||
return self._pipeline
|
||||
except Exception as e:
|
||||
last_error = e
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import concurrent.futures
|
||||
import sys
|
||||
import time
|
||||
from dataclasses import dataclass, field
|
||||
@@ -13,6 +14,7 @@ from voice_to_notes.ipc.messages import (
|
||||
speaker_update_message,
|
||||
)
|
||||
from voice_to_notes.ipc.protocol import write_message
|
||||
from voice_to_notes.utils.ffmpeg import get_ffprobe_path
|
||||
from voice_to_notes.services.diarize import DiarizeService, SpeakerSegment
|
||||
from voice_to_notes.services.transcribe import (
|
||||
SegmentResult,
|
||||
@@ -82,7 +84,7 @@ class PipelineService:
|
||||
"""
|
||||
start_time = time.time()
|
||||
|
||||
# Step 1: Transcribe
|
||||
# Step 0: Probe audio duration for conditional chunked transcription
|
||||
write_message(
|
||||
progress_message(request_id, 0, "pipeline", "Starting transcription pipeline...")
|
||||
)
|
||||
@@ -96,12 +98,11 @@ class PipelineService:
|
||||
"words": [{"word": w.word, "start_ms": w.start_ms, "end_ms": w.end_ms, "confidence": w.confidence} for w in seg.words],
|
||||
}))
|
||||
|
||||
# Probe audio duration for conditional chunked transcription
|
||||
audio_duration_sec = None
|
||||
try:
|
||||
import subprocess
|
||||
probe_result = subprocess.run(
|
||||
["ffprobe", "-v", "quiet", "-show_entries", "format=duration",
|
||||
[get_ffprobe_path(), "-v", "quiet", "-show_entries", "format=duration",
|
||||
"-of", "default=noprint_wrappers=1:nokey=1", file_path],
|
||||
capture_output=True, text=True, check=True,
|
||||
)
|
||||
@@ -109,30 +110,33 @@ class PipelineService:
|
||||
except (subprocess.CalledProcessError, FileNotFoundError, ValueError):
|
||||
pass
|
||||
|
||||
from voice_to_notes.services.transcribe import LARGE_FILE_THRESHOLD_SEC
|
||||
if audio_duration_sec and audio_duration_sec > LARGE_FILE_THRESHOLD_SEC:
|
||||
transcription = self._transcribe_service.transcribe_chunked(
|
||||
request_id=request_id,
|
||||
file_path=file_path,
|
||||
model_name=model_name,
|
||||
device=device,
|
||||
compute_type=compute_type,
|
||||
language=language,
|
||||
on_segment=_emit_segment,
|
||||
)
|
||||
else:
|
||||
transcription = self._transcribe_service.transcribe(
|
||||
request_id=request_id,
|
||||
file_path=file_path,
|
||||
model_name=model_name,
|
||||
device=device,
|
||||
compute_type=compute_type,
|
||||
language=language,
|
||||
on_segment=_emit_segment,
|
||||
)
|
||||
def _run_transcription() -> TranscriptionResult:
|
||||
"""Run transcription (chunked or standard based on duration)."""
|
||||
from voice_to_notes.services.transcribe import LARGE_FILE_THRESHOLD_SEC
|
||||
if audio_duration_sec and audio_duration_sec > LARGE_FILE_THRESHOLD_SEC:
|
||||
return self._transcribe_service.transcribe_chunked(
|
||||
request_id=request_id,
|
||||
file_path=file_path,
|
||||
model_name=model_name,
|
||||
device=device,
|
||||
compute_type=compute_type,
|
||||
language=language,
|
||||
on_segment=_emit_segment,
|
||||
)
|
||||
else:
|
||||
return self._transcribe_service.transcribe(
|
||||
request_id=request_id,
|
||||
file_path=file_path,
|
||||
model_name=model_name,
|
||||
device=device,
|
||||
compute_type=compute_type,
|
||||
language=language,
|
||||
on_segment=_emit_segment,
|
||||
)
|
||||
|
||||
if skip_diarization:
|
||||
# Convert transcription directly without speaker labels
|
||||
# Sequential: transcribe only, no diarization needed
|
||||
transcription = _run_transcription()
|
||||
result = PipelineResult(
|
||||
language=transcription.language,
|
||||
language_probability=transcription.language_probability,
|
||||
@@ -150,37 +154,59 @@ class PipelineService:
|
||||
)
|
||||
return result
|
||||
|
||||
# Step 2: Diarize (with graceful fallback)
|
||||
# Parallel execution: run transcription (0-45%) and diarization (45-90%)
|
||||
# concurrently, then merge (90-100%).
|
||||
write_message(
|
||||
progress_message(request_id, 50, "pipeline", "Starting speaker diarization...")
|
||||
progress_message(
|
||||
request_id, 0, "pipeline",
|
||||
"Starting transcription and diarization in parallel..."
|
||||
)
|
||||
)
|
||||
|
||||
diarization = None
|
||||
try:
|
||||
diarization = self._diarize_service.diarize(
|
||||
diarization_error = None
|
||||
|
||||
with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
|
||||
transcription_future = executor.submit(_run_transcription)
|
||||
|
||||
# Use probed audio_duration_sec for diarization progress estimation
|
||||
# (transcription hasn't finished yet, so we can't use transcription.duration_ms)
|
||||
diarization_future = executor.submit(
|
||||
self._diarize_service.diarize,
|
||||
request_id=request_id,
|
||||
file_path=file_path,
|
||||
num_speakers=num_speakers,
|
||||
min_speakers=min_speakers,
|
||||
max_speakers=max_speakers,
|
||||
hf_token=hf_token,
|
||||
audio_duration_sec=transcription.duration_ms / 1000.0,
|
||||
audio_duration_sec=audio_duration_sec,
|
||||
)
|
||||
except Exception as e:
|
||||
import traceback
|
||||
print(
|
||||
f"[sidecar] Diarization failed, falling back to transcription-only: {e}",
|
||||
file=sys.stderr,
|
||||
flush=True,
|
||||
)
|
||||
traceback.print_exc(file=sys.stderr)
|
||||
|
||||
# Wait for both futures. We need the transcription result regardless,
|
||||
# but diarization may fail gracefully.
|
||||
transcription = transcription_future.result()
|
||||
write_message(
|
||||
progress_message(
|
||||
request_id, 80, "pipeline",
|
||||
f"Diarization failed ({e}), using transcription only..."
|
||||
)
|
||||
progress_message(request_id, 45, "pipeline", "Transcription complete")
|
||||
)
|
||||
|
||||
try:
|
||||
diarization = diarization_future.result()
|
||||
except Exception as e:
|
||||
import traceback
|
||||
diarization_error = e
|
||||
print(
|
||||
f"[sidecar] Diarization failed, falling back to transcription-only: {e}",
|
||||
file=sys.stderr,
|
||||
flush=True,
|
||||
)
|
||||
traceback.print_exc(file=sys.stderr)
|
||||
write_message(
|
||||
progress_message(
|
||||
request_id, 80, "pipeline",
|
||||
f"Diarization failed ({e}), using transcription only..."
|
||||
)
|
||||
)
|
||||
|
||||
# Step 3: Merge (or skip if diarization failed)
|
||||
if diarization is not None:
|
||||
write_message(
|
||||
|
||||
@@ -12,6 +12,7 @@ from faster_whisper import WhisperModel
|
||||
|
||||
from voice_to_notes.ipc.messages import progress_message
|
||||
from voice_to_notes.ipc.protocol import write_message
|
||||
from voice_to_notes.utils.ffmpeg import get_ffmpeg_path, get_ffprobe_path
|
||||
|
||||
CHUNK_REPORT_SIZE = 10
|
||||
LARGE_FILE_THRESHOLD_SEC = 3600 # 1 hour
|
||||
@@ -202,7 +203,7 @@ class TranscribeService:
|
||||
# Get total duration via ffprobe
|
||||
try:
|
||||
probe_result = subprocess.run(
|
||||
["ffprobe", "-v", "quiet", "-show_entries", "format=duration",
|
||||
[get_ffprobe_path(), "-v", "quiet", "-show_entries", "format=duration",
|
||||
"-of", "default=noprint_wrappers=1:nokey=1", file_path],
|
||||
capture_output=True, text=True, check=True,
|
||||
)
|
||||
@@ -235,7 +236,7 @@ class TranscribeService:
|
||||
tmp.close()
|
||||
try:
|
||||
subprocess.run(
|
||||
["ffmpeg", "-y", "-ss", str(chunk_start),
|
||||
[get_ffmpeg_path(), "-y", "-ss", str(chunk_start),
|
||||
"-t", str(chunk_duration_sec),
|
||||
"-i", file_path,
|
||||
"-ar", "16000", "-ac", "1", "-c:a", "pcm_s16le",
|
||||
|
||||
43
python/voice_to_notes/utils/ffmpeg.py
Normal file
43
python/voice_to_notes/utils/ffmpeg.py
Normal file
@@ -0,0 +1,43 @@
|
||||
"""Resolve ffmpeg/ffprobe paths for both frozen and development builds."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
|
||||
def get_ffmpeg_path() -> str:
|
||||
"""Return the path to the ffmpeg binary.
|
||||
|
||||
When running as a frozen PyInstaller bundle, looks next to sys.executable.
|
||||
Otherwise falls back to the system PATH.
|
||||
"""
|
||||
if getattr(sys, "frozen", False):
|
||||
# Frozen PyInstaller bundle — ffmpeg is next to the sidecar binary
|
||||
bundle_dir = os.path.dirname(sys.executable)
|
||||
candidates = [
|
||||
os.path.join(bundle_dir, "ffmpeg.exe" if sys.platform == "win32" else "ffmpeg"),
|
||||
os.path.join(bundle_dir, "ffmpeg"),
|
||||
]
|
||||
for path in candidates:
|
||||
if os.path.isfile(path):
|
||||
return path
|
||||
return "ffmpeg"
|
||||
|
||||
|
||||
def get_ffprobe_path() -> str:
|
||||
"""Return the path to the ffprobe binary.
|
||||
|
||||
When running as a frozen PyInstaller bundle, looks next to sys.executable.
|
||||
Otherwise falls back to the system PATH.
|
||||
"""
|
||||
if getattr(sys, "frozen", False):
|
||||
bundle_dir = os.path.dirname(sys.executable)
|
||||
candidates = [
|
||||
os.path.join(bundle_dir, "ffprobe.exe" if sys.platform == "win32" else "ffprobe"),
|
||||
os.path.join(bundle_dir, "ffprobe"),
|
||||
]
|
||||
for path in candidates:
|
||||
if os.path.isfile(path):
|
||||
return path
|
||||
return "ffprobe"
|
||||
Reference in New Issue
Block a user