python/voice_to_notes/hardware/detect.py

"""GPU/CPU detection and VRAM estimation."""

from __future__ import annotations

import ctypes
import os
import platform
import subprocess
import sys
from dataclasses import dataclass


@dataclass
class HardwareInfo:
    """Snapshot of the machine's compute resources plus a suggested setup.

    Every field has a conservative default (no GPU, zero sizes, the "base"
    model on CPU with int8), so a bare ``HardwareInfo()`` already describes
    the most constrained configuration. ``detect_hardware`` fills in the
    measured values and the matching recommendation.
    """

    # --- GPU ---
    # True when a CUDA device was detected.
    has_cuda: bool = False
    # Name of the CUDA device; empty when no device was detected.
    cuda_device_name: str = ""
    # Total GPU memory in MB; 0 when unknown.
    vram_mb: int = 0

    # --- Host ---
    # Total system RAM in MB; 0 when it could not be determined.
    ram_mb: int = 0
    # Number of CPU cores; 0 until detection has run.
    cpu_cores: int = 0

    # --- Recommendation derived from the values above ---
    # Suggested model size name (e.g. "base", "small", "medium").
    recommended_model: str = "base"
    # Suggested device to run on: "cpu" or "cuda".
    recommended_device: str = "cpu"
    # Suggested compute/quantization type.
    recommended_compute_type: str = "int8"


def _detect_ram_mb() -> int:
    """Detect total system RAM in MB (cross-platform).

    Tries the probe matching ``sys.platform`` first, then a generic fallback:

    1. Linux: read ``MemTotal`` from /proc/meminfo (value is in kB)
    2. macOS: ``sysctl -n hw.memsize`` (value is in bytes)
    3. Windows: GlobalMemoryStatusEx via ctypes (value is in bytes)
    4. Fallback: os.sysconf page size * physical page count

    Every probe is best-effort: expected failures (missing file, malformed
    output, missing API, stalled subprocess) are swallowed so the next
    method can be tried.

    Returns:
        Total physical memory in MB (floor division), or 0 if all methods
        fail.
    """
    bytes_per_mb = 1024 * 1024

    # Linux: read /proc/meminfo
    if sys.platform == "linux":
        try:
            # Explicit encoding so parsing does not depend on the locale.
            with open("/proc/meminfo", encoding="utf-8") as f:
                for line in f:
                    if line.startswith("MemTotal:"):
                        # Value is in kB
                        return int(line.split()[1]) // 1024
        except (OSError, ValueError, IndexError):
            # IndexError: a "MemTotal:" line with no value field. Treat it
            # like any other unreadable input and fall through.
            pass

    # macOS: sysctl hw.memsize (returns bytes)
    if sys.platform == "darwin":
        try:
            result = subprocess.run(
                ["sysctl", "-n", "hw.memsize"],
                capture_output=True,
                text=True,
                check=True,
                # Never let a stalled child block hardware detection;
                # TimeoutExpired is a SubprocessError and is handled below.
                timeout=5,
            )
            return int(result.stdout.strip()) // bytes_per_mb
        except (subprocess.SubprocessError, ValueError, OSError):
            pass

    # Windows: GlobalMemoryStatusEx via ctypes
    if sys.platform == "win32":
        try:

            class MEMORYSTATUSEX(ctypes.Structure):
                _fields_ = [
                    ("dwLength", ctypes.c_ulong),
                    ("dwMemoryLoad", ctypes.c_ulong),
                    ("ullTotalPhys", ctypes.c_ulonglong),
                    ("ullAvailPhys", ctypes.c_ulonglong),
                    ("ullTotalPageFile", ctypes.c_ulonglong),
                    ("ullAvailPageFile", ctypes.c_ulonglong),
                    ("ullTotalVirtual", ctypes.c_ulonglong),
                    ("ullAvailVirtual", ctypes.c_ulonglong),
                    ("ullAvailExtendedVirtual", ctypes.c_ulonglong),
                ]

            mem_status = MEMORYSTATUSEX()
            # The structure size is filled in before the call.
            mem_status.dwLength = ctypes.sizeof(MEMORYSTATUSEX)
            # A falsy return is treated as failure and falls through.
            if ctypes.windll.kernel32.GlobalMemoryStatusEx(ctypes.byref(mem_status)):
                return int(mem_status.ullTotalPhys) // bytes_per_mb
        except (AttributeError, OSError):
            pass

    # Fallback: os.sysconf (works on most Unix systems). AttributeError
    # covers platforms where os.sysconf does not exist at all.
    try:
        page_size = os.sysconf("SC_PAGE_SIZE")
        phys_pages = os.sysconf("SC_PHYS_PAGES")
        if page_size > 0 and phys_pages > 0:
            return (page_size * phys_pages) // bytes_per_mb
    except (ValueError, OSError, AttributeError):
        pass

    return 0


def detect_hardware() -> HardwareInfo:
    """Detect available hardware and recommend model configuration.

    Collects the CPU core count, total system RAM and (when torch is
    importable and CUDA actually works) the GPU name and VRAM, then picks a
    model/device/compute-type recommendation from fixed VRAM/RAM tiers.

    CUDA is only reported as available when a tensor can be allocated on
    the device and the device name and memory size can both be queried; a
    failure in any of these leaves every CUDA field at its default so the
    result is never half-populated.

    Returns:
        A populated ``HardwareInfo``. Diagnostics about skipped or failed
        GPU detection are written to stderr.
    """
    info = HardwareInfo()

    # CPU info (os.cpu_count() may return None)
    info.cpu_cores = os.cpu_count() or 1

    # RAM info (cross-platform)
    info.ram_mb = _detect_ram_mb()

    # CUDA detection — verify runtime libraries actually work, not just torch detection
    try:
        import torch

        if torch.cuda.is_available():
            try:
                # Test that CUDA runtime libraries are actually loadable
                torch.zeros(1, device="cuda")
                device_name = torch.cuda.get_device_name(0)
                # The attribute is `total_memory` (bytes); `total_mem` does
                # not exist and used to raise AttributeError here.
                vram_mb = torch.cuda.get_device_properties(0).total_memory // (1024 * 1024)
            except Exception as e:
                print(
                    f"[sidecar] CUDA detected but runtime unavailable: {e}. Using CPU.",
                    file=sys.stderr,
                    flush=True,
                )
            else:
                # Commit the CUDA fields only after every probe succeeded.
                info.has_cuda = True
                info.cuda_device_name = device_name
                info.vram_mb = vram_mb
    except ImportError:
        print("[sidecar] torch not available, GPU detection skipped", file=sys.stderr, flush=True)

    # Model recommendation based on hardware
    if info.has_cuda and info.vram_mb >= 8000:
        info.recommended_model = "large-v3-turbo"
        info.recommended_device = "cuda"
    elif info.has_cuda and info.vram_mb >= 4000:
        info.recommended_model = "medium"
        info.recommended_device = "cuda"
    elif info.ram_mb >= 16000:
        info.recommended_model = "medium"
        info.recommended_device = "cpu"
    elif info.ram_mb >= 8000:
        info.recommended_model = "small"
        info.recommended_device = "cpu"
    else:
        info.recommended_model = "base"
        info.recommended_device = "cpu"
    # Every tier uses the same compute type.
    info.recommended_compute_type = "int8"

    return info
Phase 1 foundation: Tauri shell, Python sidecar, SQLite database Tauri v2 + Svelte + TypeScript frontend: - App shell with workspace layout (waveform, transcript, speakers, AI chat) - Placeholder components for all major UI areas - Typed stores (project, transcript, playback, AI) - TypeScript interfaces matching the database schema - Tauri bridge service with typed invoke wrappers - svelte-check passes with 0 errors Rust backend: - Tauri v2 app entry point with command registration - SQLite database layer (rusqlite with bundled SQLite) - Full schema: projects, media_files, speakers, segments, words, ai_outputs, annotations (with indexes) - Model structs with serde serialization - CRUD queries for projects, speakers, segments, words - Segment text editing preserves original text - Schema versioning for future migrations - 6 tests passing - Command stubs for project, transcribe, export, AI, settings, system - App state management Python sidecar: - JSON-line IPC protocol (stdin/stdout) - Message types: IPCMessage, progress, error, ready - Handler registry with routing and error handling - Ping/pong handler for connectivity testing - Service stubs: transcribe, diarize, pipeline, AI, export - Provider stubs: local (llama-server), OpenAI, Anthropic, LiteLLM - Hardware detection stubs - 14 tests passing, ruff clean Also adds: - Testing strategy document (docs/TESTING.md) - Validation script (scripts/validate.sh) - Updated .gitignore for Svelte, Rust, Python artifacts Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> 2026-02-26 15:16:06 -08:00			`"""GPU/CPU detection and VRAM estimation."""`

			`from __future__ import annotations`

Cross-platform distribution, UI improvements, and performance optimizations - PyInstaller frozen sidecar: spec file, build script, and ffmpeg path resolver for self-contained distribution without Python prerequisites - Dual-mode sidecar launcher: frozen binary (production) with dev mode fallback - Parallel transcription + diarization pipeline (~30-40% faster) - GPU auto-detection for diarization (CUDA when available) - Async run_pipeline command for real-time progress event delivery - Web Audio API backend for instant playback and seeking - OpenAI-compatible provider replacing LiteLLM client-side routing - Cross-platform RAM detection (Linux/macOS/Windows) - Settings: speaker count hint, token reveal toggles, dark dropdown styling - Loading splash screen, flexbox layout fix for viewport overflow - Gitea Actions CI/CD pipeline (Linux, Windows, macOS ARM) - Updated README and CLAUDE.md documentation Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> 2026-03-20 21:33:43 -07:00			`import ctypes`
Phase 2: Core transcription pipeline and audio playback - Implement faster-whisper TranscribeService with word-level timestamps, progress reporting, and hardware auto-detection - Wire up Rust SidecarManager for Python process lifecycle (spawn, IPC, shutdown) - Add transcribe_file Tauri command bridging frontend to Python sidecar - Integrate wavesurfer.js WaveformPlayer with play/pause, skip, seek controls - Build TranscriptEditor with word-level click-to-seek and active highlighting - Connect file import flow: prompt → asset load → transcribe → display - Add typed tauri-bridge service with TranscriptionResult interface - Add Python tests for hardware detection and transcription result formatting Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> 2026-02-26 15:53:09 -08:00			`import os`
Cross-platform distribution, UI improvements, and performance optimizations - PyInstaller frozen sidecar: spec file, build script, and ffmpeg path resolver for self-contained distribution without Python prerequisites - Dual-mode sidecar launcher: frozen binary (production) with dev mode fallback - Parallel transcription + diarization pipeline (~30-40% faster) - GPU auto-detection for diarization (CUDA when available) - Async run_pipeline command for real-time progress event delivery - Web Audio API backend for instant playback and seeking - OpenAI-compatible provider replacing LiteLLM client-side routing - Cross-platform RAM detection (Linux/macOS/Windows) - Settings: speaker count hint, token reveal toggles, dark dropdown styling - Loading splash screen, flexbox layout fix for viewport overflow - Gitea Actions CI/CD pipeline (Linux, Windows, macOS ARM) - Updated README and CLAUDE.md documentation Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> 2026-03-20 21:33:43 -07:00			`import platform`
			`import subprocess`
Phase 2: Core transcription pipeline and audio playback - Implement faster-whisper TranscribeService with word-level timestamps, progress reporting, and hardware auto-detection - Wire up Rust SidecarManager for Python process lifecycle (spawn, IPC, shutdown) - Add transcribe_file Tauri command bridging frontend to Python sidecar - Integrate wavesurfer.js WaveformPlayer with play/pause, skip, seek controls - Build TranscriptEditor with word-level click-to-seek and active highlighting - Connect file import flow: prompt → asset load → transcribe → display - Add typed tauri-bridge service with TranscriptionResult interface - Add Python tests for hardware detection and transcription result formatting Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> 2026-02-26 15:53:09 -08:00			`import sys`
			`from dataclasses import dataclass`


			`@dataclass`
			`class HardwareInfo:`
			`"""Detected hardware capabilities."""`

			`has_cuda: bool = False`
			`cuda_device_name: str = ""`
			`vram_mb: int = 0`
			`ram_mb: int = 0`
			`cpu_cores: int = 0`
			`recommended_model: str = "base"`
			`recommended_device: str = "cpu"`
			`recommended_compute_type: str = "int8"`


Cross-platform distribution, UI improvements, and performance optimizations - PyInstaller frozen sidecar: spec file, build script, and ffmpeg path resolver for self-contained distribution without Python prerequisites - Dual-mode sidecar launcher: frozen binary (production) with dev mode fallback - Parallel transcription + diarization pipeline (~30-40% faster) - GPU auto-detection for diarization (CUDA when available) - Async run_pipeline command for real-time progress event delivery - Web Audio API backend for instant playback and seeking - OpenAI-compatible provider replacing LiteLLM client-side routing - Cross-platform RAM detection (Linux/macOS/Windows) - Settings: speaker count hint, token reveal toggles, dark dropdown styling - Loading splash screen, flexbox layout fix for viewport overflow - Gitea Actions CI/CD pipeline (Linux, Windows, macOS ARM) - Updated README and CLAUDE.md documentation Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> 2026-03-20 21:33:43 -07:00			`def _detect_ram_mb() -> int:`
			`"""Detect total system RAM in MB (cross-platform).`

			`Tries platform-specific methods in order:`
			`1. Linux: read /proc/meminfo`
			`2. macOS: sysctl hw.memsize`
			`3. Windows: GlobalMemoryStatusEx via ctypes`
			`4. Fallback: os.sysconf (most Unix systems)`

			`Returns 0 if all methods fail.`
			`"""`
			`# Linux: read /proc/meminfo`
			`if sys.platform == "linux":`
			`try:`
			`with open("/proc/meminfo") as f:`
			`for line in f:`
			`if line.startswith("MemTotal:"):`
			`# Value is in kB`
			`return int(line.split()[1]) // 1024`
			`except (FileNotFoundError, ValueError, OSError):`
			`pass`

			`# macOS: sysctl hw.memsize (returns bytes)`
			`if sys.platform == "darwin":`
			`try:`
			`result = subprocess.run(`
			`["sysctl", "-n", "hw.memsize"],`
			`capture_output=True,`
			`text=True,`
			`check=True,`
			`)`
			`return int(result.stdout.strip()) // (1024 * 1024)`
			`except (subprocess.SubprocessError, ValueError, OSError):`
			`pass`

			`# Windows: GlobalMemoryStatusEx via ctypes`
			`if sys.platform == "win32":`
			`try:`

			`class MEMORYSTATUSEX(ctypes.Structure):`
			`_fields_ = [`
			`("dwLength", ctypes.c_ulong),`
			`("dwMemoryLoad", ctypes.c_ulong),`
			`("ullTotalPhys", ctypes.c_ulonglong),`
			`("ullAvailPhys", ctypes.c_ulonglong),`
			`("ullTotalPageFile", ctypes.c_ulonglong),`
			`("ullAvailPageFile", ctypes.c_ulonglong),`
			`("ullTotalVirtual", ctypes.c_ulonglong),`
			`("ullAvailVirtual", ctypes.c_ulonglong),`
			`("ullAvailExtendedVirtual", ctypes.c_ulonglong),`
			`]`

			`mem_status = MEMORYSTATUSEX()`
			`mem_status.dwLength = ctypes.sizeof(MEMORYSTATUSEX)`
			`if ctypes.windll.kernel32.GlobalMemoryStatusEx(ctypes.byref(mem_status)):`
			`return int(mem_status.ullTotalPhys) // (1024 * 1024)`
			`except (AttributeError, OSError):`
			`pass`

			`# Fallback: os.sysconf (works on most Unix systems)`
			`try:`
			`page_size = os.sysconf("SC_PAGE_SIZE")`
			`phys_pages = os.sysconf("SC_PHYS_PAGES")`
			`if page_size > 0 and phys_pages > 0:`
			`return (page_size * phys_pages) // (1024 * 1024)`
			`except (ValueError, OSError, AttributeError):`
			`pass`

			`return 0`


Phase 2: Core transcription pipeline and audio playback - Implement faster-whisper TranscribeService with word-level timestamps, progress reporting, and hardware auto-detection - Wire up Rust SidecarManager for Python process lifecycle (spawn, IPC, shutdown) - Add transcribe_file Tauri command bridging frontend to Python sidecar - Integrate wavesurfer.js WaveformPlayer with play/pause, skip, seek controls - Build TranscriptEditor with word-level click-to-seek and active highlighting - Connect file import flow: prompt → asset load → transcribe → display - Add typed tauri-bridge service with TranscriptionResult interface - Add Python tests for hardware detection and transcription result formatting Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> 2026-02-26 15:53:09 -08:00			`def detect_hardware() -> HardwareInfo:`
			`"""Detect available hardware and recommend model configuration."""`
			`info = HardwareInfo()`

			`# CPU info`
			`info.cpu_cores = os.cpu_count() or 1`

Cross-platform distribution, UI improvements, and performance optimizations - PyInstaller frozen sidecar: spec file, build script, and ffmpeg path resolver for self-contained distribution without Python prerequisites - Dual-mode sidecar launcher: frozen binary (production) with dev mode fallback - Parallel transcription + diarization pipeline (~30-40% faster) - GPU auto-detection for diarization (CUDA when available) - Async run_pipeline command for real-time progress event delivery - Web Audio API backend for instant playback and seeking - OpenAI-compatible provider replacing LiteLLM client-side routing - Cross-platform RAM detection (Linux/macOS/Windows) - Settings: speaker count hint, token reveal toggles, dark dropdown styling - Loading splash screen, flexbox layout fix for viewport overflow - Gitea Actions CI/CD pipeline (Linux, Windows, macOS ARM) - Updated README and CLAUDE.md documentation Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> 2026-03-20 21:33:43 -07:00			`# RAM info (cross-platform)`
			`info.ram_mb = _detect_ram_mb()`
Phase 2: Core transcription pipeline and audio playback - Implement faster-whisper TranscribeService with word-level timestamps, progress reporting, and hardware auto-detection - Wire up Rust SidecarManager for Python process lifecycle (spawn, IPC, shutdown) - Add transcribe_file Tauri command bridging frontend to Python sidecar - Integrate wavesurfer.js WaveformPlayer with play/pause, skip, seek controls - Build TranscriptEditor with word-level click-to-seek and active highlighting - Connect file import flow: prompt → asset load → transcribe → display - Add typed tauri-bridge service with TranscriptionResult interface - Add Python tests for hardware detection and transcription result formatting Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> 2026-02-26 15:53:09 -08:00
Fix CUDA fallback: gracefully fall back to CPU when CUDA libs missing - transcribe: catch model load failures on CUDA and retry with CPU - hardware detect: test CUDA runtime actually works (torch.zeros on cuda) before recommending GPU, since CPU-only builds detect CUDA via driver but lack cublas/cuDNN libraries Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> 2026-03-22 05:36:38 -07:00			`# CUDA detection — verify runtime libraries actually work, not just torch detection`
Phase 2: Core transcription pipeline and audio playback - Implement faster-whisper TranscribeService with word-level timestamps, progress reporting, and hardware auto-detection - Wire up Rust SidecarManager for Python process lifecycle (spawn, IPC, shutdown) - Add transcribe_file Tauri command bridging frontend to Python sidecar - Integrate wavesurfer.js WaveformPlayer with play/pause, skip, seek controls - Build TranscriptEditor with word-level click-to-seek and active highlighting - Connect file import flow: prompt → asset load → transcribe → display - Add typed tauri-bridge service with TranscriptionResult interface - Add Python tests for hardware detection and transcription result formatting Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> 2026-02-26 15:53:09 -08:00			`try:`
			`import torch`

			`if torch.cuda.is_available():`
Fix CUDA fallback: gracefully fall back to CPU when CUDA libs missing - transcribe: catch model load failures on CUDA and retry with CPU - hardware detect: test CUDA runtime actually works (torch.zeros on cuda) before recommending GPU, since CPU-only builds detect CUDA via driver but lack cublas/cuDNN libraries Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> 2026-03-22 05:36:38 -07:00			`# Test that CUDA runtime libraries are actually loadable`
			`try:`
			`torch.zeros(1, device="cuda")`
			`info.has_cuda = True`
			`info.cuda_device_name = torch.cuda.get_device_name(0)`
			`info.vram_mb = torch.cuda.get_device_properties(0).total_mem // (1024 * 1024)`
			`except Exception as e:`
			`print(`
			`f"[sidecar] CUDA detected but runtime unavailable: {e}. Using CPU.",`
			`file=sys.stderr,`
			`flush=True,`
			`)`
Phase 2: Core transcription pipeline and audio playback - Implement faster-whisper TranscribeService with word-level timestamps, progress reporting, and hardware auto-detection - Wire up Rust SidecarManager for Python process lifecycle (spawn, IPC, shutdown) - Add transcribe_file Tauri command bridging frontend to Python sidecar - Integrate wavesurfer.js WaveformPlayer with play/pause, skip, seek controls - Build TranscriptEditor with word-level click-to-seek and active highlighting - Connect file import flow: prompt → asset load → transcribe → display - Add typed tauri-bridge service with TranscriptionResult interface - Add Python tests for hardware detection and transcription result formatting Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> 2026-02-26 15:53:09 -08:00			`except ImportError:`
			`print("[sidecar] torch not available, GPU detection skipped", file=sys.stderr, flush=True)`

			`# Model recommendation based on hardware`
			`if info.has_cuda and info.vram_mb >= 8000:`
			`info.recommended_model = "large-v3-turbo"`
			`info.recommended_device = "cuda"`
			`info.recommended_compute_type = "int8"`
			`elif info.has_cuda and info.vram_mb >= 4000:`
			`info.recommended_model = "medium"`
			`info.recommended_device = "cuda"`
			`info.recommended_compute_type = "int8"`
			`elif info.ram_mb >= 16000:`
			`info.recommended_model = "medium"`
			`info.recommended_device = "cpu"`
			`info.recommended_compute_type = "int8"`
			`elif info.ram_mb >= 8000:`
			`info.recommended_model = "small"`
			`info.recommended_device = "cpu"`
			`info.recommended_compute_type = "int8"`
			`else:`
			`info.recommended_model = "base"`
			`info.recommended_device = "cpu"`
			`info.recommended_compute_type = "int8"`

			`return info`