Claude 7efa3bb116
Fix CUDA fallback: gracefully fall back to CPU when CUDA libs missing
- transcribe: catch model load failures on CUDA and retry with CPU
- hardware detect: test that the CUDA runtime actually works (torch.zeros
  on cuda) before recommending GPU, since CPU-only torch builds detect
  CUDA via the driver but lack the cuBLAS/cuDNN runtime libraries

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-22 05:36:40 -07:00
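
The transcribe-side half of this commit is not shown in the file below. A minimal sketch of what that retry could look like, assuming faster-whisper's WhisperModel; the load_model wrapper and its signature are illustrative, not the repository's actual code:

import sys

from faster_whisper import WhisperModel


def load_model(model_name: str, device: str, compute_type: str) -> WhisperModel:
    """Load a model, falling back to CPU if the CUDA runtime is unusable.

    Hypothetical sketch of the fallback described in the commit message;
    names and parameters are assumptions, not the repository's actual code.
    """
    try:
        return WhisperModel(model_name, device=device, compute_type=compute_type)
    except Exception as e:
        if device == "cuda":
            # A missing cuBLAS/cuDNN typically surfaces as a load failure here
            print(f"[sidecar] CUDA load failed: {e}. Retrying on CPU.",
                  file=sys.stderr, flush=True)
            return WhisperModel(model_name, device="cpu", compute_type="int8")
        raise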


"""GPU/CPU detection and VRAM estimation."""
from __future__ import annotations
import ctypes
import os
import platform
import subprocess
import sys
from dataclasses import dataclass
@dataclass
class HardwareInfo:
"""Detected hardware capabilities."""
has_cuda: bool = False
cuda_device_name: str = ""
vram_mb: int = 0
ram_mb: int = 0
cpu_cores: int = 0
recommended_model: str = "base"
recommended_device: str = "cpu"
recommended_compute_type: str = "int8"
def _detect_ram_mb() -> int:
    """Detect total system RAM in MB (cross-platform).

    Tries platform-specific methods in order:
    1. Linux: read /proc/meminfo
    2. macOS: sysctl hw.memsize
    3. Windows: GlobalMemoryStatusEx via ctypes
    4. Fallback: os.sysconf (most Unix systems)

    Returns 0 if all methods fail.
    """
    # Linux: read /proc/meminfo
    if sys.platform == "linux":
        try:
            with open("/proc/meminfo") as f:
                for line in f:
                    if line.startswith("MemTotal:"):
                        # Value is in kB
                        return int(line.split()[1]) // 1024
        except (FileNotFoundError, ValueError, OSError):
            pass

    # macOS: sysctl hw.memsize (returns bytes)
    if sys.platform == "darwin":
        try:
            result = subprocess.run(
                ["sysctl", "-n", "hw.memsize"],
                capture_output=True,
                text=True,
                check=True,
            )
            return int(result.stdout.strip()) // (1024 * 1024)
        except (subprocess.SubprocessError, ValueError, OSError):
            pass

    # Windows: GlobalMemoryStatusEx via ctypes
    if sys.platform == "win32":
        try:
            class MEMORYSTATUSEX(ctypes.Structure):
                _fields_ = [
                    ("dwLength", ctypes.c_ulong),
                    ("dwMemoryLoad", ctypes.c_ulong),
                    ("ullTotalPhys", ctypes.c_ulonglong),
                    ("ullAvailPhys", ctypes.c_ulonglong),
                    ("ullTotalPageFile", ctypes.c_ulonglong),
                    ("ullAvailPageFile", ctypes.c_ulonglong),
                    ("ullTotalVirtual", ctypes.c_ulonglong),
                    ("ullAvailVirtual", ctypes.c_ulonglong),
                    ("ullAvailExtendedVirtual", ctypes.c_ulonglong),
                ]

            mem_status = MEMORYSTATUSEX()
            mem_status.dwLength = ctypes.sizeof(MEMORYSTATUSEX)
            if ctypes.windll.kernel32.GlobalMemoryStatusEx(ctypes.byref(mem_status)):
                return int(mem_status.ullTotalPhys) // (1024 * 1024)
        except (AttributeError, OSError):
            pass

    # Fallback: os.sysconf (works on most Unix systems)
    try:
        page_size = os.sysconf("SC_PAGE_SIZE")
        phys_pages = os.sysconf("SC_PHYS_PAGES")
        if page_size > 0 and phys_pages > 0:
            return (page_size * phys_pages) // (1024 * 1024)
    except (ValueError, OSError, AttributeError):
        pass

    return 0

def detect_hardware() -> HardwareInfo:
    """Detect available hardware and recommend model configuration."""
    info = HardwareInfo()

    # CPU info
    info.cpu_cores = os.cpu_count() or 1

    # RAM info (cross-platform)
    info.ram_mb = _detect_ram_mb()

    # CUDA detection: verify runtime libraries actually work, not just torch detection
    try:
        import torch

        if torch.cuda.is_available():
            # Test that CUDA runtime libraries are actually loadable
            try:
                torch.zeros(1, device="cuda")
                info.has_cuda = True
                info.cuda_device_name = torch.cuda.get_device_name(0)
                # get_device_properties exposes total_memory (in bytes), not total_mem
                info.vram_mb = torch.cuda.get_device_properties(0).total_memory // (1024 * 1024)
            except Exception as e:
                print(
                    f"[sidecar] CUDA detected but runtime unavailable: {e}. Using CPU.",
                    file=sys.stderr,
                    flush=True,
                )
    except ImportError:
        print("[sidecar] torch not available, GPU detection skipped", file=sys.stderr, flush=True)

    # Model recommendation based on hardware
    if info.has_cuda and info.vram_mb >= 8000:
        info.recommended_model = "large-v3-turbo"
        info.recommended_device = "cuda"
        info.recommended_compute_type = "int8"
    elif info.has_cuda and info.vram_mb >= 4000:
        info.recommended_model = "medium"
        info.recommended_device = "cuda"
        info.recommended_compute_type = "int8"
    elif info.ram_mb >= 16000:
        info.recommended_model = "medium"
        info.recommended_device = "cpu"
        info.recommended_compute_type = "int8"
    elif info.ram_mb >= 8000:
        info.recommended_model = "small"
        info.recommended_device = "cpu"
        info.recommended_compute_type = "int8"
    else:
        info.recommended_model = "base"
        info.recommended_device = "cpu"
        info.recommended_compute_type = "int8"

    return info
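
A quick way to exercise this module; the module name "hardware" is an assumption for illustration:

# Hypothetical usage; the import path is an assumption.
from hardware import detect_hardware

info = detect_hardware()
print(f"CUDA: {info.has_cuda} ({info.cuda_device_name or 'n/a'}, {info.vram_mb} MB VRAM)")
print(f"RAM: {info.ram_mb} MB, cores: {info.cpu_cores}")
# e.g. a machine with 12 GB of RAM and no working CUDA falls through
# to the ram_mb >= 8000 branch and gets: small on cpu (int8)
print(f"recommended: {info.recommended_model} on {info.recommended_device} "
      f"({info.recommended_compute_type})")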