Fix CUDA fallback: gracefully fall back to CPU when CUDA libs missing
Some checks failed
Release / Bump version and tag (push) Successful in 18s
Release / Build (macOS) (push) Successful in 5m27s
Release / Build (Linux) (push) Successful in 11m38s
Release / Build (Windows) (push) Has been cancelled

- transcribe: catch model load failures on CUDA and retry with CPU
- hardware detect: verify that the CUDA runtime actually works (torch.zeros on cuda)
  before recommending GPU, since CPU-only builds can detect CUDA via the driver
  but lack the cuBLAS/cuDNN libraries

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Author: Claude
Date: 2026-03-22 05:36:38 -07:00
Parent: 2be5024de7
Commit: 7efa3bb116
2 changed files with 35 additions and 9 deletions

View File

@@ -105,14 +105,23 @@ def detect_hardware() -> HardwareInfo:
# RAM info (cross-platform)
info.ram_mb = _detect_ram_mb()
# CUDA detection
# CUDA detection — verify runtime libraries actually work, not just torch detection
try:
import torch
if torch.cuda.is_available():
info.has_cuda = True
info.cuda_device_name = torch.cuda.get_device_name(0)
info.vram_mb = torch.cuda.get_device_properties(0).total_mem // (1024 * 1024)
# Test that CUDA runtime libraries are actually loadable
try:
torch.zeros(1, device="cuda")
info.has_cuda = True
info.cuda_device_name = torch.cuda.get_device_name(0)
info.vram_mb = torch.cuda.get_device_properties(0).total_mem // (1024 * 1024)
except Exception as e:
print(
f"[sidecar] CUDA detected but runtime unavailable: {e}. Using CPU.",
file=sys.stderr,
flush=True,
)
except ImportError:
print("[sidecar] torch not available, GPU detection skipped", file=sys.stderr, flush=True)

View File

@@ -77,11 +77,28 @@ class TranscribeService:
file=sys.stderr,
flush=True,
)
self._model = WhisperModel(
model_name,
device=device,
compute_type=compute_type,
)
try:
self._model = WhisperModel(
model_name,
device=device,
compute_type=compute_type,
)
except Exception as e:
if device != "cpu":
print(
f"[sidecar] Failed to load on {device}: {e}. Falling back to CPU.",
file=sys.stderr,
flush=True,
)
device = "cpu"
compute_type = "int8"
self._model = WhisperModel(
model_name,
device=device,
compute_type=compute_type,
)
else:
raise
self._current_model_name = model_name
self._current_device = device
self._current_compute_type = compute_type