Fix CUDA fallback: gracefully fall back to CPU when CUDA libs missing
- transcribe: catch model load failures on CUDA and retry with CPU
- hardware detect: test that the CUDA runtime actually works (torch.zeros on cuda) before recommending the GPU, since CPU-only builds detect CUDA via the driver but lack the cuBLAS/cuDNN libraries

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -105,14 +105,23 @@ def detect_hardware() -> HardwareInfo:
|
|||||||
# RAM info (cross-platform)
|
# RAM info (cross-platform)
|
||||||
info.ram_mb = _detect_ram_mb()
|
info.ram_mb = _detect_ram_mb()
|
||||||
|
|
||||||
# CUDA detection
|
# CUDA detection — verify runtime libraries actually work, not just torch detection
|
||||||
try:
|
try:
|
||||||
import torch
|
import torch
|
||||||
|
|
||||||
if torch.cuda.is_available():
|
if torch.cuda.is_available():
|
||||||
|
# Test that CUDA runtime libraries are actually loadable
|
||||||
|
try:
|
||||||
|
torch.zeros(1, device="cuda")
|
||||||
info.has_cuda = True
|
info.has_cuda = True
|
||||||
info.cuda_device_name = torch.cuda.get_device_name(0)
|
info.cuda_device_name = torch.cuda.get_device_name(0)
|
||||||
info.vram_mb = torch.cuda.get_device_properties(0).total_memory // (1024 * 1024)
|
info.vram_mb = torch.cuda.get_device_properties(0).total_memory // (1024 * 1024)
|
||||||
|
except Exception as e:
|
||||||
|
print(
|
||||||
|
f"[sidecar] CUDA detected but runtime unavailable: {e}. Using CPU.",
|
||||||
|
file=sys.stderr,
|
||||||
|
flush=True,
|
||||||
|
)
|
||||||
except ImportError:
|
except ImportError:
|
||||||
print("[sidecar] torch not available, GPU detection skipped", file=sys.stderr, flush=True)
|
print("[sidecar] torch not available, GPU detection skipped", file=sys.stderr, flush=True)
|
||||||
|
|
||||||
|
|||||||
@@ -77,11 +77,28 @@ class TranscribeService:
|
|||||||
file=sys.stderr,
|
file=sys.stderr,
|
||||||
flush=True,
|
flush=True,
|
||||||
)
|
)
|
||||||
|
try:
|
||||||
self._model = WhisperModel(
|
self._model = WhisperModel(
|
||||||
model_name,
|
model_name,
|
||||||
device=device,
|
device=device,
|
||||||
compute_type=compute_type,
|
compute_type=compute_type,
|
||||||
)
|
)
|
||||||
|
except Exception as e:
|
||||||
|
if device != "cpu":
|
||||||
|
print(
|
||||||
|
f"[sidecar] Failed to load on {device}: {e}. Falling back to CPU.",
|
||||||
|
file=sys.stderr,
|
||||||
|
flush=True,
|
||||||
|
)
|
||||||
|
device = "cpu"
|
||||||
|
compute_type = "int8"
|
||||||
|
self._model = WhisperModel(
|
||||||
|
model_name,
|
||||||
|
device=device,
|
||||||
|
compute_type=compute_type,
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
raise
|
||||||
self._current_model_name = model_name
|
self._current_model_name = model_name
|
||||||
self._current_device = device
|
self._current_device = device
|
||||||
self._current_compute_type = compute_type
|
self._current_compute_type = compute_type
|
||||||
|
|||||||
Reference in New Issue
Block a user