Fix CUDA fallback: gracefully fall back to CPU when CUDA libs missing
- transcribe: catch model load failures on CUDA and retry with CPU
- hardware detect: test that the CUDA runtime actually works (torch.zeros on cuda) before recommending the GPU, since CPU-only builds detect CUDA via the driver but lack the cuBLAS/cuDNN libraries

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -105,14 +105,23 @@ def detect_hardware() -> HardwareInfo:
|
|||||||
# RAM info (cross-platform)
|
# RAM info (cross-platform)
|
||||||
info.ram_mb = _detect_ram_mb()
|
info.ram_mb = _detect_ram_mb()
|
||||||
|
|
||||||
# CUDA detection
|
# CUDA detection — verify runtime libraries actually work, not just torch detection
|
||||||
try:
|
try:
|
||||||
import torch
|
import torch
|
||||||
|
|
||||||
if torch.cuda.is_available():
|
if torch.cuda.is_available():
|
||||||
|
# Test that CUDA runtime libraries are actually loadable
|
||||||
|
try:
|
||||||
|
torch.zeros(1, device="cuda")
|
||||||
info.has_cuda = True
|
info.has_cuda = True
|
||||||
info.cuda_device_name = torch.cuda.get_device_name(0)
|
info.cuda_device_name = torch.cuda.get_device_name(0)
|
||||||
info.vram_mb = torch.cuda.get_device_properties(0).total_memory // (1024 * 1024)
|
info.vram_mb = torch.cuda.get_device_properties(0).total_memory // (1024 * 1024)
|
||||||
|
except Exception as e:
|
||||||
|
print(
|
||||||
|
f"[sidecar] CUDA detected but runtime unavailable: {e}. Using CPU.",
|
||||||
|
file=sys.stderr,
|
||||||
|
flush=True,
|
||||||
|
)
|
||||||
except ImportError:
|
except ImportError:
|
||||||
print("[sidecar] torch not available, GPU detection skipped", file=sys.stderr, flush=True)
|
print("[sidecar] torch not available, GPU detection skipped", file=sys.stderr, flush=True)
|
||||||
|
|
||||||
|
|||||||
@@ -77,11 +77,28 @@ class TranscribeService:
|
|||||||
file=sys.stderr,
|
file=sys.stderr,
|
||||||
flush=True,
|
flush=True,
|
||||||
)
|
)
|
||||||
|
try:
|
||||||
self._model = WhisperModel(
|
self._model = WhisperModel(
|
||||||
model_name,
|
model_name,
|
||||||
device=device,
|
device=device,
|
||||||
compute_type=compute_type,
|
compute_type=compute_type,
|
||||||
)
|
)
|
||||||
|
except Exception as e:
|
||||||
|
if device != "cpu":
|
||||||
|
print(
|
||||||
|
f"[sidecar] Failed to load on {device}: {e}. Falling back to CPU.",
|
||||||
|
file=sys.stderr,
|
||||||
|
flush=True,
|
||||||
|
)
|
||||||
|
device = "cpu"
|
||||||
|
compute_type = "int8"
|
||||||
|
self._model = WhisperModel(
|
||||||
|
model_name,
|
||||||
|
device=device,
|
||||||
|
compute_type=compute_type,
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
raise
|
||||||
self._current_model_name = model_name
|
self._current_model_name = model_name
|
||||||
self._current_device = device
|
self._current_device = device
|
||||||
self._current_compute_type = compute_type
|
self._current_compute_type = compute_type
|
||||||
|
|||||||
Reference in New Issue
Block a user