Fix CUDA fallback: gracefully fall back to CPU when CUDA libs missing
- transcribe: catch model-load failures on CUDA and retry with CPU
- hardware detect: verify the CUDA runtime actually works (allocate a tensor via torch.zeros on the cuda device) before recommending GPU, since CPU-only builds can detect CUDA through the driver but lack the cuBLAS/cuDNN libraries

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -77,11 +77,28 @@ class TranscribeService:
|
||||
file=sys.stderr,
|
||||
flush=True,
|
||||
)
|
||||
self._model = WhisperModel(
|
||||
model_name,
|
||||
device=device,
|
||||
compute_type=compute_type,
|
||||
)
|
||||
try:
|
||||
self._model = WhisperModel(
|
||||
model_name,
|
||||
device=device,
|
||||
compute_type=compute_type,
|
||||
)
|
||||
except Exception as e:
|
||||
if device != "cpu":
|
||||
print(
|
||||
f"[sidecar] Failed to load on {device}: {e}. Falling back to CPU.",
|
||||
file=sys.stderr,
|
||||
flush=True,
|
||||
)
|
||||
device = "cpu"
|
||||
compute_type = "int8"
|
||||
self._model = WhisperModel(
|
||||
model_name,
|
||||
device=device,
|
||||
compute_type=compute_type,
|
||||
)
|
||||
else:
|
||||
raise
|
||||
self._current_model_name = model_name
|
||||
self._current_device = device
|
||||
self._current_compute_type = compute_type
|
||||
|
||||
Reference in New Issue
Block a user