diff --git a/python/voice_to_notes/hardware/detect.py b/python/voice_to_notes/hardware/detect.py
index 408d26a..284ce2a 100644
--- a/python/voice_to_notes/hardware/detect.py
+++ b/python/voice_to_notes/hardware/detect.py
@@ -105,14 +105,23 @@ def detect_hardware() -> HardwareInfo:
     # RAM info (cross-platform)
     info.ram_mb = _detect_ram_mb()
 
-    # CUDA detection
+    # CUDA detection — verify runtime libraries actually work, not just torch detection
     try:
         import torch
 
         if torch.cuda.is_available():
-            info.has_cuda = True
-            info.cuda_device_name = torch.cuda.get_device_name(0)
-            info.vram_mb = torch.cuda.get_device_properties(0).total_mem // (1024 * 1024)
+            # Test that CUDA runtime libraries are actually loadable
+            try:
+                torch.zeros(1, device="cuda")
+                info.has_cuda = True
+                info.cuda_device_name = torch.cuda.get_device_name(0)
+                info.vram_mb = torch.cuda.get_device_properties(0).total_memory // (1024 * 1024)
+            except Exception as e:
+                print(
+                    f"[sidecar] CUDA detected but runtime unavailable: {e}. Using CPU.",
+                    file=sys.stderr,
+                    flush=True,
+                )
     except ImportError:
         print("[sidecar] torch not available, GPU detection skipped", file=sys.stderr, flush=True)
 
diff --git a/python/voice_to_notes/services/transcribe.py b/python/voice_to_notes/services/transcribe.py
index 87bb01d..0d975ce 100644
--- a/python/voice_to_notes/services/transcribe.py
+++ b/python/voice_to_notes/services/transcribe.py
@@ -77,11 +77,28 @@
             file=sys.stderr,
             flush=True,
         )
-        self._model = WhisperModel(
-            model_name,
-            device=device,
-            compute_type=compute_type,
-        )
+        try:
+            self._model = WhisperModel(
+                model_name,
+                device=device,
+                compute_type=compute_type,
+            )
+        except Exception as e:
+            if device != "cpu":
+                print(
+                    f"[sidecar] Failed to load on {device}: {e}. Falling back to CPU.",
+                    file=sys.stderr,
+                    flush=True,
+                )
+                device = "cpu"
+                compute_type = "int8"
+                self._model = WhisperModel(
+                    model_name,
+                    device=device,
+                    compute_type=compute_type,
+                )
+            else:
+                raise
         self._current_model_name = model_name
         self._current_device = device
         self._current_compute_type = compute_type