diff --git a/python/voice_to_notes/hardware/detect.py b/python/voice_to_notes/hardware/detect.py
index 408d26a..284ce2a 100644
--- a/python/voice_to_notes/hardware/detect.py
+++ b/python/voice_to_notes/hardware/detect.py
@@ -105,14 +105,23 @@ def detect_hardware() -> HardwareInfo:
     # RAM info (cross-platform)
     info.ram_mb = _detect_ram_mb()
 
-    # CUDA detection
+    # CUDA detection — verify runtime libraries actually work, not just torch detection
     try:
         import torch
 
         if torch.cuda.is_available():
-            info.has_cuda = True
-            info.cuda_device_name = torch.cuda.get_device_name(0)
-            info.vram_mb = torch.cuda.get_device_properties(0).total_mem // (1024 * 1024)
+            # Test that CUDA runtime libraries are actually loadable
+            try:
+                torch.zeros(1, device="cuda")
+                info.has_cuda = True
+                info.cuda_device_name = torch.cuda.get_device_name(0)
+                info.vram_mb = torch.cuda.get_device_properties(0).total_memory // (1024 * 1024)
+            except Exception as e:
+                print(
+                    f"[sidecar] CUDA detected but runtime unavailable: {e}. Using CPU.",
+                    file=sys.stderr,
+                    flush=True,
+                )
     except ImportError:
         print("[sidecar] torch not available, GPU detection skipped", file=sys.stderr, flush=True)
 
diff --git a/python/voice_to_notes/services/transcribe.py b/python/voice_to_notes/services/transcribe.py
index 87bb01d..0d975ce 100644
--- a/python/voice_to_notes/services/transcribe.py
+++ b/python/voice_to_notes/services/transcribe.py
@@ -77,11 +77,28 @@
             file=sys.stderr,
             flush=True,
         )
-        self._model = WhisperModel(
-            model_name,
-            device=device,
-            compute_type=compute_type,
-        )
+        try:
+            self._model = WhisperModel(
+                model_name,
+                device=device,
+                compute_type=compute_type,
+            )
+        except Exception as e:
+            if device != "cpu":
+                print(
+                    f"[sidecar] Failed to load on {device}: {e}. Falling back to CPU.",
+                    file=sys.stderr,
+                    flush=True,
+                )
+                device = "cpu"
+                compute_type = "int8"
+                self._model = WhisperModel(
+                    model_name,
+                    device=device,
+                    compute_type=compute_type,
+                )
+            else:
+                raise
         self._current_model_name = model_name
         self._current_device = device
         self._current_compute_type = compute_type