- Checks PyTorch installation and version
- Detects CUDA availability and GPU info
- Tests CUDA with a simple tensor operation
- Shows device manager detection results
- Provides troubleshooting hints for CPU-only builds

Usage: `python check_cuda.py` or `uv run check_cuda.py`
98 lines
3.0 KiB
Python
98 lines
3.0 KiB
Python
#!/usr/bin/env python3
"""Diagnostic script to check CUDA availability and GPU detection.

Running the script prints a plain-text report to stdout.
"""

import sys

# Title banner: the heading framed by two 60-character rules, then a blank line.
print("=" * 60, "CUDA/GPU Detection Diagnostic", "=" * 60, sep="\n")
print()

# Report the interpreter running this script, followed by a blank line.
print(f"Python version: {sys.version}", end="\n\n")

# Check PyTorch: report the installed build, then probe the CUDA and MPS
# backends. The import lives inside the try so that a missing or broken
# installation is reported instead of aborting the diagnostic.
try:
    import torch

    print(f"✓ PyTorch installed: {torch.__version__}")
    print()

    # Check CUDA availability (queried once and reused for the branch below)
    cuda_available = torch.cuda.is_available()
    print("CUDA Detection:")
    print(f" CUDA available: {cuda_available}")

    if cuda_available:
        print(f" CUDA version: {torch.version.cuda}")
        print(f" cuDNN version: {torch.backends.cudnn.version()}")
        gpu_count = torch.cuda.device_count()
        print(f" GPU count: {gpu_count}")

        # List all GPUs with their name and total memory in GiB
        for i in range(gpu_count):
            print(f" GPU {i}: {torch.cuda.get_device_name(i)}")
            total_gib = torch.cuda.get_device_properties(i).total_memory / 1024**3
            print(f" Memory: {total_gib:.2f} GB")

        # Test CUDA with a simple tensor operation
        try:
            test_tensor = torch.tensor([1.0, 2.0, 3.0]).cuda()
            # Copying to the device alone launches no kernel, so also run a
            # small computation and read the result back; .item() waits for
            # the GPU, so kernel-launch errors surface inside this try.
            (test_tensor * 2).sum().item()
            print(f"\n ✓ CUDA tensor test successful: {test_tensor.device}")
        except Exception as e:
            # Deliberately broad: any failure here is itself the diagnostic.
            print(f"\n ✗ CUDA tensor test failed: {e}")
    else:
        print(" ℹ No CUDA GPUs detected")
        print()
        print(" Possible reasons:")
        print(" 1. No NVIDIA GPU installed")
        print(" 2. NVIDIA drivers not installed or outdated")
        print(" 3. PyTorch built without CUDA support (CPU-only)")
        print()
        print(" To check if PyTorch has CUDA support:")
        # torch.version.cuda is None on builds compiled without CUDA.
        print(f" Built with CUDA: {torch.version.cuda is not None}")

        if torch.version.cuda is None:
            print()
            print(" ⚠ PyTorch is CPU-only!")
            print(" To enable CUDA, reinstall PyTorch with CUDA support:")
            print(" uv pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121")

    print()

    # Check MPS (Apple Silicon); older PyTorch releases have no mps backend
    if hasattr(torch.backends, 'mps'):
        print("MPS Detection (Apple Silicon):")
        print(f" MPS available: {torch.backends.mps.is_available()}")
        # Always report build support: it is most informative when MPS is
        # NOT available, because it tells a build without MPS apart from an
        # unsupported machine or OS.
        print(f" MPS built: {torch.backends.mps.is_built()}")

except ImportError as e:
    print(f"✗ PyTorch not installed: {e}")
    print()
    print("Install with: uv pip install torch")
except OSError as e:
    # A present but broken install can fail while loading its native
    # libraries, which is raised as OSError rather than ImportError.
    print(f"✗ PyTorch is installed but failed to load: {e}")
    print()
    print("A native library required by PyTorch could not be loaded; try reinstalling PyTorch")

print()
# Report what the project's own DeviceManager detects. The import sits inside
# the try so the script still completes when the `client` package cannot be
# imported; only ImportError is handled, anything else propagates.
try:
    from client.device_utils import DeviceManager

    print("=" * 60, "Device Manager Detection", "=" * 60, sep="\n")
    print()

    manager = DeviceManager()
    print(f"Available devices: {manager.available_devices}")
    print(f"Current device: {manager.current_device}")
    print(f"GPU available: {manager.is_gpu_available()}")
    print()

    # get_device_info() is consumed as an iterable of (device, description) pairs.
    print("Device info:")
    for device_name, device_summary in manager.get_device_info():
        print(f" {device_name}: {device_summary}")

except ImportError as exc:
    print(f"Device manager not available: {exc}")

# Closing rule for the whole report.
print()
print("=" * 60)