Add CUDA diagnostic script for troubleshooting GPU detection
- Checks PyTorch installation and version
- Detects CUDA availability and GPU info
- Tests CUDA with a simple tensor operation
- Shows device manager detection results
- Provides troubleshooting hints for CPU-only builds

Usage: python check_cuda.py or uv run check_cuda.py
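The CPU-only troubleshooting hint hinges on one distinction: torch.version.cuda is None when the installed PyTorch wheel was built without CUDA, but is a version string when PyTorch has CUDA support and the problem lies with the GPU or driver. A minimal standalone sketch of that check, using only public torch attributes:

import torch

# torch.version.cuda is None for CPU-only wheels; a version string otherwise.
if not torch.cuda.is_available():
    if torch.version.cuda is None:
        print("CPU-only PyTorch build: reinstall a CUDA-enabled wheel.")
    else:
        print("PyTorch has CUDA support, but no usable GPU/driver was found.")
else:
    print(f"CUDA OK: {torch.cuda.device_count()} device(s) visible.")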
check_cuda.py (new file, +97 lines)
@@ -0,0 +1,97 @@
#!/usr/bin/env python3
"""Diagnostic script to check CUDA availability and GPU detection."""

import sys

print("=" * 60)
print("CUDA/GPU Detection Diagnostic")
print("=" * 60)
print()

# Check Python version
print(f"Python version: {sys.version}")
print()

# Check PyTorch
try:
    import torch
    print(f"✓ PyTorch installed: {torch.__version__}")
    print()

    # Check CUDA availability
    print("CUDA Detection:")
    print(f"  CUDA available: {torch.cuda.is_available()}")

    if torch.cuda.is_available():
        print(f"  CUDA version: {torch.version.cuda}")
        print(f"  cuDNN version: {torch.backends.cudnn.version()}")
        print(f"  GPU count: {torch.cuda.device_count()}")

        # List all GPUs
        for i in range(torch.cuda.device_count()):
            print(f"  GPU {i}: {torch.cuda.get_device_name(i)}")
            print(f"    Memory: {torch.cuda.get_device_properties(i).total_memory / 1024**3:.2f} GB")

        # Test CUDA with a simple tensor operation
        try:
            test_tensor = torch.tensor([1.0, 2.0, 3.0]).cuda()
            print(f"\n  ✓ CUDA tensor test successful: {test_tensor.device}")
        except Exception as e:
            print(f"\n  ✗ CUDA tensor test failed: {e}")
    else:
        print("  ℹ No CUDA GPUs detected")
        print()
        print("  Possible reasons:")
        print("  1. No NVIDIA GPU installed")
        print("  2. NVIDIA drivers not installed or outdated")
        print("  3. PyTorch built without CUDA support (CPU-only)")
        print()
        print("  To check if PyTorch has CUDA support:")
        print(f"  Built with CUDA: {torch.version.cuda is not None}")

        if torch.version.cuda is None:
            print()
            print("  ⚠ PyTorch is CPU-only!")
            print("  To enable CUDA, reinstall PyTorch with CUDA support:")
            print("  uv pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121")

    print()

    # Check MPS (Apple Silicon)
    if hasattr(torch.backends, 'mps'):
        print("MPS Detection (Apple Silicon):")
        print(f"  MPS available: {torch.backends.mps.is_available()}")
        if torch.backends.mps.is_available():
            print(f"  MPS built: {torch.backends.mps.is_built()}")

except ImportError as e:
    print(f"✗ PyTorch not installed: {e}")
    print()
    print("Install with: uv pip install torch")

print()

# Check our device manager
try:
    from client.device_utils import DeviceManager

    print("=" * 60)
    print("Device Manager Detection")
    print("=" * 60)
    print()

    dm = DeviceManager()
    print(f"Available devices: {dm.available_devices}")
    print(f"Current device: {dm.current_device}")
    print(f"GPU available: {dm.is_gpu_available()}")
    print()

    print("Device info:")
    for device, description in dm.get_device_info():
        print(f"  {device}: {description}")

except ImportError as e:
    print(f"Device manager not available: {e}")

print()
print("=" * 60)
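For readers without the rest of the repository: the last section of the script assumes client.device_utils.DeviceManager exposes roughly the interface below. This is a hypothetical sketch inferred only from the attributes and methods check_cuda.py calls; the real module may differ.

import torch

class DeviceManager:
    """Hypothetical stand-in mirroring only what check_cuda.py uses."""

    def __init__(self):
        # Assumed: enumerate the backends PyTorch reports as usable.
        self.available_devices = ["cpu"]
        if torch.cuda.is_available():
            self.available_devices.append("cuda")
        mps = getattr(torch.backends, "mps", None)
        if mps is not None and mps.is_available():
            self.available_devices.append("mps")
        # Assumed: prefer a GPU backend when one exists.
        self.current_device = self.available_devices[-1]

    def is_gpu_available(self):
        # True when any non-CPU backend was detected.
        return any(d != "cpu" for d in self.available_devices)

    def get_device_info(self):
        # Must yield (device, description) pairs to match the unpacking
        # loop in the "Device info:" section of the script.
        for device in self.available_devices:
            if device == "cuda":
                yield device, torch.cuda.get_device_name(0)
            else:
                yield device, f"{device} backend"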