Phase 1 Complete - Standalone Desktop Application Features: - Real-time speech-to-text with Whisper (faster-whisper) - PySide6 desktop GUI with settings dialog - Web server for OBS browser source integration - Audio capture with automatic sample rate detection and resampling - Noise suppression with Voice Activity Detection (VAD) - Configurable display settings (font, timestamps, fade duration) - Settings apply without restart (with automatic model reloading) - Auto-fade for web display transcriptions - CPU/GPU support with automatic device detection - Standalone executable builds (PyInstaller) - CUDA build support (works on systems without CUDA hardware) Components: - Audio capture with sounddevice - Noise reduction with noisereduce + webrtcvad - Transcription with faster-whisper - GUI with PySide6 - Web server with FastAPI + WebSocket - Configuration system with YAML Build System: - Standard builds (CPU-only): build.sh / build.bat - CUDA builds (universal): build-cuda.sh / build-cuda.bat - Comprehensive BUILD.md documentation - Cross-platform support (Linux, Windows) Documentation: - README.md with project overview and quick start - BUILD.md with detailed build instructions - NEXT_STEPS.md with future enhancement roadmap - INSTALL.md with setup instructions 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
125 lines
4.0 KiB
Python
125 lines
4.0 KiB
Python
#!/usr/bin/env python3
"""
Smoke-test every component of the application without a GUI.

Safe to run in headless environments (CI, servers without a display).
"""

import sys
from pathlib import Path

# Make the project-local packages (client.*) importable regardless of
# the current working directory.
project_root = Path(__file__).parent
sys.path.insert(0, str(project_root))

SEPARATOR = "=" * 60
print(SEPARATOR)
print("Testing Local Transcription Components (No GUI)")
print(SEPARATOR)
|
||
|
||
# Test 1: Configuration
print("\n1. Testing Configuration System...")
try:
    from client.config import Config

    config = Config()
    # Report a few representative settings to prove the config loaded.
    checks = [
        ("Config loaded", config.config_path),
        ("User name", config.get('user.name')),
        ("Model", config.get('transcription.model')),
    ]
    for label, value in checks:
        print(f" ✓ {label}: {value}")
except Exception as e:
    # Every later test depends on configuration, so bail out immediately.
    print(f" ✗ Config failed: {e}")
    sys.exit(1)
|
||
|
||
# Test 2: Device Detection
print("\n2. Testing Device Detection...")
try:
    from client.device_utils import DeviceManager

    # NOTE: device_mgr is reused by the transcription-engine test below.
    device_mgr = DeviceManager()
    print(f" ✓ Available devices: {device_mgr.available_devices}")
    print(f" ✓ Current device: {device_mgr.current_device}")
    print(f" ✓ GPU available: {device_mgr.is_gpu_available()}")

    # List every detected compute device with its description.
    for dev_name, dev_description in device_mgr.get_device_info():
        print(f" - {dev_name}: {dev_description}")
except Exception as e:
    # Device selection is mandatory for model loading; abort on failure.
    print(f" ✗ Device detection failed: {e}")
    sys.exit(1)
|
||
|
||
# Test 3: Audio Devices
print("\n3. Testing Audio Capture...")
try:
    from client.audio_capture import AudioCapture

    devices = AudioCapture.get_input_devices()
    print(f" ✓ Found {len(devices)} audio input device(s)")
    # Only the first five devices are listed to keep the output short.
    for idx, name in devices[:5]:
        print(f" - [{idx}] {name}")
    hidden = len(devices) - 5
    if hidden > 0:
        print(f" ... and {hidden} more")
except Exception as e:
    # Non-fatal: the remaining tests do not need a live microphone.
    print(f" ✗ Audio capture failed: {e}")
|
||
|
||
# Test 4: Noise Suppression
print("\n4. Testing Noise Suppression...")
try:
    from client.noise_suppression import NoiseSuppressor
    import numpy as np

    suppressor = NoiseSuppressor(sample_rate=16000, method="noisereduce", strength=0.7)
    print(f" ✓ Noise suppressor created: {suppressor}")

    # One second of quiet random noise stands in for real microphone input.
    test_audio = (np.random.randn(16000) * 0.1).astype(np.float32) * 1.0
    test_audio = np.random.randn(16000).astype(np.float32) * 0.1
    processed = suppressor.process(test_audio, skip_silent=False)
    print(f" ✓ Processed audio shape: {processed.shape}")
except Exception as e:
    # Non-fatal: transcription can still be exercised without denoising.
    print(f" ✗ Noise suppression failed: {e}")
|
||
|
||
# Test 5: Transcription Engine
print("\n5. Testing Transcription Engine (Loading Model)...")
try:
    from client.transcription_engine import TranscriptionEngine
    # Import numpy here instead of relying on the conditional import inside
    # the Test 4 try-block: if that import failed, using `np` below would
    # raise a misleading NameError ("name 'np' is not defined").
    import numpy as np

    # device_mgr was created in Test 2 (the script exits there on failure).
    device = device_mgr.get_device_for_whisper()
    compute_type = device_mgr.get_compute_type()

    print(f" → Using device: {device} with compute type: {compute_type}")
    print(" → Loading model (this may take 1-2 minutes on first run)...")

    engine = TranscriptionEngine(
        model_size="tiny",  # Use tiny for faster testing
        device=device,
        compute_type=compute_type,
        language="en"
    )

    success = engine.load_model()
    if success:
        print(" ✓ Model loaded successfully!")
        print(f" ✓ Engine: {engine}")

        # Test transcription with dummy audio: 48000 samples at 16 kHz
        # is 3 seconds of silence, which should yield no transcription.
        print("\n Testing transcription with silent audio...")
        test_audio = np.zeros(48000, dtype=np.float32)  # 3 seconds of silence
        result = engine.transcribe(test_audio, sample_rate=16000, user_name="Test")

        if result:
            print(f" ✓ Transcription result: '{result.text}'")
        else:
            print(" ℹ No transcription (expected for silent audio)")

        engine.unload_model()
    else:
        print(" ✗ Model loading failed")
        sys.exit(1)

except Exception as e:
    print(f" ✗ Transcription engine failed: {e}")
    import traceback
    traceback.print_exc()
    sys.exit(1)
|
||
|
||
# Final summary banner: only reached when no fatal test called sys.exit.
banner = "=" * 60
print("\n" + banner)
print("✓ All Components Tested Successfully!")
print(banner)
print("\nThe application is ready to use!")
print("Run 'uv run python main.py' on a system with a display.")
print(banner)
|