Files
local-transcription/test_components.py
Josh Knapp 472233aec4 Initial commit: Local Transcription App v1.0
Phase 1 Complete - Standalone Desktop Application

Features:
- Real-time speech-to-text with Whisper (faster-whisper)
- PySide6 desktop GUI with settings dialog
- Web server for OBS browser source integration
- Audio capture with automatic sample rate detection and resampling
- Noise suppression with Voice Activity Detection (VAD)
- Configurable display settings (font, timestamps, fade duration)
- Settings apply without restart (with automatic model reloading)
- Auto-fade for web display transcriptions
- CPU/GPU support with automatic device detection
- Standalone executable builds (PyInstaller)
- CUDA build support (works on systems without CUDA hardware)

Components:
- Audio capture with sounddevice
- Noise reduction with noisereduce + webrtcvad
- Transcription with faster-whisper
- GUI with PySide6
- Web server with FastAPI + WebSocket
- Configuration system with YAML

Build System:
- Standard builds (CPU-only): build.sh / build.bat
- CUDA builds (universal): build-cuda.sh / build-cuda.bat
- Comprehensive BUILD.md documentation
- Cross-platform support (Linux, Windows)

Documentation:
- README.md with project overview and quick start
- BUILD.md with detailed build instructions
- NEXT_STEPS.md with future enhancement roadmap
- INSTALL.md with setup instructions

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-12-25 18:48:23 -08:00

125 lines
4.0 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env python3
"""
Test script to verify all components work without GUI.
This can run in headless environments.
"""
import sys
from pathlib import Path
# Put the directory that holds this script at the front of sys.path so that
# imports such as `client.config` are looked up there first.
project_root = Path(__file__).parent
sys.path.insert(0, str(project_root))

# Opening banner for the run, emitted as three lines.
print(
    "=" * 60,
    "Testing Local Transcription Components (No GUI)",
    "=" * 60,
    sep="\n",
)
# Test 1: Configuration
# Instantiate Config and echo its path plus two settings. Any exception is
# treated as fatal and ends the run with exit status 1.
print("\n1. Testing Configuration System...")
try:
    from client.config import Config

    config = Config()
    print(f" ✓ Config loaded: {config.config_path}")
    # Each setting is looked up immediately before it is printed.
    for label, key in (
        ("User name", "user.name"),
        ("Model", "transcription.model"),
    ):
        print(f" ✓ {label}: {config.get(key)}")
except Exception as exc:
    print(f" ✗ Config failed: {exc}")
    sys.exit(1)
# Test 2: Device Detection
# Build a DeviceManager and report what it exposes. `device_mgr` stays bound
# at module level because the transcription-engine check further down reads
# it. Any exception is treated as fatal and ends the run with exit status 1.
print("\n2. Testing Device Detection...")
try:
    from client.device_utils import DeviceManager

    device_mgr = DeviceManager()
    print(f" ✓ Available devices: {device_mgr.available_devices}")
    print(f" ✓ Current device: {device_mgr.current_device}")
    gpu_available = device_mgr.is_gpu_available()
    print(f" ✓ GPU available: {gpu_available}")

    # Every entry unpacks into an identifier and a description.
    device_info = device_mgr.get_device_info()
    for device_id, description in device_info:
        print(f" - {device_id}: {description}")
except Exception as exc:
    print(f" ✗ Device detection failed: {exc}")
    sys.exit(1)
# Test 3: Audio Devices
# List the input devices reported by AudioCapture. A failure is only
# reported; the script carries on with the next check.
print("\n3. Testing Audio Capture...")
try:
    from client.audio_capture import AudioCapture

    devices = AudioCapture.get_input_devices()
    device_count = len(devices)
    print(f" ✓ Found {device_count} audio input device(s)")

    # Show at most the first five entries, then summarise whatever is left.
    for device_index, device_name in devices[:5]:
        print(f" - [{device_index}] {device_name}")
    remaining = device_count - 5
    if remaining > 0:
        print(f" ... and {remaining} more")
except Exception as exc:
    print(f" ✗ Audio capture failed: {exc}")
# Test 4: Noise Suppression
# Push one buffer of random noise through NoiseSuppressor. A failure is only
# reported; the script carries on with the next check.
print("\n4. Testing Noise Suppression...")
try:
    from client.noise_suppression import NoiseSuppressor
    import numpy as np

    suppressor = NoiseSuppressor(
        sample_rate=16000,
        method="noisereduce",
        strength=0.7,
    )
    print(f" ✓ Noise suppressor created: {suppressor}")

    # Dummy input: 16000 float32 samples of normally distributed noise scaled
    # by 0.1, i.e. one second at the 16000 Hz rate given to the suppressor.
    raw_noise = np.random.randn(16000)
    test_audio = raw_noise.astype(np.float32) * 0.1
    processed = suppressor.process(test_audio, skip_silent=False)
    print(f" ✓ Processed audio shape: {processed.shape}")
except Exception as exc:
    print(f" ✗ Noise suppression failed: {exc}")
# Test 5: Transcription Engine
# Load the "tiny" model on the device selected by `device_mgr` (bound by the
# device-detection check, which exits on failure) and transcribe three
# seconds of silence. Any failure here is fatal and ends the run with exit
# status 1.
print("\n5. Testing Transcription Engine (Loading Model)...")
try:
    # Bind numpy here instead of relying on the noise-suppression check: that
    # check is non-fatal and imports numpy only after its own project import,
    # so `np` can be unbound by the time this code runs.
    import numpy as np

    from client.transcription_engine import TranscriptionEngine

    device = device_mgr.get_device_for_whisper()
    compute_type = device_mgr.get_compute_type()
    print(f" → Using device: {device} with compute type: {compute_type}")
    print(" → Loading model (this may take 1-2 minutes on first run)...")

    engine = TranscriptionEngine(
        model_size="tiny",  # Use tiny for faster testing
        device=device,
        compute_type=compute_type,
        language="en",
    )

    success = engine.load_model()
    if not success:
        print(" ✗ Model loading failed")
        # SystemExit does not derive from Exception, so the handler below
        # does not intercept this exit.
        sys.exit(1)

    print(" ✓ Model loaded successfully!")
    print(f" ✓ Engine: {engine}")

    # Test transcription with dummy audio:
    # 48000 zero samples at 16000 Hz = 3 seconds of silence.
    print("\n Testing transcription with silent audio...")
    test_audio = np.zeros(48000, dtype=np.float32)
    result = engine.transcribe(test_audio, sample_rate=16000, user_name="Test")
    if result:
        print(f" ✓ Transcription result: '{result.text}'")
    else:
        print("  No transcription (expected for silent audio)")
    engine.unload_model()
except Exception as exc:
    print(f" ✗ Transcription engine failed: {exc}")
    import traceback

    traceback.print_exc()
    sys.exit(1)
# Closing summary and a hint on how to launch the full application.
# NOTE(review): the audio-capture and noise-suppression checks do not exit on
# failure, so this banner is printed even when one of them reported an error.
for summary_line in (
    "\n" + "=" * 60,
    "✓ All Components Tested Successfully!",
    "=" * 60,
    "\nThe application is ready to use!",
    "Run 'uv run python main.py' on a system with a display.",
    "=" * 60,
):
    print(summary_line)