Initial commit: Local Transcription App v1.0
Phase 1 Complete - Standalone Desktop Application Features: - Real-time speech-to-text with Whisper (faster-whisper) - PySide6 desktop GUI with settings dialog - Web server for OBS browser source integration - Audio capture with automatic sample rate detection and resampling - Noise suppression with Voice Activity Detection (VAD) - Configurable display settings (font, timestamps, fade duration) - Settings apply without restart (with automatic model reloading) - Auto-fade for web display transcriptions - CPU/GPU support with automatic device detection - Standalone executable builds (PyInstaller) - CUDA build support (works on systems without CUDA hardware) Components: - Audio capture with sounddevice - Noise reduction with noisereduce + webrtcvad - Transcription with faster-whisper - GUI with PySide6 - Web server with FastAPI + WebSocket - Configuration system with YAML Build System: - Standard builds (CPU-only): build.sh / build.bat - CUDA builds (universal): build-cuda.sh / build-cuda.bat - Comprehensive BUILD.md documentation - Cross-platform support (Linux, Windows) Documentation: - README.md with project overview and quick start - BUILD.md with detailed build instructions - NEXT_STEPS.md with future enhancement roadmap - INSTALL.md with setup instructions 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
524
gui/main_window_qt.py
Normal file
524
gui/main_window_qt.py
Normal file
@@ -0,0 +1,524 @@
|
||||
"""PySide6 main application window for the local transcription app."""
|
||||
|
||||
from PySide6.QtWidgets import (
|
||||
QMainWindow, QWidget, QVBoxLayout, QHBoxLayout,
|
||||
QPushButton, QLabel, QFileDialog, QMessageBox
|
||||
)
|
||||
from PySide6.QtCore import Qt, QThread, Signal
|
||||
from PySide6.QtGui import QFont
|
||||
from pathlib import Path
|
||||
import sys
|
||||
|
||||
# Add parent directory to path for imports
|
||||
sys.path.append(str(Path(__file__).parent.parent))
|
||||
|
||||
from client.config import Config
|
||||
from client.device_utils import DeviceManager
|
||||
from client.audio_capture import AudioCapture
|
||||
from client.noise_suppression import NoiseSuppressor
|
||||
from client.transcription_engine import TranscriptionEngine
|
||||
from gui.transcription_display_qt import TranscriptionDisplay
|
||||
from gui.settings_dialog_qt import SettingsDialog
|
||||
from server.web_display import TranscriptionWebServer
|
||||
import asyncio
|
||||
from threading import Thread
|
||||
|
||||
|
||||
class WebServerThread(Thread):
    """Daemon thread that drives the web server on a private asyncio loop.

    ``loop`` is ``None`` until ``run`` has executed far enough to create the
    event loop; other threads read it to schedule coroutines on this thread.
    """

    def __init__(self, web_server):
        """Remember the server to run and mark the thread as a daemon.

        Args:
            web_server: Object exposing an awaitable ``start()`` method.
        """
        Thread.__init__(self, daemon=True)
        self.loop = None
        self.web_server = web_server

    def run(self):
        """Create this thread's event loop and run ``web_server.start()`` on it."""
        event_loop = asyncio.new_event_loop()
        self.loop = event_loop
        # Make the new loop the current one for this thread before the
        # server coroutine is created.
        asyncio.set_event_loop(event_loop)
        startup = self.web_server.start()
        # Blocks until the start() coroutine finishes.
        event_loop.run_until_complete(startup)
|
||||
|
||||
|
||||
class ModelLoaderThread(QThread):
    """Background QThread that loads the transcription model off the GUI thread.

    The outcome is reported through the ``finished`` signal as a
    ``(success, message)`` pair.
    """

    # NOTE(review): QThread already provides a parameterless ``finished``
    # signal; this attribute shadows it with a (bool, str) signature.
    # Confirm that is intended before renaming - callers connect to it.
    finished = Signal(bool, str)  # success, message

    def __init__(self, transcription_engine):
        """Keep a reference to the engine whose ``load_model()`` will be called."""
        super().__init__()
        self.transcription_engine = transcription_engine

    def run(self):
        """Call ``load_model()`` and emit the result; exceptions become a failure message."""
        try:
            loaded = self.transcription_engine.load_model()
            if loaded:
                outcome = (True, "Model loaded successfully")
            else:
                outcome = (False, "Failed to load model")
            self.finished.emit(*outcome)
        except Exception as exc:
            self.finished.emit(False, f"Error loading model: {exc}")
|
||||
|
||||
|
||||
class MainWindow(QMainWindow):
    """Main application window using PySide6.

    Builds the UI, creates the transcription engine and loads its model on a
    background ``ModelLoaderThread``, starts the web display server on a
    ``WebServerThread``, and wires audio capture -> noise suppression ->
    transcription -> on-screen / web display.
    """

    def __init__(self):
        """Initialize the main window."""
        super().__init__()

        # Application state
        self.is_transcribing = False
        self.config = Config()
        self.device_manager = DeviceManager()

        # Components (created later; None until then)
        self.audio_capture: AudioCapture | None = None
        self.noise_suppressor: NoiseSuppressor | None = None
        self.transcription_engine: TranscriptionEngine | None = None
        self.model_loader_thread: ModelLoaderThread | None = None

        # Settings the current engine was built with; compared against the
        # config after a settings save to decide whether a reload is needed.
        self.current_model_size: str | None = None
        self.current_device_config: str | None = None

        # Web server components
        self.web_server: TranscriptionWebServer | None = None
        self.web_server_thread: WebServerThread | None = None

        # Configure window
        self.setWindowTitle("Local Transcription")
        self.resize(900, 700)

        # Create UI
        self._create_widgets()

        # Initialize components (model loads in a background thread)
        self._initialize_components()

        # Start web server
        self._start_web_server_if_enabled()

    def _create_widgets(self):
        """Create all UI widgets: header, status row, transcript view, controls."""
        # Central widget
        central_widget = QWidget()
        self.setCentralWidget(central_widget)

        main_layout = QVBoxLayout()
        central_widget.setLayout(main_layout)

        # Header
        header_widget = QWidget()
        header_widget.setFixedHeight(80)
        header_layout = QHBoxLayout()
        header_widget.setLayout(header_layout)

        title_label = QLabel("Local Transcription")
        title_font = QFont()
        title_font.setPointSize(24)
        title_font.setBold(True)
        title_label.setFont(title_font)
        header_layout.addWidget(title_label)

        header_layout.addStretch()

        self.settings_button = QPushButton("⚙ Settings")
        self.settings_button.setFixedSize(120, 40)
        self.settings_button.clicked.connect(self._open_settings)
        header_layout.addWidget(self.settings_button)

        main_layout.addWidget(header_widget)

        # Status bar
        status_widget = QWidget()
        status_widget.setFixedHeight(60)
        status_layout = QHBoxLayout()
        status_widget.setLayout(status_layout)

        self.status_label = QLabel("⚫ Initializing...")
        status_font = QFont()
        status_font.setPointSize(14)
        self.status_label.setFont(status_font)
        status_layout.addWidget(self.status_label)

        # Shows element [1] of the first entry returned by the device manager.
        # NOTE(review): entries look like (id, label) pairs - confirm against
        # DeviceManager.get_device_info().
        device_info = self.device_manager.get_device_info()
        device_text = device_info[0][1] if device_info else "No device"
        self.device_label = QLabel(f"Device: {device_text}")
        status_layout.addWidget(self.device_label)

        user_name = self.config.get('user.name', 'User')
        self.user_label = QLabel(f"User: {user_name}")
        status_layout.addWidget(self.user_label)

        status_layout.addStretch()

        main_layout.addWidget(status_widget)

        # Transcription display
        self.transcription_display = TranscriptionDisplay(
            max_lines=self.config.get('display.max_lines', 100),
            show_timestamps=self.config.get('display.show_timestamps', True),
            font_family=self.config.get('display.font_family', 'Courier'),
            font_size=self.config.get('display.font_size', 12)
        )
        main_layout.addWidget(self.transcription_display)

        # Control buttons
        control_widget = QWidget()
        control_widget.setFixedHeight(80)
        control_layout = QHBoxLayout()
        control_widget.setLayout(control_layout)

        self.start_button = QPushButton("▶ Start Transcription")
        self.start_button.setFixedSize(240, 50)
        button_font = QFont()
        button_font.setPointSize(14)
        button_font.setBold(True)
        self.start_button.setFont(button_font)
        self.start_button.clicked.connect(self._toggle_transcription)
        self.start_button.setStyleSheet("background-color: #2ecc71; color: white;")
        control_layout.addWidget(self.start_button)

        self.clear_button = QPushButton("Clear")
        self.clear_button.setFixedSize(120, 50)
        self.clear_button.clicked.connect(self._clear_transcriptions)
        control_layout.addWidget(self.clear_button)

        self.save_button = QPushButton("💾 Save")
        self.save_button.setFixedSize(120, 50)
        self.save_button.clicked.connect(self._save_transcriptions)
        control_layout.addWidget(self.save_button)

        control_layout.addStretch()

        main_layout.addWidget(control_widget)

    def _build_transcription_engine(self):
        """Create a TranscriptionEngine from the current config.

        Also applies the configured device to the device manager and records
        the model size / device setting used in ``current_model_size`` and
        ``current_device_config``.

        Returns:
            The newly constructed (not yet loaded) TranscriptionEngine.
        """
        # Set device based on config
        device_config = self.config.get('transcription.device', 'auto')
        self.device_manager.set_device(device_config)

        model_size = self.config.get('transcription.model', 'base')
        language = self.config.get('transcription.language', 'en')
        device = self.device_manager.get_device_for_whisper()
        compute_type = self.device_manager.get_compute_type()

        # Track current settings
        self.current_model_size = model_size
        self.current_device_config = device_config

        return TranscriptionEngine(
            model_size=model_size,
            device=device,
            compute_type=compute_type,
            language=language,
            min_confidence=self.config.get('processing.min_confidence', 0.5)
        )

    def _start_model_loader(self, on_finished):
        """Load the current engine's model on a new background thread.

        Args:
            on_finished: Slot receiving ``(success: bool, message: str)``.
        """
        self.model_loader_thread = ModelLoaderThread(self.transcription_engine)
        self.model_loader_thread.finished.connect(on_finished)
        self.model_loader_thread.start()

    def _wait_for_model_loader(self):
        """Block until a running model loader thread, if any, has finished."""
        if self.model_loader_thread and self.model_loader_thread.isRunning():
            self.model_loader_thread.wait()

    def _ready_status_text(self):
        """Return the 'ready' status line including the web display URL."""
        host = self.config.get('web_server.host', '127.0.0.1')
        port = self.config.get('web_server.port', 8080)
        return f"✓ Ready | Web: http://{host}:{port}"

    def _initialize_components(self):
        """Create the transcription engine and start loading its model."""
        # Update status
        self.status_label.setText("⚙ Initializing...")
        # Keep Start disabled until the model has loaded, matching the
        # reload path; _on_model_loaded re-enables it on success.
        self.start_button.setEnabled(False)

        self.transcription_engine = self._build_transcription_engine()

        # Load model in background thread
        self._start_model_loader(self._on_model_loaded)

    def _on_model_loaded(self, success: bool, message: str):
        """Handle completion of the initial model load.

        Args:
            success: Whether the model loaded.
            message: Text from the loader thread; shown to the user on failure.
        """
        if success:
            self.status_label.setText(self._ready_status_text())
            self.start_button.setEnabled(True)
        else:
            self.status_label.setText("❌ Model loading failed")
            QMessageBox.critical(self, "Error", message)
            self.start_button.setEnabled(False)

    def _start_web_server_if_enabled(self):
        """Start the web display server on a background thread.

        NOTE(review): despite the name, no 'enabled' setting is consulted
        here - the server is always started. Confirm whether a config flag
        was intended.
        """
        host = self.config.get('web_server.host', '127.0.0.1')
        port = self.config.get('web_server.port', 8080)
        show_timestamps = self.config.get('display.show_timestamps', True)
        fade_after_seconds = self.config.get('display.fade_after_seconds', 10)

        print(f"Starting web server at http://{host}:{port}")
        self.web_server = TranscriptionWebServer(
            host=host,
            port=port,
            show_timestamps=show_timestamps,
            fade_after_seconds=fade_after_seconds
        )
        self.web_server_thread = WebServerThread(self.web_server)
        self.web_server_thread.start()

    def _toggle_transcription(self):
        """Start transcription if idle, otherwise stop it."""
        if not self.is_transcribing:
            self._start_transcription()
        else:
            self._stop_transcription()

    def _start_transcription(self):
        """Create audio capture / noise suppression and begin recording.

        Any exception is reported in a message box and printed; the window
        stays in the 'not transcribing' state in that case.
        """
        try:
            # Check if engine is ready
            if not self.transcription_engine or not self.transcription_engine.is_loaded:
                QMessageBox.critical(self, "Error", "Transcription engine not ready")
                return

            # Get audio device: 'default' maps to None, anything else is
            # parsed as an integer device index.
            audio_device_str = self.config.get('audio.input_device', 'default')
            audio_device = None if audio_device_str == 'default' else int(audio_device_str)

            # Initialize audio capture
            self.audio_capture = AudioCapture(
                sample_rate=self.config.get('audio.sample_rate', 16000),
                chunk_duration=self.config.get('audio.chunk_duration', 3.0),
                device=audio_device
            )

            # Initialize noise suppressor
            self.noise_suppressor = NoiseSuppressor(
                sample_rate=self.config.get('audio.sample_rate', 16000),
                method="noisereduce" if self.config.get('noise_suppression.enabled', True) else "none",
                strength=self.config.get('noise_suppression.strength', 0.7),
                use_vad=self.config.get('processing.use_vad', True)
            )

            # Start recording
            self.audio_capture.start_recording(callback=self._process_audio_chunk)

            # Update UI
            self.is_transcribing = True
            self.start_button.setText("⏸ Stop Transcription")
            self.start_button.setStyleSheet("background-color: #e74c3c; color: white;")
            self.status_label.setText("🔴 Recording...")

        except Exception as e:
            QMessageBox.critical(self, "Error", f"Failed to start transcription:\n{e}")
            print(f"Error starting transcription: {e}")

    def _stop_transcription(self):
        """Stop recording and restore the idle UI state."""
        try:
            # Stop recording
            if self.audio_capture:
                self.audio_capture.stop_recording()

            # Update UI
            self.is_transcribing = False
            self.start_button.setText("▶ Start Transcription")
            self.start_button.setStyleSheet("background-color: #2ecc71; color: white;")
            self.status_label.setText("✓ Ready")

        except Exception as e:
            QMessageBox.critical(self, "Error", f"Failed to stop transcription:\n{e}")
            print(f"Error stopping transcription: {e}")

    def _process_audio_chunk(self, audio_chunk):
        """Process an audio chunk (noise suppression + transcription).

        Passed as the recording callback. The work is handed to a new daemon
        thread per chunk so the caller is not blocked.

        Args:
            audio_chunk: Audio data as delivered by ``AudioCapture``.
        """
        def process():
            try:
                # Apply noise suppression
                processed_audio = self.noise_suppressor.process(audio_chunk, skip_silent=True)

                # Skip if the suppressor returned nothing for this chunk
                if processed_audio is None:
                    return

                # Transcribe
                user_name = self.config.get('user.name', 'User')
                result = self.transcription_engine.transcribe(
                    processed_audio,
                    sample_rate=self.config.get('audio.sample_rate', 16000),
                    user_name=user_name
                )

                if not result:
                    return

                # The display widget must be updated from the GUI thread, so
                # the call is queued rather than made directly.
                # Note: We don't pass timestamp - let the display widget create it
                from PySide6.QtCore import QMetaObject, Q_ARG
                QMetaObject.invokeMethod(
                    self.transcription_display,
                    "add_transcription",
                    Qt.QueuedConnection,
                    Q_ARG(str, result.text),
                    Q_ARG(str, result.user_name)
                )

                # Broadcast to the web server, but only once its event loop
                # exists and is running: the loop attribute is None until the
                # server thread has started, and scheduling on a loop that is
                # not running would leave the coroutine un-awaited.
                server_thread = self.web_server_thread
                loop = server_thread.loop if server_thread else None
                if self.web_server and loop is not None and loop.is_running():
                    asyncio.run_coroutine_threadsafe(
                        self.web_server.broadcast_transcription(
                            result.text,
                            result.user_name,
                            result.timestamp
                        ),
                        loop
                    )

            except Exception as e:
                # Best-effort: a failed chunk is logged and dropped.
                print(f"Error processing audio: {e}")
                import traceback
                traceback.print_exc()

        # Run in background thread
        Thread(target=process, daemon=True).start()

    def _clear_transcriptions(self):
        """Clear all transcriptions after user confirmation."""
        reply = QMessageBox.question(
            self,
            "Clear Transcriptions",
            "Are you sure you want to clear all transcriptions?",
            QMessageBox.Yes | QMessageBox.No
        )

        if reply == QMessageBox.Yes:
            self.transcription_display.clear_all()

    def _save_transcriptions(self):
        """Ask for a file path and save the transcriptions to it."""
        filepath, _ = QFileDialog.getSaveFileName(
            self,
            "Save Transcriptions",
            "",
            "Text files (*.txt);;All files (*.*)"
        )

        # Empty path means the dialog was dismissed without a selection.
        if filepath:
            if self.transcription_display.save_to_file(filepath):
                QMessageBox.information(self, "Saved", f"Transcriptions saved to:\n{filepath}")
            else:
                QMessageBox.critical(self, "Error", "Failed to save transcriptions")

    def _open_settings(self):
        """Open the settings dialog with the available audio/compute devices."""
        # Get audio devices
        audio_devices = AudioCapture.get_input_devices()
        if not audio_devices:
            audio_devices = [(0, "Default")]

        # Build a fresh list instead of inserting into the one returned by the
        # device manager, so repeated opens cannot accumulate "Auto-detect"
        # entries in a list the manager may be holding on to.
        compute_devices = [
            ("auto", "Auto-detect"),
            *(self.device_manager.get_device_info() or []),
        ]

        # Open settings dialog
        dialog = SettingsDialog(
            self,
            self.config,
            audio_devices,
            compute_devices,
            on_save=self._on_settings_saved
        )
        dialog.exec()

    def _on_settings_saved(self):
        """Apply saved settings to the UI and web server; reload the model if needed."""
        # Update user label
        user_name = self.config.get('user.name', 'User')
        self.user_label.setText(f"User: {user_name}")

        # Update display settings
        show_timestamps = self.config.get('display.show_timestamps', True)
        self.transcription_display.set_max_lines(self.config.get('display.max_lines', 100))
        self.transcription_display.set_show_timestamps(show_timestamps)
        self.transcription_display.set_font(
            self.config.get('display.font_family', 'Courier'),
            self.config.get('display.font_size', 12)
        )

        # Update web server settings
        if self.web_server:
            self.web_server.show_timestamps = show_timestamps
            self.web_server.fade_after_seconds = self.config.get('display.fade_after_seconds', 10)

        # Check if model/device settings changed - reload model if needed
        new_model = self.config.get('transcription.model', 'base')
        new_device_config = self.config.get('transcription.device', 'auto')

        # Only reload if model size or device changed
        if self.current_model_size != new_model or self.current_device_config != new_device_config:
            self._reload_model()
        else:
            QMessageBox.information(self, "Settings Saved", "Settings have been applied successfully!")

    def _reload_model(self):
        """Reload the transcription model with new settings."""
        # Stop transcription if running
        if self.is_transcribing:
            self._stop_transcription()

        # Update status
        self.status_label.setText("⚙ Reloading model...")
        self.start_button.setEnabled(False)

        # Let any in-flight load finish before touching the engine, so the
        # model is not unloaded while another thread is still loading it.
        self._wait_for_model_loader()

        # Unload current model
        if self.transcription_engine:
            self.transcription_engine.unload_model()

        # Re-initialize transcription engine from the updated config
        self.transcription_engine = self._build_transcription_engine()

        # Load model in background thread
        self._start_model_loader(self._on_model_reloaded)

    def _on_model_reloaded(self, success: bool, message: str):
        """Handle completion of a model reload triggered by a settings change.

        Args:
            success: Whether the model loaded.
            message: Text from the loader thread; shown to the user on failure.
        """
        if success:
            self.status_label.setText(self._ready_status_text())
            self.start_button.setEnabled(True)
            QMessageBox.information(self, "Settings Saved", "Model reloaded successfully with new settings!")
        else:
            self.status_label.setText("❌ Model loading failed")
            QMessageBox.critical(self, "Error", f"Failed to reload model:\n{message}")
            self.start_button.setEnabled(False)

    def closeEvent(self, event):
        """Handle window closing: stop recording, release the model, accept."""
        # Stop transcription if running
        if self.is_transcribing:
            self._stop_transcription()

        # Wait for the loader first so the model is not unloaded while the
        # background thread is still in load_model().
        self._wait_for_model_loader()

        # Unload model
        if self.transcription_engine:
            self.transcription_engine.unload_model()

        event.accept()
|
||||
Reference in New Issue
Block a user