Files
local-transcription/gui/main_window_qt.py

609 lines
23 KiB
Python

"""PySide6 main application window for the local transcription app."""
from PySide6.QtWidgets import (
QMainWindow, QWidget, QVBoxLayout, QHBoxLayout,
QPushButton, QLabel, QFileDialog, QMessageBox
)
from PySide6.QtCore import Qt, QThread, Signal
from PySide6.QtGui import QFont
from pathlib import Path
import sys
# Add parent directory to path for imports
sys.path.append(str(Path(__file__).parent.parent))
from client.config import Config
from client.device_utils import DeviceManager
from client.audio_capture import AudioCapture
from client.noise_suppression import NoiseSuppressor
from client.transcription_engine import TranscriptionEngine
from client.server_sync import ServerSyncClient
from gui.transcription_display_qt import TranscriptionDisplay
from gui.settings_dialog_qt import SettingsDialog
from server.web_display import TranscriptionWebServer
import asyncio
from threading import Thread
class WebServerThread(Thread):
    """Daemon thread that drives the web server on its own asyncio loop.

    Attributes:
        web_server: Object exposing an ``async start()`` coroutine.
        loop: The event loop created by :meth:`run`; ``None`` until the
            thread has started executing.
    """

    def __init__(self, web_server):
        """Remember *web_server*; the event loop is created later in run()."""
        Thread.__init__(self, daemon=True)
        self.web_server = web_server
        self.loop = None

    def run(self):
        """Create a fresh event loop for this thread and run ``start()`` on it."""
        event_loop = asyncio.new_event_loop()
        # Publish the loop on the instance before running it so other threads
        # can schedule coroutines onto it through ``self.loop``.
        self.loop = event_loop
        asyncio.set_event_loop(event_loop)
        event_loop.run_until_complete(self.web_server.start())
class ModelLoaderThread(QThread):
    """Background QThread that loads the Whisper model off the GUI thread.

    The outcome is reported through ``finished`` as ``(success, message)``.
    """

    # NOTE(review): QThread already defines a no-argument ``finished`` signal;
    # this attribute shadows it on the Python side. Kept as-is because callers
    # connect to ``finished`` expecting ``(bool, str)``.
    finished = Signal(bool, str)  # success, message

    def __init__(self, transcription_engine):
        """Keep a reference to the engine whose ``load_model()`` will be run."""
        super().__init__()
        self.transcription_engine = transcription_engine

    def run(self):
        """Call ``load_model()`` and emit the result; exceptions become failures."""
        try:
            loaded = self.transcription_engine.load_model()
            if loaded:
                outcome = (True, "Model loaded successfully")
            else:
                outcome = (False, "Failed to load model")
            self.finished.emit(*outcome)
        except Exception as exc:
            self.finished.emit(False, f"Error loading model: {exc}")
class MainWindow(QMainWindow):
    """Main application window using PySide6.

    Owns the configuration, the capture / noise-suppression / transcription
    objects, the embedded web display server and the optional server-sync
    client, and wires all of them to the widgets.
    """

    # Style sheets for the two states of the start/stop button.
    _START_BUTTON_STYLE = "background-color: #2ecc71; color: white;"
    _STOP_BUTTON_STYLE = "background-color: #e74c3c; color: white;"

    def __init__(self):
        """Build the UI, start loading the model and launch the web server."""
        super().__init__()

        # Application state
        self.is_transcribing = False
        self.config = Config()
        self.device_manager = DeviceManager()

        # Components (initialized later)
        self.audio_capture: AudioCapture | None = None
        self.noise_suppressor: NoiseSuppressor | None = None
        self.transcription_engine: TranscriptionEngine | None = None
        self.model_loader_thread: ModelLoaderThread | None = None

        # Model/device settings the current engine was built with; compared
        # against the config in _on_settings_saved() to decide on a reload.
        self.current_model_size: str | None = None
        self.current_device_config: str | None = None

        # Web server components
        self.web_server: TranscriptionWebServer | None = None
        self.web_server_thread: WebServerThread | None = None

        # Server sync components
        self.server_sync_client: ServerSyncClient | None = None

        # Configure window
        self.setWindowTitle("Local Transcription")
        self.resize(900, 700)

        # Widgets must exist before the steps below touch labels/buttons.
        self._create_widgets()
        self._initialize_components()
        self._start_web_server_if_enabled()

    # ------------------------------------------------------------------
    # UI construction
    # ------------------------------------------------------------------
    def _create_widgets(self):
        """Create all UI widgets and stack them vertically in the window."""
        central_widget = QWidget()
        self.setCentralWidget(central_widget)
        main_layout = QVBoxLayout()
        central_widget.setLayout(main_layout)

        main_layout.addWidget(self._build_header())
        main_layout.addWidget(self._build_status_row())

        # Transcription display
        self.transcription_display = TranscriptionDisplay(
            max_lines=self.config.get('display.max_lines', 100),
            show_timestamps=self.config.get('display.show_timestamps', True),
            font_family=self.config.get('display.font_family', 'Courier'),
            font_size=self.config.get('display.font_size', 12)
        )
        main_layout.addWidget(self.transcription_display)

        main_layout.addWidget(self._build_controls())

    def _build_header(self):
        """Return the header row: application title plus the settings button."""
        header_widget = QWidget()
        header_widget.setFixedHeight(80)
        header_layout = QHBoxLayout()
        header_widget.setLayout(header_layout)

        title_label = QLabel("Local Transcription")
        title_font = QFont()
        title_font.setPointSize(24)
        title_font.setBold(True)
        title_label.setFont(title_font)
        header_layout.addWidget(title_label)
        header_layout.addStretch()

        self.settings_button = QPushButton("⚙ Settings")
        self.settings_button.setFixedSize(120, 40)
        self.settings_button.clicked.connect(self._open_settings)
        header_layout.addWidget(self.settings_button)
        return header_widget

    def _build_status_row(self):
        """Return the status row: state, device, user and web display link."""
        status_widget = QWidget()
        status_widget.setFixedHeight(60)
        status_layout = QHBoxLayout()
        status_widget.setLayout(status_layout)

        self.status_label = QLabel("⚫ Initializing...")
        status_font = QFont()
        status_font.setPointSize(14)
        self.status_label.setFont(status_font)
        status_layout.addWidget(self.status_label)

        # Provisional device text; replaced with the engine's actual device
        # once the model has loaded (see _show_model_ready).
        device_info = self.device_manager.get_device_info()
        device_text = device_info[0][1] if device_info else "No device"
        self.device_label = QLabel(f"Device: {device_text}")
        status_layout.addWidget(self.device_label)

        user_name = self.config.get('user.name', 'User')
        self.user_label = QLabel(f"User: {user_name}")
        status_layout.addWidget(self.user_label)

        # Web display link
        web_url = self._web_url()
        self.web_link = QLabel(f'<a href="{web_url}">🌐 Open Web Display</a>')
        self.web_link.setOpenExternalLinks(True)
        self.web_link.setToolTip(f"Click to open {web_url} in browser (for OBS)")
        self.web_link.setStyleSheet("QLabel { color: #4CAF50; }")
        status_layout.addWidget(self.web_link)

        status_layout.addStretch()
        return status_widget

    def _build_controls(self):
        """Return the bottom row with the start/stop, clear and save buttons."""
        control_widget = QWidget()
        control_widget.setFixedHeight(80)
        control_layout = QHBoxLayout()
        control_widget.setLayout(control_layout)

        self.start_button = QPushButton("▶ Start Transcription")
        self.start_button.setFixedSize(240, 50)
        button_font = QFont()
        button_font.setPointSize(14)
        button_font.setBold(True)
        self.start_button.setFont(button_font)
        self.start_button.clicked.connect(self._toggle_transcription)
        self.start_button.setStyleSheet(self._START_BUTTON_STYLE)
        control_layout.addWidget(self.start_button)

        self.clear_button = QPushButton("Clear")
        self.clear_button.setFixedSize(120, 50)
        self.clear_button.clicked.connect(self._clear_transcriptions)
        control_layout.addWidget(self.clear_button)

        self.save_button = QPushButton("💾 Save")
        self.save_button.setFixedSize(120, 50)
        self.save_button.clicked.connect(self._save_transcriptions)
        control_layout.addWidget(self.save_button)

        control_layout.addStretch()
        return control_widget

    def _web_url(self) -> str:
        """Return the web display URL built from the configured host and port."""
        host = self.config.get('web_server.host', '127.0.0.1')
        port = self.config.get('web_server.port', 8080)
        return f"http://{host}:{port}"

    # ------------------------------------------------------------------
    # Model loading
    # ------------------------------------------------------------------
    def _initialize_components(self):
        """Create the transcription engine and start loading its model."""
        self.status_label.setText("⚙ Initializing...")
        # Keep the button disabled until the model is ready, matching what
        # _reload_model() does; _on_model_loaded() re-enables it on success.
        self.start_button.setEnabled(False)

        self._build_transcription_engine()
        self._start_model_load(self._on_model_loaded)

    def _build_transcription_engine(self):
        """Create a TranscriptionEngine from the current config.

        Applies the configured compute device to the device manager, records
        the model size / device setting used, and stores the new engine in
        ``self.transcription_engine``. The model itself is not loaded here.
        """
        # Set device based on config
        device_config = self.config.get('transcription.device', 'auto')
        self.device_manager.set_device(device_config)

        model_size = self.config.get('transcription.model', 'base')
        language = self.config.get('transcription.language', 'en')
        device = self.device_manager.get_device_for_whisper()
        compute_type = self.device_manager.get_compute_type()

        # Track current settings
        self.current_model_size = model_size
        self.current_device_config = device_config

        self.transcription_engine = TranscriptionEngine(
            model_size=model_size,
            device=device,
            compute_type=compute_type,
            language=language,
            min_confidence=self.config.get('processing.min_confidence', 0.5)
        )

    def _start_model_load(self, on_finished):
        """Load the current engine's model on a background thread.

        Args:
            on_finished: Slot called with ``(success, message)`` when the
                loader thread reports its result.
        """
        self.model_loader_thread = ModelLoaderThread(self.transcription_engine)
        self.model_loader_thread.finished.connect(on_finished)
        self.model_loader_thread.start()

    def _wait_for_model_loader(self):
        """Block until a running model loader thread (if any) has finished."""
        loader = self.model_loader_thread
        if loader and loader.isRunning():
            loader.wait()

    def _show_model_ready(self):
        """Update the device/status labels and enable the start button."""
        engine = self.transcription_engine
        if engine:
            # Show the device the engine reports rather than the provisional
            # text set when the widgets were created.
            device_display = f"{engine.device.upper()} ({engine.compute_type})"
            self.device_label.setText(f"Device: {device_display}")
        self.status_label.setText(f"✓ Ready | Web: {self._web_url()}")
        self.start_button.setEnabled(True)

    def _on_model_loaded(self, success: bool, message: str):
        """Handle completion of the initial model load."""
        if success:
            self._show_model_ready()
        else:
            self.status_label.setText("❌ Model loading failed")
            QMessageBox.critical(self, "Error", message)
            self.start_button.setEnabled(False)

    def _reload_model(self):
        """Reload the transcription model with new settings."""
        # Stop transcription if running
        if self.is_transcribing:
            self._stop_transcription()

        self.status_label.setText("⚙ Reloading model...")
        self.start_button.setEnabled(False)

        # Let any in-flight load finish before touching the engine, so the
        # engine is not unloaded while a loader thread is still working on it.
        self._wait_for_model_loader()

        # Unload current model
        if self.transcription_engine:
            self.transcription_engine.unload_model()

        self._build_transcription_engine()
        self._start_model_load(self._on_model_reloaded)

    def _on_model_reloaded(self, success: bool, message: str):
        """Handle completion of a model reload triggered by a settings change."""
        if success:
            self._show_model_ready()
            QMessageBox.information(self, "Settings Saved", "Model reloaded successfully with new settings!")
        else:
            self.status_label.setText("❌ Model loading failed")
            QMessageBox.critical(self, "Error", f"Failed to reload model:\n{message}")
            self.start_button.setEnabled(False)

    # ------------------------------------------------------------------
    # Web server
    # ------------------------------------------------------------------
    def _start_web_server_if_enabled(self):
        """Start the web display server on a daemon thread.

        Despite the name, no "enabled" setting is consulted here: the server
        is always created and started.
        """
        host = self.config.get('web_server.host', '127.0.0.1')
        port = self.config.get('web_server.port', 8080)
        show_timestamps = self.config.get('display.show_timestamps', True)
        fade_after_seconds = self.config.get('display.fade_after_seconds', 10)

        print(f"Starting web server at http://{host}:{port}")
        self.web_server = TranscriptionWebServer(
            host=host,
            port=port,
            show_timestamps=show_timestamps,
            fade_after_seconds=fade_after_seconds
        )
        self.web_server_thread = WebServerThread(self.web_server)
        self.web_server_thread.start()

    # ------------------------------------------------------------------
    # Transcription control
    # ------------------------------------------------------------------
    def _toggle_transcription(self):
        """Start transcription if idle, otherwise stop it."""
        if self.is_transcribing:
            self._stop_transcription()
        else:
            self._start_transcription()

    def _start_transcription(self):
        """Create the capture pipeline from config and start recording.

        Any failure is reported in an error dialog; the window then stays in
        the "not transcribing" state.
        """
        try:
            # Check if engine is ready
            if not self.transcription_engine or not self.transcription_engine.is_loaded:
                QMessageBox.critical(self, "Error", "Transcription engine not ready")
                return

            # 'default' selects no explicit device; anything else is parsed
            # as an integer device index.
            audio_device_str = self.config.get('audio.input_device', 'default')
            audio_device = None if audio_device_str == 'default' else int(audio_device_str)
            sample_rate = self.config.get('audio.sample_rate', 16000)

            # Initialize audio capture
            self.audio_capture = AudioCapture(
                sample_rate=sample_rate,
                chunk_duration=self.config.get('audio.chunk_duration', 3.0),
                overlap_duration=self.config.get('audio.overlap_duration', 0.5),
                device=audio_device
            )

            # Initialize noise suppressor
            self.noise_suppressor = NoiseSuppressor(
                sample_rate=sample_rate,
                method="noisereduce" if self.config.get('noise_suppression.enabled', True) else "none",
                strength=self.config.get('noise_suppression.strength', 0.7),
                use_vad=self.config.get('processing.use_vad', True)
            )

            # Initialize server sync if enabled
            if self.config.get('server_sync.enabled', False):
                self._start_server_sync()

            # Start recording
            self.audio_capture.start_recording(callback=self._process_audio_chunk)

            # Update UI
            self.is_transcribing = True
            self.start_button.setText("⏸ Stop Transcription")
            self.start_button.setStyleSheet(self._STOP_BUTTON_STYLE)
            self.status_label.setText("🔴 Transcribing...")
        except Exception as e:
            # A sync client may already be running if the failure happened
            # after _start_server_sync(); don't leave it running for a
            # session that never started.
            try:
                self._stop_server_sync()
            except Exception as cleanup_error:
                print(f"Error stopping server sync: {cleanup_error}")
            QMessageBox.critical(self, "Error", f"Failed to start transcription:\n{e}")
            print(f"Error starting transcription: {e}")

    def _stop_transcription(self):
        """Stop recording and server sync, then reset the UI to idle."""
        try:
            # Stop recording
            if self.audio_capture:
                self.audio_capture.stop_recording()

            # Stop server sync if running
            self._stop_server_sync()

            # Update UI
            self.is_transcribing = False
            self.start_button.setText("▶ Start Transcription")
            self.start_button.setStyleSheet(self._START_BUTTON_STYLE)
            self.status_label.setText("✓ Ready")
        except Exception as e:
            QMessageBox.critical(self, "Error", f"Failed to stop transcription:\n{e}")
            print(f"Error stopping transcription: {e}")

    def _process_audio_chunk(self, audio_chunk):
        """Process an audio chunk (noise suppression + transcription).

        Registered as the recording callback in _start_transcription(). The
        work is handed to a new daemon thread per chunk so this callback
        returns immediately.
        """
        def process():
            try:
                # Apply noise suppression
                processed_audio = self.noise_suppressor.process(audio_chunk, skip_silent=True)
                # Skip if silent (VAD filtered it out)
                if processed_audio is None:
                    return

                # Transcribe
                result = self.transcription_engine.transcribe(
                    processed_audio,
                    sample_rate=self.config.get('audio.sample_rate', 16000),
                    user_name=self.config.get('user.name', 'User')
                )
                if result:
                    self._publish_result(result)
            except Exception as e:
                print(f"Error processing audio: {e}")
                import traceback
                traceback.print_exc()

        # Run in background thread
        Thread(target=process, daemon=True).start()

    def _publish_result(self, result):
        """Send one transcription result to the display, web server and sync.

        Called from a worker thread, so the widget is updated through a
        queued invocation instead of a direct call.
        """
        from PySide6.QtCore import QMetaObject, Q_ARG

        # No timestamp is passed - the display widget creates its own.
        # NOTE(review): assumes add_transcription is invokable by name
        # (e.g. declared as a Qt slot) - confirm in TranscriptionDisplay.
        QMetaObject.invokeMethod(
            self.transcription_display,
            "add_transcription",
            Qt.QueuedConnection,
            Q_ARG(str, result.text),
            Q_ARG(str, result.user_name)
        )

        self._broadcast_to_web(result)

        # Snapshot the client: the GUI thread may set the attribute to None
        # between the check and the call.
        sync_client = self.server_sync_client
        if sync_client:
            sync_client.send_transcription(result.text, result.timestamp)

    def _broadcast_to_web(self, result):
        """Schedule a broadcast of *result* on the web server's event loop.

        Does nothing when the server thread has not created its loop yet or
        the loop is not running; scheduling onto such a loop would either
        raise or never execute.
        """
        server = self.web_server
        server_thread = self.web_server_thread
        if not (server and server_thread):
            return
        loop = server_thread.loop
        if loop is None or not loop.is_running():
            return
        asyncio.run_coroutine_threadsafe(
            server.broadcast_transcription(
                result.text,
                result.user_name,
                result.timestamp
            ),
            loop
        )

    # ------------------------------------------------------------------
    # Clear / save
    # ------------------------------------------------------------------
    def _clear_transcriptions(self):
        """Clear all transcriptions after the user confirms."""
        reply = QMessageBox.question(
            self,
            "Clear Transcriptions",
            "Are you sure you want to clear all transcriptions?",
            QMessageBox.Yes | QMessageBox.No
        )
        if reply == QMessageBox.Yes:
            self.transcription_display.clear_all()

    def _save_transcriptions(self):
        """Ask for a file path and save the transcriptions to it."""
        filepath, _ = QFileDialog.getSaveFileName(
            self,
            "Save Transcriptions",
            "",
            "Text files (*.txt);;All files (*.*)"
        )
        if not filepath:
            # Dialog was cancelled.
            return
        if self.transcription_display.save_to_file(filepath):
            QMessageBox.information(self, "Saved", f"Transcriptions saved to:\n{filepath}")
        else:
            QMessageBox.critical(self, "Error", "Failed to save transcriptions")

    # ------------------------------------------------------------------
    # Settings
    # ------------------------------------------------------------------
    def _open_settings(self):
        """Open the modal settings dialog."""
        # Get audio devices
        audio_devices = AudioCapture.get_input_devices()
        if not audio_devices:
            audio_devices = [(0, "Default")]

        # Build a new list instead of inserting into the one returned by the
        # device manager, so repeated opens cannot accumulate extra
        # "Auto-detect" entries in a list the manager may be holding on to.
        compute_devices = [("auto", "Auto-detect"), *self.device_manager.get_device_info()]

        dialog = SettingsDialog(
            self,
            self.config,
            audio_devices,
            compute_devices,
            on_save=self._on_settings_saved
        )
        dialog.exec()

    def _on_settings_saved(self):
        """Apply saved settings to the live widgets and components."""
        # Update user label
        user_name = self.config.get('user.name', 'User')
        self.user_label.setText(f"User: {user_name}")

        # Update display settings
        show_timestamps = self.config.get('display.show_timestamps', True)
        self.transcription_display.set_max_lines(self.config.get('display.max_lines', 100))
        self.transcription_display.set_show_timestamps(show_timestamps)
        self.transcription_display.set_font(
            self.config.get('display.font_family', 'Courier'),
            self.config.get('display.font_size', 12)
        )

        # Update web server settings
        if self.web_server:
            self.web_server.show_timestamps = show_timestamps
            self.web_server.fade_after_seconds = self.config.get('display.fade_after_seconds', 10)

        # Restart server sync only if a client is currently running; the new
        # client picks up the saved URL/room/passphrase/user name.
        if self.is_transcribing and self.server_sync_client:
            self._stop_server_sync()
            if self.config.get('server_sync.enabled', False):
                self._start_server_sync()

        # Only reload if model size or device changed
        new_model = self.config.get('transcription.model', 'base')
        new_device_config = self.config.get('transcription.device', 'auto')
        if self.current_model_size != new_model or self.current_device_config != new_device_config:
            self._reload_model()
        else:
            QMessageBox.information(self, "Settings Saved", "Settings have been applied successfully!")

    # ------------------------------------------------------------------
    # Server sync
    # ------------------------------------------------------------------
    def _start_server_sync(self):
        """Create and start the server sync client from the current config.

        Does nothing (beyond a console message) when no URL is configured.
        Errors are shown as a warning dialog and are not re-raised.
        """
        try:
            url = self.config.get('server_sync.url', '')
            room = self.config.get('server_sync.room', 'default')
            passphrase = self.config.get('server_sync.passphrase', '')
            user_name = self.config.get('user.name', 'User')

            if not url:
                print("Server sync enabled but no URL configured")
                return

            print(f"Starting server sync: {url}, room: {room}, user: {user_name}")
            self.server_sync_client = ServerSyncClient(
                url=url,
                room=room,
                passphrase=passphrase,
                user_name=user_name
            )
            self.server_sync_client.start()
        except Exception as e:
            print(f"Error starting server sync: {e}")
            QMessageBox.warning(
                self,
                "Server Sync Warning",
                f"Failed to start server sync:\n{e}\n\nTranscription will continue locally."
            )

    def _stop_server_sync(self):
        """Stop the server sync client, if any, and forget it."""
        if self.server_sync_client:
            self.server_sync_client.stop()
            self.server_sync_client = None

    # ------------------------------------------------------------------
    # Qt events
    # ------------------------------------------------------------------
    def closeEvent(self, event):
        """Handle window closing: stop transcription and release the model."""
        # Stop transcription if running
        if self.is_transcribing:
            self._stop_transcription()

        # Wait for a running loader first so the engine is not unloaded while
        # a background thread is still loading it.
        self._wait_for_model_loader()

        # Unload model
        if self.transcription_engine:
            self.transcription_engine.unload_model()

        event.accept()