"""PySide6 main application window for the local transcription app.""" from PySide6.QtWidgets import ( QMainWindow, QWidget, QVBoxLayout, QHBoxLayout, QPushButton, QLabel, QFileDialog, QMessageBox ) from PySide6.QtCore import Qt, QThread, Signal from PySide6.QtGui import QFont from pathlib import Path import sys # Add parent directory to path for imports sys.path.append(str(Path(__file__).parent.parent)) from client.config import Config from client.device_utils import DeviceManager from client.audio_capture import AudioCapture from client.noise_suppression import NoiseSuppressor from client.transcription_engine import TranscriptionEngine from client.server_sync import ServerSyncClient from gui.transcription_display_qt import TranscriptionDisplay from gui.settings_dialog_qt import SettingsDialog from server.web_display import TranscriptionWebServer import asyncio from threading import Thread class WebServerThread(Thread): """Thread for running the web server.""" def __init__(self, web_server): super().__init__(daemon=True) self.web_server = web_server self.loop = None self.error = None def run(self): """Run the web server in async event loop.""" try: self.loop = asyncio.new_event_loop() asyncio.set_event_loop(self.loop) self.loop.run_until_complete(self.web_server.start()) except Exception as e: self.error = e print(f"ERROR: Web server failed to start: {e}") import traceback traceback.print_exc() class ModelLoaderThread(QThread): """Thread for loading the Whisper model without blocking the GUI.""" finished = Signal(bool, str) # success, message def __init__(self, transcription_engine): super().__init__() self.transcription_engine = transcription_engine def run(self): """Load the model in background thread.""" try: success = self.transcription_engine.load_model() if success: self.finished.emit(True, "Model loaded successfully") else: self.finished.emit(False, "Failed to load model") except Exception as e: self.finished.emit(False, f"Error loading model: {e}") class MainWindow(QMainWindow): """Main application window using PySide6.""" def __init__(self): """Initialize the main window.""" super().__init__() # Application state self.is_transcribing = False self.config = Config() self.device_manager = DeviceManager() # Components (initialized later) self.audio_capture: AudioCapture = None self.noise_suppressor: NoiseSuppressor = None self.transcription_engine: TranscriptionEngine = None self.model_loader_thread: ModelLoaderThread = None # Track current model settings self.current_model_size: str = None self.current_device_config: str = None # Web server components self.web_server: TranscriptionWebServer = None self.web_server_thread: WebServerThread = None # Server sync components self.server_sync_client: ServerSyncClient = None # Configure window self.setWindowTitle("Local Transcription") self.resize(900, 700) # Create UI self._create_widgets() # Initialize components (in background) self._initialize_components() # Start web server if enabled self._start_web_server_if_enabled() def _create_widgets(self): """Create all UI widgets.""" # Central widget central_widget = QWidget() self.setCentralWidget(central_widget) main_layout = QVBoxLayout() central_widget.setLayout(main_layout) # Header header_widget = QWidget() header_widget.setFixedHeight(80) header_layout = QHBoxLayout() header_widget.setLayout(header_layout) title_label = QLabel("Local Transcription") title_font = QFont() title_font.setPointSize(24) title_font.setBold(True) title_label.setFont(title_font) header_layout.addWidget(title_label) header_layout.addStretch() self.settings_button = QPushButton("⚙ Settings") self.settings_button.setFixedSize(120, 40) self.settings_button.clicked.connect(self._open_settings) header_layout.addWidget(self.settings_button) main_layout.addWidget(header_widget) # Status bar status_widget = QWidget() status_widget.setFixedHeight(60) status_layout = QHBoxLayout() status_widget.setLayout(status_layout) self.status_label = QLabel("⚫ Initializing...") status_font = QFont() status_font.setPointSize(14) self.status_label.setFont(status_font) status_layout.addWidget(self.status_label) device_info = self.device_manager.get_device_info() device_text = device_info[0][1] if device_info else "No device" self.device_label = QLabel(f"Device: {device_text}") status_layout.addWidget(self.device_label) user_name = self.config.get('user.name', 'User') self.user_label = QLabel(f"User: {user_name}") status_layout.addWidget(self.user_label) # Web display link web_host = self.config.get('web_server.host', '127.0.0.1') web_port = self.config.get('web_server.port', 8080) web_url = f"http://{web_host}:{web_port}" self.web_link = QLabel(f'🌐 Open Web Display') self.web_link.setOpenExternalLinks(True) self.web_link.setToolTip(f"Click to open {web_url} in browser (for OBS)") self.web_link.setStyleSheet("QLabel { color: #4CAF50; }") status_layout.addWidget(self.web_link) status_layout.addStretch() main_layout.addWidget(status_widget) # Transcription display self.transcription_display = TranscriptionDisplay( max_lines=self.config.get('display.max_lines', 100), show_timestamps=self.config.get('display.show_timestamps', True), font_family=self.config.get('display.font_family', 'Courier'), font_size=self.config.get('display.font_size', 12) ) main_layout.addWidget(self.transcription_display) # Control buttons control_widget = QWidget() control_widget.setFixedHeight(80) control_layout = QHBoxLayout() control_widget.setLayout(control_layout) self.start_button = QPushButton("▶ Start Transcription") self.start_button.setFixedSize(240, 50) button_font = QFont() button_font.setPointSize(14) button_font.setBold(True) self.start_button.setFont(button_font) self.start_button.clicked.connect(self._toggle_transcription) self.start_button.setStyleSheet("background-color: #2ecc71; color: white;") control_layout.addWidget(self.start_button) self.clear_button = QPushButton("Clear") self.clear_button.setFixedSize(120, 50) self.clear_button.clicked.connect(self._clear_transcriptions) control_layout.addWidget(self.clear_button) self.save_button = QPushButton("💾 Save") self.save_button.setFixedSize(120, 50) self.save_button.clicked.connect(self._save_transcriptions) control_layout.addWidget(self.save_button) control_layout.addStretch() main_layout.addWidget(control_widget) def _initialize_components(self): """Initialize audio, noise suppression, and transcription components.""" # Update status self.status_label.setText("⚙ Initializing...") # Set device based on config device_config = self.config.get('transcription.device', 'auto') self.device_manager.set_device(device_config) # Initialize transcription engine model_size = self.config.get('transcription.model', 'base') language = self.config.get('transcription.language', 'en') device = self.device_manager.get_device_for_whisper() compute_type = self.device_manager.get_compute_type() # Track current settings self.current_model_size = model_size self.current_device_config = device_config self.transcription_engine = TranscriptionEngine( model_size=model_size, device=device, compute_type=compute_type, language=language, min_confidence=self.config.get('processing.min_confidence', 0.5) ) # Load model in background thread self.model_loader_thread = ModelLoaderThread(self.transcription_engine) self.model_loader_thread.finished.connect(self._on_model_loaded) self.model_loader_thread.start() def _on_model_loaded(self, success: bool, message: str): """Handle model loading completion.""" if success: # Update device label with actual device used if self.transcription_engine: actual_device = self.transcription_engine.device compute_type = self.transcription_engine.compute_type device_display = f"{actual_device.upper()} ({compute_type})" self.device_label.setText(f"Device: {device_display}") host = self.config.get('web_server.host', '127.0.0.1') port = self.config.get('web_server.port', 8080) self.status_label.setText(f"✓ Ready | Web: http://{host}:{port}") self.start_button.setEnabled(True) else: self.status_label.setText("❌ Model loading failed") QMessageBox.critical(self, "Error", message) self.start_button.setEnabled(False) def _start_web_server_if_enabled(self): """Start web server.""" try: host = self.config.get('web_server.host', '127.0.0.1') port = self.config.get('web_server.port', 8080) show_timestamps = self.config.get('display.show_timestamps', True) fade_after_seconds = self.config.get('display.fade_after_seconds', 10) # Try up to 5 ports if the default is in use ports_to_try = [port] + [port + i for i in range(1, 5)] server_started = False for try_port in ports_to_try: print(f"Attempting to start web server at http://{host}:{try_port}") self.web_server = TranscriptionWebServer( host=host, port=try_port, show_timestamps=show_timestamps, fade_after_seconds=fade_after_seconds ) self.web_server_thread = WebServerThread(self.web_server) self.web_server_thread.start() # Give it a moment to start and check for errors import time time.sleep(0.5) if self.web_server_thread.error: error_str = str(self.web_server_thread.error) # Check if it's a port-in-use error if "address already in use" in error_str.lower() or "errno 98" in error_str.lower(): print(f"Port {try_port} is in use, trying next port...") self.web_server = None self.web_server_thread = None continue else: # Different error, don't retry print(f"Web server failed to start: {self.web_server_thread.error}") self.web_server = None self.web_server_thread = None break else: # Success! print(f"✓ Web server started successfully at http://{host}:{try_port}") if try_port != port: print(f" Note: Using port {try_port} instead of configured port {port}") server_started = True break if not server_started: print(f"WARNING: Could not start web server on any port from {ports_to_try[0]} to {ports_to_try[-1]}") except Exception as e: print(f"ERROR: Failed to initialize web server: {e}") import traceback traceback.print_exc() self.web_server = None self.web_server_thread = None def _toggle_transcription(self): """Start or stop transcription.""" if not self.is_transcribing: self._start_transcription() else: self._stop_transcription() def _start_transcription(self): """Start transcription.""" try: # Check if engine is ready if not self.transcription_engine or not self.transcription_engine.is_loaded: QMessageBox.critical(self, "Error", "Transcription engine not ready") return # Get audio device audio_device_str = self.config.get('audio.input_device', 'default') audio_device = None if audio_device_str == 'default' else int(audio_device_str) # Initialize audio capture self.audio_capture = AudioCapture( sample_rate=self.config.get('audio.sample_rate', 16000), chunk_duration=self.config.get('audio.chunk_duration', 3.0), overlap_duration=self.config.get('audio.overlap_duration', 0.5), device=audio_device ) # Initialize noise suppressor self.noise_suppressor = NoiseSuppressor( sample_rate=self.config.get('audio.sample_rate', 16000), method="noisereduce" if self.config.get('noise_suppression.enabled', True) else "none", strength=self.config.get('noise_suppression.strength', 0.7), use_vad=self.config.get('processing.use_vad', True) ) # Initialize server sync if enabled if self.config.get('server_sync.enabled', False): self._start_server_sync() # Start recording self.audio_capture.start_recording(callback=self._process_audio_chunk) # Update UI self.is_transcribing = True self.start_button.setText("⏸ Stop Transcription") self.start_button.setStyleSheet("background-color: #e74c3c; color: white;") self.status_label.setText("🔴 Transcribing...") except Exception as e: QMessageBox.critical(self, "Error", f"Failed to start transcription:\n{e}") print(f"Error starting transcription: {e}") def _stop_transcription(self): """Stop transcription.""" try: # Stop recording if self.audio_capture: self.audio_capture.stop_recording() # Stop server sync if running if self.server_sync_client: self.server_sync_client.stop() self.server_sync_client = None # Update UI self.is_transcribing = False self.start_button.setText("▶ Start Transcription") self.start_button.setStyleSheet("background-color: #2ecc71; color: white;") self.status_label.setText("✓ Ready") except Exception as e: QMessageBox.critical(self, "Error", f"Failed to stop transcription:\n{e}") print(f"Error stopping transcription: {e}") def _process_audio_chunk(self, audio_chunk): """Process an audio chunk (noise suppression + transcription).""" def process(): try: # Apply noise suppression processed_audio = self.noise_suppressor.process(audio_chunk, skip_silent=True) # Skip if silent (VAD filtered it out) if processed_audio is None: return # Transcribe user_name = self.config.get('user.name', 'User') result = self.transcription_engine.transcribe( processed_audio, sample_rate=self.config.get('audio.sample_rate', 16000), user_name=user_name ) # Display result (use Qt signal for thread safety) if result: # We need to update UI from main thread # Note: We don't pass timestamp - let the display widget create it from PySide6.QtCore import QMetaObject, Q_ARG QMetaObject.invokeMethod( self.transcription_display, "add_transcription", Qt.QueuedConnection, Q_ARG(str, result.text), Q_ARG(str, result.user_name) ) # Broadcast to web server if enabled if self.web_server and self.web_server_thread: asyncio.run_coroutine_threadsafe( self.web_server.broadcast_transcription( result.text, result.user_name, result.timestamp ), self.web_server_thread.loop ) # Send to server sync if enabled if self.server_sync_client: import time sync_start = time.time() print(f"[GUI] Sending to server sync: '{result.text[:50]}...'") self.server_sync_client.send_transcription( result.text, result.timestamp ) sync_queue_time = (time.time() - sync_start) * 1000 print(f"[GUI] Queued for sync in: {sync_queue_time:.1f}ms") except Exception as e: print(f"Error processing audio: {e}") import traceback traceback.print_exc() # Run in background thread from threading import Thread Thread(target=process, daemon=True).start() def _clear_transcriptions(self): """Clear all transcriptions.""" reply = QMessageBox.question( self, "Clear Transcriptions", "Are you sure you want to clear all transcriptions?", QMessageBox.Yes | QMessageBox.No ) if reply == QMessageBox.Yes: self.transcription_display.clear_all() def _save_transcriptions(self): """Save transcriptions to file.""" filepath, _ = QFileDialog.getSaveFileName( self, "Save Transcriptions", "", "Text files (*.txt);;All files (*.*)" ) if filepath: if self.transcription_display.save_to_file(filepath): QMessageBox.information(self, "Saved", f"Transcriptions saved to:\n{filepath}") else: QMessageBox.critical(self, "Error", "Failed to save transcriptions") def _open_settings(self): """Open settings dialog.""" # Get audio devices audio_devices = AudioCapture.get_input_devices() if not audio_devices: audio_devices = [(0, "Default")] # Get compute devices compute_devices = self.device_manager.get_device_info() compute_devices.insert(0, ("auto", "Auto-detect")) # Open settings dialog dialog = SettingsDialog( self, self.config, audio_devices, compute_devices, on_save=self._on_settings_saved ) dialog.exec() def _on_settings_saved(self): """Handle settings being saved.""" # Update user label user_name = self.config.get('user.name', 'User') self.user_label.setText(f"User: {user_name}") # Update display settings show_timestamps = self.config.get('display.show_timestamps', True) self.transcription_display.set_max_lines(self.config.get('display.max_lines', 100)) self.transcription_display.set_show_timestamps(show_timestamps) self.transcription_display.set_font( self.config.get('display.font_family', 'Courier'), self.config.get('display.font_size', 12) ) # Update web server settings if self.web_server: self.web_server.show_timestamps = show_timestamps self.web_server.fade_after_seconds = self.config.get('display.fade_after_seconds', 10) # Restart server sync if it was running and settings changed if self.is_transcribing and self.server_sync_client: # Stop old client self.server_sync_client.stop() self.server_sync_client = None # Start new one if enabled if self.config.get('server_sync.enabled', False): self._start_server_sync() # Check if model/device settings changed - reload model if needed new_model = self.config.get('transcription.model', 'base') new_device_config = self.config.get('transcription.device', 'auto') # Only reload if model size or device changed if self.current_model_size != new_model or self.current_device_config != new_device_config: self._reload_model() else: QMessageBox.information(self, "Settings Saved", "Settings have been applied successfully!") def _reload_model(self): """Reload the transcription model with new settings.""" try: # Stop transcription if running was_transcribing = self.is_transcribing if was_transcribing: self._stop_transcription() # Update status self.status_label.setText("⚙ Reloading model...") self.start_button.setEnabled(False) # Unload current model if self.transcription_engine: try: self.transcription_engine.unload_model() except Exception as e: print(f"Warning: Error unloading model: {e}") # Set device based on config device_config = self.config.get('transcription.device', 'auto') self.device_manager.set_device(device_config) # Re-initialize transcription engine model_size = self.config.get('transcription.model', 'base') language = self.config.get('transcription.language', 'en') device = self.device_manager.get_device_for_whisper() compute_type = self.device_manager.get_compute_type() # Update tracked settings self.current_model_size = model_size self.current_device_config = device_config self.transcription_engine = TranscriptionEngine( model_size=model_size, device=device, compute_type=compute_type, language=language, min_confidence=self.config.get('processing.min_confidence', 0.5) ) # Load model in background thread if self.model_loader_thread and self.model_loader_thread.isRunning(): self.model_loader_thread.wait() self.model_loader_thread = ModelLoaderThread(self.transcription_engine) self.model_loader_thread.finished.connect(self._on_model_reloaded) self.model_loader_thread.start() except Exception as e: error_msg = f"Error during model reload: {e}" print(error_msg) import traceback traceback.print_exc() self.status_label.setText("❌ Model reload failed") self.start_button.setEnabled(False) QMessageBox.critical(self, "Error", error_msg) def _on_model_reloaded(self, success: bool, message: str): """Handle model reloading completion.""" if success: # Update device label with actual device used if self.transcription_engine: actual_device = self.transcription_engine.device compute_type = self.transcription_engine.compute_type device_display = f"{actual_device.upper()} ({compute_type})" self.device_label.setText(f"Device: {device_display}") host = self.config.get('web_server.host', '127.0.0.1') port = self.config.get('web_server.port', 8080) self.status_label.setText(f"✓ Ready | Web: http://{host}:{port}") self.start_button.setEnabled(True) QMessageBox.information(self, "Settings Saved", "Model reloaded successfully with new settings!") else: self.status_label.setText("❌ Model loading failed") QMessageBox.critical(self, "Error", f"Failed to reload model:\n{message}") self.start_button.setEnabled(False) def _start_server_sync(self): """Start server sync client.""" try: url = self.config.get('server_sync.url', '') room = self.config.get('server_sync.room', 'default') passphrase = self.config.get('server_sync.passphrase', '') user_name = self.config.get('user.name', 'User') if not url: print("Server sync enabled but no URL configured") return print(f"Starting server sync: {url}, room: {room}, user: {user_name}") self.server_sync_client = ServerSyncClient( url=url, room=room, passphrase=passphrase, user_name=user_name ) self.server_sync_client.start() except Exception as e: print(f"Error starting server sync: {e}") QMessageBox.warning( self, "Server Sync Warning", f"Failed to start server sync:\n{e}\n\nTranscription will continue locally." ) def closeEvent(self, event): """Handle window closing.""" # Stop transcription if running if self.is_transcribing: self._stop_transcription() # Stop web server if self.web_server_thread and self.web_server_thread.is_alive(): try: print("Shutting down web server...") if self.web_server_thread.loop: self.web_server_thread.loop.call_soon_threadsafe(self.web_server_thread.loop.stop) except Exception as e: print(f"Warning: Error stopping web server: {e}") # Unload model if self.transcription_engine: try: self.transcription_engine.unload_model() except Exception as e: print(f"Warning: Error unloading model: {e}") # Wait for model loader thread if self.model_loader_thread and self.model_loader_thread.isRunning(): self.model_loader_thread.wait() event.accept()