"""Main application window for the local transcription app.""" import customtkinter as ctk from tkinter import filedialog, messagebox import threading from pathlib import Path import sys # Add parent directory to path for imports sys.path.append(str(Path(__file__).parent.parent)) from client.config import Config from client.device_utils import DeviceManager from client.audio_capture import AudioCapture from client.noise_suppression import NoiseSuppressor from client.transcription_engine import TranscriptionEngine from gui.transcription_display import TranscriptionDisplay from gui.settings_dialog import SettingsDialog class MainWindow(ctk.CTk): """Main application window.""" def __init__(self): """Initialize the main window.""" super().__init__() # Application state self.is_transcribing = False self.config = Config() self.device_manager = DeviceManager() # Components (initialized later) self.audio_capture: AudioCapture = None self.noise_suppressor: NoiseSuppressor = None self.transcription_engine: TranscriptionEngine = None # Configure window self.title("Local Transcription") self.geometry("900x700") # Set theme ctk.set_appearance_mode(self.config.get('display.theme', 'dark')) ctk.set_default_color_theme("blue") # Create UI self._create_widgets() # Handle window close self.protocol("WM_DELETE_WINDOW", self._on_closing) # Initialize components after GUI is ready (delay to avoid XCB threading issues) self.after(100, self._initialize_components) def _create_widgets(self): """Create all UI widgets.""" # Header frame header_frame = ctk.CTkFrame(self, height=80) header_frame.pack(fill="x", padx=10, pady=(10, 0)) header_frame.pack_propagate(False) # Title title_label = ctk.CTkLabel( header_frame, text="Local Transcription", font=("", 24, "bold") ) title_label.pack(side="left", padx=20, pady=20) # Settings button self.settings_button = ctk.CTkButton( header_frame, text="⚙ Settings", command=self._open_settings, width=120 ) self.settings_button.pack(side="right", padx=20, pady=20) # Status frame status_frame = ctk.CTkFrame(self, height=60) status_frame.pack(fill="x", padx=10, pady=(10, 0)) status_frame.pack_propagate(False) # Status label self.status_label = ctk.CTkLabel( status_frame, text="⚫ Ready", font=("", 14) ) self.status_label.pack(side="left", padx=20) # Device info device_info = self.device_manager.get_device_info() device_text = device_info[0][1] if device_info else "No device" self.device_label = ctk.CTkLabel( status_frame, text=f"Device: {device_text}", font=("", 12) ) self.device_label.pack(side="left", padx=20) # User name display user_name = self.config.get('user.name', 'User') self.user_label = ctk.CTkLabel( status_frame, text=f"User: {user_name}", font=("", 12) ) self.user_label.pack(side="left", padx=20) # Transcription display frame display_frame = ctk.CTkFrame(self) display_frame.pack(fill="both", expand=True, padx=10, pady=10) # Transcription display self.transcription_display = TranscriptionDisplay( display_frame, max_lines=self.config.get('display.max_lines', 100), show_timestamps=self.config.get('display.show_timestamps', True), font=("Courier", self.config.get('display.font_size', 12)) ) self.transcription_display.pack(fill="both", expand=True, padx=10, pady=10) # Control frame control_frame = ctk.CTkFrame(self, height=80) control_frame.pack(fill="x", padx=10, pady=(0, 10)) control_frame.pack_propagate(False) # Start/Stop button self.start_button = ctk.CTkButton( control_frame, text="▶ Start Transcription", command=self._toggle_transcription, width=200, height=50, font=("", 16, "bold"), fg_color="green" ) self.start_button.pack(side="left", padx=20, pady=15) # Clear button self.clear_button = ctk.CTkButton( control_frame, text="Clear", command=self._clear_transcriptions, width=120, height=50 ) self.clear_button.pack(side="left", padx=10, pady=15) # Save button self.save_button = ctk.CTkButton( control_frame, text="💾 Save", command=self._save_transcriptions, width=120, height=50 ) self.save_button.pack(side="left", padx=10, pady=15) def _initialize_components(self): """Initialize audio, noise suppression, and transcription components.""" # Update status self.status_label.configure(text="⚙ Initializing...") self.update() try: # Set device based on config device_config = self.config.get('transcription.device', 'auto') self.device_manager.set_device(device_config) # Initialize transcription engine model_size = self.config.get('transcription.model', 'base') language = self.config.get('transcription.language', 'en') device = self.device_manager.get_device_for_whisper() compute_type = self.device_manager.get_compute_type() self.transcription_engine = TranscriptionEngine( model_size=model_size, device=device, compute_type=compute_type, language=language, min_confidence=self.config.get('processing.min_confidence', 0.5) ) # Load model (synchronously to avoid X11 threading issues) success = self.transcription_engine.load_model() if success: self.status_label.configure(text="✓ Ready") else: self.status_label.configure(text="❌ Model loading failed") messagebox.showerror("Error", "Failed to load transcription model") except Exception as e: print(f"Error initializing components: {e}") self.status_label.configure(text="❌ Initialization failed") messagebox.showerror("Error", f"Failed to initialize:\n{e}") def _update_status(self, status: str): """Update status label (thread-safe).""" self.after(0, lambda: self.status_label.configure(text=status)) def _toggle_transcription(self): """Start or stop transcription.""" if not self.is_transcribing: self._start_transcription() else: self._stop_transcription() def _start_transcription(self): """Start transcription.""" try: # Check if engine is ready if not self.transcription_engine or not self.transcription_engine.is_loaded: messagebox.showerror("Error", "Transcription engine not ready") return # Get audio device audio_device_str = self.config.get('audio.input_device', 'default') audio_device = None if audio_device_str == 'default' else int(audio_device_str) # Initialize audio capture self.audio_capture = AudioCapture( sample_rate=self.config.get('audio.sample_rate', 16000), chunk_duration=self.config.get('audio.chunk_duration', 3.0), device=audio_device ) # Initialize noise suppressor self.noise_suppressor = NoiseSuppressor( sample_rate=self.config.get('audio.sample_rate', 16000), method="noisereduce" if self.config.get('noise_suppression.enabled', True) else "none", strength=self.config.get('noise_suppression.strength', 0.7), use_vad=self.config.get('processing.use_vad', True) ) # Start recording self.audio_capture.start_recording(callback=self._process_audio_chunk) # Update UI self.is_transcribing = True self.start_button.configure(text="⏸ Stop Transcription", fg_color="red") self.status_label.configure(text="🔴 Recording...") except Exception as e: messagebox.showerror("Error", f"Failed to start transcription:\n{e}") print(f"Error starting transcription: {e}") def _stop_transcription(self): """Stop transcription.""" try: # Stop recording if self.audio_capture: self.audio_capture.stop_recording() # Update UI self.is_transcribing = False self.start_button.configure(text="▶ Start Transcription", fg_color="green") self.status_label.configure(text="✓ Ready") except Exception as e: messagebox.showerror("Error", f"Failed to stop transcription:\n{e}") print(f"Error stopping transcription: {e}") def _process_audio_chunk(self, audio_chunk): """Process an audio chunk (noise suppression + transcription).""" def process(): try: # Apply noise suppression processed_audio = self.noise_suppressor.process(audio_chunk, skip_silent=True) # Skip if silent (VAD filtered it out) if processed_audio is None: return # Transcribe user_name = self.config.get('user.name', 'User') result = self.transcription_engine.transcribe( processed_audio, sample_rate=self.config.get('audio.sample_rate', 16000), user_name=user_name ) # Display result if result: self.after(0, lambda: self.transcription_display.add_transcription( text=result.text, user_name=result.user_name, timestamp=result.timestamp )) except Exception as e: print(f"Error processing audio: {e}") # Run in background thread threading.Thread(target=process, daemon=True).start() def _clear_transcriptions(self): """Clear all transcriptions.""" if messagebox.askyesno("Clear Transcriptions", "Are you sure you want to clear all transcriptions?"): self.transcription_display.clear() def _save_transcriptions(self): """Save transcriptions to file.""" filepath = filedialog.asksaveasfilename( defaultextension=".txt", filetypes=[("Text files", "*.txt"), ("All files", "*.*")] ) if filepath: if self.transcription_display.save_to_file(filepath): messagebox.showinfo("Saved", f"Transcriptions saved to:\n{filepath}") else: messagebox.showerror("Error", "Failed to save transcriptions") def _open_settings(self): """Open settings dialog.""" # Get audio devices audio_devices = AudioCapture.get_input_devices() if not audio_devices: audio_devices = [(0, "Default")] # Get compute devices compute_devices = self.device_manager.get_device_info() compute_devices.insert(0, ("auto", "Auto-detect")) # Open settings dialog SettingsDialog( self, self.config, audio_devices, compute_devices, on_save=self._on_settings_saved ) def _on_settings_saved(self): """Handle settings being saved.""" # Update user label user_name = self.config.get('user.name', 'User') self.user_label.configure(text=f"User: {user_name}") # Update display settings self.transcription_display.set_max_lines(self.config.get('display.max_lines', 100)) self.transcription_display.set_show_timestamps(self.config.get('display.show_timestamps', True)) # Note: Model/device changes require restart messagebox.showinfo( "Settings Saved", "Some settings (model size, device) require restarting the application to take effect." ) def _on_closing(self): """Handle window closing.""" # Stop transcription if running if self.is_transcribing: self._stop_transcription() # Unload model if self.transcription_engine: self.transcription_engine.unload_model() # Close window self.destroy()