gui/main_window.py

"""Main application window for the local transcription app."""

import customtkinter as ctk
from tkinter import filedialog, messagebox
import threading
from pathlib import Path
import sys

# Add parent directory to path for imports
sys.path.append(str(Path(__file__).parent.parent))

from client.config import Config
from client.device_utils import DeviceManager
from client.audio_capture import AudioCapture
from client.noise_suppression import NoiseSuppressor
from client.transcription_engine import TranscriptionEngine
from gui.transcription_display import TranscriptionDisplay
from gui.settings_dialog import SettingsDialog


class MainWindow(ctk.CTk):
    """Main application window.

    Builds the header, status bar, transcription display and control buttons,
    and wires them to the capture -> noise suppression -> transcription
    pipeline provided by the ``client`` package.
    """

    def __init__(self):
        """Create the window, build the UI and schedule component start-up."""
        super().__init__()

        # Application state
        self.is_transcribing = False
        self.config = Config()
        self.device_manager = DeviceManager()

        # Pipeline components. The engine is created in
        # _initialize_components(); capture and suppressor are (re)created
        # on every _start_transcription() call.
        self.audio_capture: AudioCapture = None
        self.noise_suppressor: NoiseSuppressor = None
        self.transcription_engine: TranscriptionEngine = None

        # Configure window
        self.title("Local Transcription")
        self.geometry("900x700")

        # Set theme
        ctk.set_appearance_mode(self.config.get('display.theme', 'dark'))
        ctk.set_default_color_theme("blue")

        # Create UI
        self._create_widgets()

        # Route the window-manager close button through our cleanup handler
        self.protocol("WM_DELETE_WINDOW", self._on_closing)

        # Initialize components after GUI is ready (delay to avoid XCB threading issues)
        self.after(100, self._initialize_components)

    def _create_widgets(self):
        """Create and lay out all UI widgets.

        Layout, top to bottom: header (title + settings button), status bar
        (status, compute device, user name), transcription display, and the
        control bar (start/stop, clear, save). Widgets that are updated later
        are stored as attributes on ``self``.
        """
        # Header frame (fixed height: pack_propagate(False) stops children
        # from resizing it)
        header_frame = ctk.CTkFrame(self, height=80)
        header_frame.pack(fill="x", padx=10, pady=(10, 0))
        header_frame.pack_propagate(False)

        # Title
        title_label = ctk.CTkLabel(
            header_frame,
            text="Local Transcription",
            font=("", 24, "bold")
        )
        title_label.pack(side="left", padx=20, pady=20)

        # Settings button
        self.settings_button = ctk.CTkButton(
            header_frame,
            text="⚙ Settings",
            command=self._open_settings,
            width=120
        )
        self.settings_button.pack(side="right", padx=20, pady=20)

        # Status frame
        status_frame = ctk.CTkFrame(self, height=60)
        status_frame.pack(fill="x", padx=10, pady=(10, 0))
        status_frame.pack_propagate(False)

        # Status label
        self.status_label = ctk.CTkLabel(
            status_frame,
            text="⚫ Ready",
            font=("", 14)
        )
        self.status_label.pack(side="left", padx=20)

        # Device info: show the second field of the first reported entry
        # (presumably a human-readable device name -- confirm against
        # DeviceManager.get_device_info()).
        device_info = self.device_manager.get_device_info()
        device_text = device_info[0][1] if device_info else "No device"
        self.device_label = ctk.CTkLabel(
            status_frame,
            text=f"Device: {device_text}",
            font=("", 12)
        )
        self.device_label.pack(side="left", padx=20)

        # User name display
        user_name = self.config.get('user.name', 'User')
        self.user_label = ctk.CTkLabel(
            status_frame,
            text=f"User: {user_name}",
            font=("", 12)
        )
        self.user_label.pack(side="left", padx=20)

        # Transcription display frame (takes all remaining vertical space)
        display_frame = ctk.CTkFrame(self)
        display_frame.pack(fill="both", expand=True, padx=10, pady=10)

        # Transcription display
        self.transcription_display = TranscriptionDisplay(
            display_frame,
            max_lines=self.config.get('display.max_lines', 100),
            show_timestamps=self.config.get('display.show_timestamps', True),
            font=("Courier", self.config.get('display.font_size', 12))
        )
        self.transcription_display.pack(fill="both", expand=True, padx=10, pady=10)

        # Control frame
        control_frame = ctk.CTkFrame(self, height=80)
        control_frame.pack(fill="x", padx=10, pady=(0, 10))
        control_frame.pack_propagate(False)

        # Start/Stop button (text and colour are swapped by
        # _start_transcription / _stop_transcription)
        self.start_button = ctk.CTkButton(
            control_frame,
            text="▶ Start Transcription",
            command=self._toggle_transcription,
            width=200,
            height=50,
            font=("", 16, "bold"),
            fg_color="green"
        )
        self.start_button.pack(side="left", padx=20, pady=15)

        # Clear button
        self.clear_button = ctk.CTkButton(
            control_frame,
            text="Clear",
            command=self._clear_transcriptions,
            width=120,
            height=50
        )
        self.clear_button.pack(side="left", padx=10, pady=15)

        # Save button
        self.save_button = ctk.CTkButton(
            control_frame,
            text="💾 Save",
            command=self._save_transcriptions,
            width=120,
            height=50
        )
        self.save_button.pack(side="left", padx=10, pady=15)

    def _initialize_components(self):
        """Select the compute device and load the transcription model.

        Runs on the GUI thread (scheduled from ``__init__``), so the window
        is unresponsive while the model loads. Any failure is reported in the
        status label and an error dialog instead of being raised.
        """
        # Update status and force a redraw so the message is visible before
        # the blocking model load below.
        self.status_label.configure(text="⚙ Initializing...")
        self.update()

        try:
            # Set device based on config
            device_config = self.config.get('transcription.device', 'auto')
            self.device_manager.set_device(device_config)

            # Initialize transcription engine
            model_size = self.config.get('transcription.model', 'base')
            language = self.config.get('transcription.language', 'en')
            device = self.device_manager.get_device_for_whisper()
            compute_type = self.device_manager.get_compute_type()

            self.transcription_engine = TranscriptionEngine(
                model_size=model_size,
                device=device,
                compute_type=compute_type,
                language=language,
                min_confidence=self.config.get('processing.min_confidence', 0.5)
            )

            # Load model (synchronously to avoid X11 threading issues)
            success = self.transcription_engine.load_model()

            if success:
                self.status_label.configure(text="✓ Ready")
            else:
                self.status_label.configure(text="❌ Model loading failed")
                messagebox.showerror("Error", "Failed to load transcription model")

        except Exception as e:
            # Top-level UI boundary: report the problem rather than letting
            # the Tk callback die silently.
            print(f"Error initializing components: {e}")
            self.status_label.configure(text="❌ Initialization failed")
            messagebox.showerror("Error", f"Failed to initialize:\n{e}")

    def _update_status(self, status: str):
        """Schedule a status-label update on the Tk event loop.

        The label is not touched directly; the change is queued with
        ``after(0, ...)`` so it is applied from the event loop.
        """
        self.after(0, lambda: self.status_label.configure(text=status))

    def _toggle_transcription(self):
        """Start transcription if idle, otherwise stop it."""
        if not self.is_transcribing:
            self._start_transcription()
        else:
            self._stop_transcription()

    @staticmethod
    def _resolve_audio_device(device_setting):
        """Translate the ``audio.input_device`` setting into a device index.

        Args:
            device_setting: Raw config value. ``None``, an empty or
                whitespace-only string, and ``'default'`` (any case) select
                the default input device.

        Returns:
            ``None`` for the default device, otherwise the setting as ``int``.

        Raises:
            ValueError: If the setting is neither a default marker nor an
                integer.
        """
        if device_setting is None:
            return None
        if isinstance(device_setting, str):
            normalized = device_setting.strip()
            if not normalized or normalized.lower() == 'default':
                return None
            return int(normalized)
        return int(device_setting)

    def _start_transcription(self):
        """Create the capture pipeline and begin recording.

        Does nothing except show an error dialog when the transcription
        engine has not been loaded. Any exception raised while building the
        pipeline or starting the recording is shown in a dialog and printed.
        """
        try:
            # Check if engine is ready
            if not self.transcription_engine or not self.transcription_engine.is_loaded:
                messagebox.showerror("Error", "Transcription engine not ready")
                return

            # Get audio device (None selects the default input device)
            audio_device = self._resolve_audio_device(
                self.config.get('audio.input_device', 'default')
            )

            # Initialize audio capture
            self.audio_capture = AudioCapture(
                sample_rate=self.config.get('audio.sample_rate', 16000),
                chunk_duration=self.config.get('audio.chunk_duration', 3.0),
                device=audio_device
            )

            # Initialize noise suppressor
            self.noise_suppressor = NoiseSuppressor(
                sample_rate=self.config.get('audio.sample_rate', 16000),
                method="noisereduce" if self.config.get('noise_suppression.enabled', True) else "none",
                strength=self.config.get('noise_suppression.strength', 0.7),
                use_vad=self.config.get('processing.use_vad', True)
            )

            # Start recording; each captured chunk is handed to
            # _process_audio_chunk.
            self.audio_capture.start_recording(callback=self._process_audio_chunk)

            # Update UI only after recording started successfully
            self.is_transcribing = True
            self.start_button.configure(text="⏸ Stop Transcription", fg_color="red")
            self.status_label.configure(text="🔴 Recording...")

        except Exception as e:
            messagebox.showerror("Error", f"Failed to start transcription:\n{e}")
            print(f"Error starting transcription: {e}")

    def _stop_transcription(self):
        """Stop recording and return the UI to the idle state.

        Errors from stopping the capture are shown in a dialog and printed;
        in that case the UI state is left unchanged.
        """
        try:
            # Stop recording
            if self.audio_capture:
                self.audio_capture.stop_recording()

            # Update UI
            self.is_transcribing = False
            self.start_button.configure(text="▶ Start Transcription", fg_color="green")
            self.status_label.configure(text="✓ Ready")

        except Exception as e:
            messagebox.showerror("Error", f"Failed to stop transcription:\n{e}")
            print(f"Error stopping transcription: {e}")

    def _process_audio_chunk(self, audio_chunk):
        """Denoise and transcribe one audio chunk on a background thread.

        Args:
            audio_chunk: Chunk delivered by the audio capture callback
                (presumably an array of samples -- confirm against
                AudioCapture).

        A new daemon thread is started per chunk so the caller is never
        blocked by transcription. Results are queued onto the Tk event loop
        with ``after(0, ...)``; errors are printed and otherwise ignored.
        """
        def process():
            try:
                # Apply noise suppression
                processed_audio = self.noise_suppressor.process(audio_chunk, skip_silent=True)

                # Skip if silent (VAD filtered it out)
                if processed_audio is None:
                    return

                # Transcribe
                user_name = self.config.get('user.name', 'User')
                result = self.transcription_engine.transcribe(
                    processed_audio,
                    sample_rate=self.config.get('audio.sample_rate', 16000),
                    user_name=user_name
                )

                # Display result (a falsy result means nothing to show)
                if result:
                    self.after(0, lambda: self.transcription_display.add_transcription(
                        text=result.text,
                        user_name=result.user_name,
                        timestamp=result.timestamp
                    ))

            except Exception as e:
                # Best effort: a failed chunk must not kill the pipeline.
                print(f"Error processing audio: {e}")

        # Run in background thread
        threading.Thread(target=process, daemon=True).start()

    def _clear_transcriptions(self):
        """Clear the transcription display after user confirmation."""
        if messagebox.askyesno("Clear Transcriptions", "Are you sure you want to clear all transcriptions?"):
            self.transcription_display.clear()

    def _save_transcriptions(self):
        """Ask for a target file and save the displayed transcriptions.

        Nothing happens if the user cancels the file dialog. The outcome of
        the save is reported in an info or error dialog.
        """
        filepath = filedialog.asksaveasfilename(
            defaultextension=".txt",
            filetypes=[("Text files", "*.txt"), ("All files", "*.*")]
        )

        if filepath:
            if self.transcription_display.save_to_file(filepath):
                messagebox.showinfo("Saved", f"Transcriptions saved to:\n{filepath}")
            else:
                messagebox.showerror("Error", "Failed to save transcriptions")

    def _open_settings(self):
        """Open the settings dialog with the available audio/compute devices."""
        # Get audio devices, falling back to a single placeholder entry
        audio_devices = AudioCapture.get_input_devices()
        if not audio_devices:
            audio_devices = [(0, "Default")]

        # Get compute devices. Build a new list instead of inserting into the
        # one returned by the device manager, so repeated openings cannot
        # accumulate "Auto-detect" entries in a list the manager may reuse.
        compute_devices = [("auto", "Auto-detect")]
        compute_devices.extend(self.device_manager.get_device_info())

        # Open settings dialog
        SettingsDialog(
            self,
            self.config,
            audio_devices,
            compute_devices,
            on_save=self._on_settings_saved
        )

    def _on_settings_saved(self):
        """Apply the settings that can change without a restart.

        Updates the user label and the display's line limit and timestamp
        flag, then tells the user that model/device changes need a restart.
        """
        # Update user label
        user_name = self.config.get('user.name', 'User')
        self.user_label.configure(text=f"User: {user_name}")

        # Update display settings
        self.transcription_display.set_max_lines(self.config.get('display.max_lines', 100))
        self.transcription_display.set_show_timestamps(self.config.get('display.show_timestamps', True))

        # Note: Model/device changes require restart
        messagebox.showinfo(
            "Settings Saved",
            "Some settings (model size, device) require restarting the application to take effect."
        )

    def _on_closing(self):
        """Handle window closing: stop recording, unload the model, destroy.

        Cleanup failures are printed but never prevent the window from
        closing; ``destroy()`` always runs.
        """
        try:
            # Stop transcription if running
            if self.is_transcribing:
                self._stop_transcription()

            # Unload model
            if self.transcription_engine:
                self.transcription_engine.unload_model()
        except Exception as e:
            # Shutdown is best effort; report and carry on closing.
            print(f"Error during shutdown: {e}")
        finally:
            # Close window even if cleanup failed, otherwise the user would
            # be left with a window that cannot be closed.
            self.destroy()
Initial commit: Local Transcription App v1.0 Phase 1 Complete - Standalone Desktop Application Features: - Real-time speech-to-text with Whisper (faster-whisper) - PySide6 desktop GUI with settings dialog - Web server for OBS browser source integration - Audio capture with automatic sample rate detection and resampling - Noise suppression with Voice Activity Detection (VAD) - Configurable display settings (font, timestamps, fade duration) - Settings apply without restart (with automatic model reloading) - Auto-fade for web display transcriptions - CPU/GPU support with automatic device detection - Standalone executable builds (PyInstaller) - CUDA build support (works on systems without CUDA hardware) Components: - Audio capture with sounddevice - Noise reduction with noisereduce + webrtcvad - Transcription with faster-whisper - GUI with PySide6 - Web server with FastAPI + WebSocket - Configuration system with YAML Build System: - Standard builds (CPU-only): build.sh / build.bat - CUDA builds (universal): build-cuda.sh / build-cuda.bat - Comprehensive BUILD.md documentation - Cross-platform support (Linux, Windows) Documentation: - README.md with project overview and quick start - BUILD.md with detailed build instructions - NEXT_STEPS.md with future enhancement roadmap - INSTALL.md with setup instructions 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com> 2025-12-25 18:48:23 -08:00			`"""Main application window for the local transcription app."""`

			`import customtkinter as ctk`
			`from tkinter import filedialog, messagebox`
			`import threading`
			`from pathlib import Path`
			`import sys`

			`# Add parent directory to path for imports`
			`sys.path.append(str(Path(__file__).parent.parent))`

			`from client.config import Config`
			`from client.device_utils import DeviceManager`
			`from client.audio_capture import AudioCapture`
			`from client.noise_suppression import NoiseSuppressor`
			`from client.transcription_engine import TranscriptionEngine`
			`from gui.transcription_display import TranscriptionDisplay`
			`from gui.settings_dialog import SettingsDialog`


			`class MainWindow(ctk.CTk):`
			`"""Main application window."""`

			`def __init__(self):`
			`"""Initialize the main window."""`
			`super().__init__()`

			`# Application state`
			`self.is_transcribing = False`
			`self.config = Config()`
			`self.device_manager = DeviceManager()`

			`# Components (initialized later)`
			`self.audio_capture: AudioCapture = None`
			`self.noise_suppressor: NoiseSuppressor = None`
			`self.transcription_engine: TranscriptionEngine = None`

			`# Configure window`
			`self.title("Local Transcription")`
			`self.geometry("900x700")`

			`# Set theme`
			`ctk.set_appearance_mode(self.config.get('display.theme', 'dark'))`
			`ctk.set_default_color_theme("blue")`

			`# Create UI`
			`self._create_widgets()`

			`# Handle window close`
			`self.protocol("WM_DELETE_WINDOW", self._on_closing)`

			`# Initialize components after GUI is ready (delay to avoid XCB threading issues)`
			`self.after(100, self._initialize_components)`

			`def _create_widgets(self):`
			`"""Create all UI widgets."""`
			`# Header frame`
			`header_frame = ctk.CTkFrame(self, height=80)`
			`header_frame.pack(fill="x", padx=10, pady=(10, 0))`
			`header_frame.pack_propagate(False)`

			`# Title`
			`title_label = ctk.CTkLabel(`
			`header_frame,`
			`text="Local Transcription",`
			`font=("", 24, "bold")`
			`)`
			`title_label.pack(side="left", padx=20, pady=20)`

			`# Settings button`
			`self.settings_button = ctk.CTkButton(`
			`header_frame,`
			`text="⚙ Settings",`
			`command=self._open_settings,`
			`width=120`
			`)`
			`self.settings_button.pack(side="right", padx=20, pady=20)`

			`# Status frame`
			`status_frame = ctk.CTkFrame(self, height=60)`
			`status_frame.pack(fill="x", padx=10, pady=(10, 0))`
			`status_frame.pack_propagate(False)`

			`# Status label`
			`self.status_label = ctk.CTkLabel(`
			`status_frame,`
			`text="⚫ Ready",`
			`font=("", 14)`
			`)`
			`self.status_label.pack(side="left", padx=20)`

			`# Device info`
			`device_info = self.device_manager.get_device_info()`
			`device_text = device_info[0][1] if device_info else "No device"`
			`self.device_label = ctk.CTkLabel(`
			`status_frame,`
			`text=f"Device: {device_text}",`
			`font=("", 12)`
			`)`
			`self.device_label.pack(side="left", padx=20)`

			`# User name display`
			`user_name = self.config.get('user.name', 'User')`
			`self.user_label = ctk.CTkLabel(`
			`status_frame,`
			`text=f"User: {user_name}",`
			`font=("", 12)`
			`)`
			`self.user_label.pack(side="left", padx=20)`

			`# Transcription display frame`
			`display_frame = ctk.CTkFrame(self)`
			`display_frame.pack(fill="both", expand=True, padx=10, pady=10)`

			`# Transcription display`
			`self.transcription_display = TranscriptionDisplay(`
			`display_frame,`
			`max_lines=self.config.get('display.max_lines', 100),`
			`show_timestamps=self.config.get('display.show_timestamps', True),`
			`font=("Courier", self.config.get('display.font_size', 12))`
			`)`
			`self.transcription_display.pack(fill="both", expand=True, padx=10, pady=10)`

			`# Control frame`
			`control_frame = ctk.CTkFrame(self, height=80)`
			`control_frame.pack(fill="x", padx=10, pady=(0, 10))`
			`control_frame.pack_propagate(False)`

			`# Start/Stop button`
			`self.start_button = ctk.CTkButton(`
			`control_frame,`
			`text="▶ Start Transcription",`
			`command=self._toggle_transcription,`
			`width=200,`
			`height=50,`
			`font=("", 16, "bold"),`
			`fg_color="green"`
			`)`
			`self.start_button.pack(side="left", padx=20, pady=15)`

			`# Clear button`
			`self.clear_button = ctk.CTkButton(`
			`control_frame,`
			`text="Clear",`
			`command=self._clear_transcriptions,`
			`width=120,`
			`height=50`
			`)`
			`self.clear_button.pack(side="left", padx=10, pady=15)`

			`# Save button`
			`self.save_button = ctk.CTkButton(`
			`control_frame,`
			`text="💾 Save",`
			`command=self._save_transcriptions,`
			`width=120,`
			`height=50`
			`)`
			`self.save_button.pack(side="left", padx=10, pady=15)`

			`def _initialize_components(self):`
			`"""Initialize audio, noise suppression, and transcription components."""`
			`# Update status`
			`self.status_label.configure(text="⚙ Initializing...")`
			`self.update()`

			`try:`
			`# Set device based on config`
			`device_config = self.config.get('transcription.device', 'auto')`
			`self.device_manager.set_device(device_config)`

			`# Initialize transcription engine`
			`model_size = self.config.get('transcription.model', 'base')`
			`language = self.config.get('transcription.language', 'en')`
			`device = self.device_manager.get_device_for_whisper()`
			`compute_type = self.device_manager.get_compute_type()`

			`self.transcription_engine = TranscriptionEngine(`
			`model_size=model_size,`
			`device=device,`
			`compute_type=compute_type,`
			`language=language,`
			`min_confidence=self.config.get('processing.min_confidence', 0.5)`
			`)`

			`# Load model (synchronously to avoid X11 threading issues)`
			`success = self.transcription_engine.load_model()`

			`if success:`
			`self.status_label.configure(text="✓ Ready")`
			`else:`
			`self.status_label.configure(text="❌ Model loading failed")`
			`messagebox.showerror("Error", "Failed to load transcription model")`

			`except Exception as e:`
			`print(f"Error initializing components: {e}")`
			`self.status_label.configure(text="❌ Initialization failed")`
			`messagebox.showerror("Error", f"Failed to initialize:\n{e}")`

			`def _update_status(self, status: str):`
			`"""Update status label (thread-safe)."""`
			`self.after(0, lambda: self.status_label.configure(text=status))`

			`def _toggle_transcription(self):`
			`"""Start or stop transcription."""`
			`if not self.is_transcribing:`
			`self._start_transcription()`
			`else:`
			`self._stop_transcription()`

			`def _start_transcription(self):`
			`"""Start transcription."""`
			`try:`
			`# Check if engine is ready`
			`if not self.transcription_engine or not self.transcription_engine.is_loaded:`
			`messagebox.showerror("Error", "Transcription engine not ready")`
			`return`

			`# Get audio device`
			`audio_device_str = self.config.get('audio.input_device', 'default')`
			`audio_device = None if audio_device_str == 'default' else int(audio_device_str)`

			`# Initialize audio capture`
			`self.audio_capture = AudioCapture(`
			`sample_rate=self.config.get('audio.sample_rate', 16000),`
			`chunk_duration=self.config.get('audio.chunk_duration', 3.0),`
			`device=audio_device`
			`)`

			`# Initialize noise suppressor`
			`self.noise_suppressor = NoiseSuppressor(`
			`sample_rate=self.config.get('audio.sample_rate', 16000),`
			`method="noisereduce" if self.config.get('noise_suppression.enabled', True) else "none",`
			`strength=self.config.get('noise_suppression.strength', 0.7),`
			`use_vad=self.config.get('processing.use_vad', True)`
			`)`

			`# Start recording`
			`self.audio_capture.start_recording(callback=self._process_audio_chunk)`

			`# Update UI`
			`self.is_transcribing = True`
			`self.start_button.configure(text="⏸ Stop Transcription", fg_color="red")`
			`self.status_label.configure(text="🔴 Recording...")`

			`except Exception as e:`
			`messagebox.showerror("Error", f"Failed to start transcription:\n{e}")`
			`print(f"Error starting transcription: {e}")`

			`def _stop_transcription(self):`
			`"""Stop transcription."""`
			`try:`
			`# Stop recording`
			`if self.audio_capture:`
			`self.audio_capture.stop_recording()`

			`# Update UI`
			`self.is_transcribing = False`
			`self.start_button.configure(text="▶ Start Transcription", fg_color="green")`
			`self.status_label.configure(text="✓ Ready")`

			`except Exception as e:`
			`messagebox.showerror("Error", f"Failed to stop transcription:\n{e}")`
			`print(f"Error stopping transcription: {e}")`

			`def _process_audio_chunk(self, audio_chunk):`
			`"""Process an audio chunk (noise suppression + transcription)."""`
			`def process():`
			`try:`
			`# Apply noise suppression`
			`processed_audio = self.noise_suppressor.process(audio_chunk, skip_silent=True)`

			`# Skip if silent (VAD filtered it out)`
			`if processed_audio is None:`
			`return`

			`# Transcribe`
			`user_name = self.config.get('user.name', 'User')`
			`result = self.transcription_engine.transcribe(`
			`processed_audio,`
			`sample_rate=self.config.get('audio.sample_rate', 16000),`
			`user_name=user_name`
			`)`

			`# Display result`
			`if result:`
			`self.after(0, lambda: self.transcription_display.add_transcription(`
			`text=result.text,`
			`user_name=result.user_name,`
			`timestamp=result.timestamp`
			`))`

			`except Exception as e:`
			`print(f"Error processing audio: {e}")`

			`# Run in background thread`
			`threading.Thread(target=process, daemon=True).start()`

			`def _clear_transcriptions(self):`
			`"""Clear all transcriptions."""`
			`if messagebox.askyesno("Clear Transcriptions", "Are you sure you want to clear all transcriptions?"):`
			`self.transcription_display.clear()`

			`def _save_transcriptions(self):`
			`"""Save transcriptions to file."""`
			`filepath = filedialog.asksaveasfilename(`
			`defaultextension=".txt",`
			`filetypes=[("Text files", "*.txt"), ("All files", "*.*")]`
			`)`

			`if filepath:`
			`if self.transcription_display.save_to_file(filepath):`
			`messagebox.showinfo("Saved", f"Transcriptions saved to:\n{filepath}")`
			`else:`
			`messagebox.showerror("Error", "Failed to save transcriptions")`

			`def _open_settings(self):`
			`"""Open settings dialog."""`
			`# Get audio devices`
			`audio_devices = AudioCapture.get_input_devices()`
			`if not audio_devices:`
			`audio_devices = [(0, "Default")]`

			`# Get compute devices`
			`compute_devices = self.device_manager.get_device_info()`
			`compute_devices.insert(0, ("auto", "Auto-detect"))`

			`# Open settings dialog`
			`SettingsDialog(`
			`self,`
			`self.config,`
			`audio_devices,`
			`compute_devices,`
			`on_save=self._on_settings_saved`
			`)`

			`def _on_settings_saved(self):`
			`"""Handle settings being saved."""`
			`# Update user label`
			`user_name = self.config.get('user.name', 'User')`
			`self.user_label.configure(text=f"User: {user_name}")`

			`# Update display settings`
			`self.transcription_display.set_max_lines(self.config.get('display.max_lines', 100))`
			`self.transcription_display.set_show_timestamps(self.config.get('display.show_timestamps', True))`

			`# Note: Model/device changes require restart`
			`messagebox.showinfo(`
			`"Settings Saved",`
			`"Some settings (model size, device) require restarting the application to take effect."`
			`)`

			`def _on_closing(self):`
			`"""Handle window closing."""`
			`# Stop transcription if running`
			`if self.is_transcribing:`
			`self._stop_transcription()`

			`# Unload model`
			`if self.transcription_engine:`
			`self.transcription_engine.unload_model()`

			`# Close window`
			`self.destroy()`