# local-transcription/gui/main_window.py

"""Main application window for the local transcription app."""

import customtkinter as ctk
from tkinter import filedialog, messagebox
import threading
from pathlib import Path
import sys

# Add parent directory to path for imports
sys.path.append(str(Path(__file__).parent.parent))

from client.config import Config
from client.device_utils import DeviceManager
from client.audio_capture import AudioCapture
from client.noise_suppression import NoiseSuppressor
from client.transcription_engine import TranscriptionEngine
from gui.transcription_display import TranscriptionDisplay
from gui.settings_dialog import SettingsDialog


class MainWindow(ctk.CTk):
    """Main application window."""

    def __init__(self):
        """Construct the window, build its widgets, and schedule start-up.

        The theme is taken from the ``display.theme`` configuration key. The
        audio/transcription components are not created here; their setup is
        deferred via ``after`` so it runs once the GUI exists.
        """
        super().__init__()

        # Runtime state plus the helpers that live as long as the window.
        self.is_transcribing = False
        self.config = Config()
        self.device_manager = DeviceManager()

        # Pipeline pieces; populated later by _initialize_components and
        # _start_transcription, so they start out empty.
        self.audio_capture: "AudioCapture | None" = None
        self.noise_suppressor: "NoiseSuppressor | None" = None
        self.transcription_engine: "TranscriptionEngine | None" = None

        # Window chrome.
        self.title("Local Transcription")
        self.geometry("900x700")

        # Appearance.
        appearance = self.config.get('display.theme', 'dark')
        ctk.set_appearance_mode(appearance)
        ctk.set_default_color_theme("blue")

        # Widgets.
        self._create_widgets()

        # Route the window-manager close request through our own cleanup.
        self.protocol("WM_DELETE_WINDOW", self._on_closing)

        # Defer component start-up by 100 ms so the GUI is ready first
        # (the delay is there to avoid XCB threading issues).
        self.after(100, self._initialize_components)

    def _create_widgets(self):
        """Lay out the header, status bar, transcript view, and control bar.

        Widgets that other methods need later are stored on ``self``
        (``settings_button``, ``status_label``, ``device_label``,
        ``user_label``, ``transcription_display``, ``start_button``,
        ``clear_button``, ``save_button``); purely structural frames stay
        local.
        """

        def fixed_bar(height, pady):
            # Full-width strip whose height is not resized by its children.
            bar = ctk.CTkFrame(self, height=height)
            bar.pack(fill="x", padx=10, pady=pady)
            bar.pack_propagate(False)
            return bar

        def status_text(parent, text, size):
            # Left-aligned label used for the entries of the status bar.
            label = ctk.CTkLabel(parent, text=text, font=("", size))
            label.pack(side="left", padx=20)
            return label

        def side_button(parent, text, command):
            # Secondary action button shown next to the start/stop button.
            button = ctk.CTkButton(
                parent, text=text, command=command, width=120, height=50
            )
            button.pack(side="left", padx=10, pady=15)
            return button

        # --- Header: application title on the left, settings on the right ---
        header = fixed_bar(80, (10, 0))

        heading = ctk.CTkLabel(
            header, text="Local Transcription", font=("", 24, "bold")
        )
        heading.pack(side="left", padx=20, pady=20)

        self.settings_button = ctk.CTkButton(
            header, text="⚙ Settings", command=self._open_settings, width=120
        )
        self.settings_button.pack(side="right", padx=20, pady=20)

        # --- Status bar: state, compute device, user name ---
        status_bar = fixed_bar(60, (10, 0))

        self.status_label = status_text(status_bar, "⚫ Ready", 14)

        # Show the description of the first reported device, if there is one.
        devices = self.device_manager.get_device_info()
        if devices:
            device_name = devices[0][1]
        else:
            device_name = "No device"
        self.device_label = status_text(status_bar, f"Device: {device_name}", 12)

        current_user = self.config.get('user.name', 'User')
        self.user_label = status_text(status_bar, f"User: {current_user}", 12)

        # --- Transcript area: takes all remaining space ---
        transcript_area = ctk.CTkFrame(self)
        transcript_area.pack(fill="both", expand=True, padx=10, pady=10)

        line_limit = self.config.get('display.max_lines', 100)
        with_timestamps = self.config.get('display.show_timestamps', True)
        text_font = ("Courier", self.config.get('display.font_size', 12))
        self.transcription_display = TranscriptionDisplay(
            transcript_area,
            max_lines=line_limit,
            show_timestamps=with_timestamps,
            font=text_font,
        )
        self.transcription_display.pack(fill="both", expand=True, padx=10, pady=10)

        # --- Control bar: start/stop, clear, save ---
        controls = fixed_bar(80, (0, 10))

        self.start_button = ctk.CTkButton(
            controls,
            text="▶ Start Transcription",
            command=self._toggle_transcription,
            width=200,
            height=50,
            font=("", 16, "bold"),
            fg_color="green",
        )
        self.start_button.pack(side="left", padx=20, pady=15)

        self.clear_button = side_button(controls, "Clear", self._clear_transcriptions)
        self.save_button = side_button(controls, "💾 Save", self._save_transcriptions)

    def _initialize_components(self):
        """Select the compute device and load the transcription model.

        Progress and outcome are reported through the status label. A failed
        model load or any exception raised during set-up is additionally
        shown to the user in an error dialog; exceptions are also printed.
        """
        status = self.status_label
        status.configure(text="⚙ Initializing...")
        # Process pending events so the status text is drawn before the
        # blocking model load below.
        self.update()

        try:
            # Apply the configured compute device ('auto' by default).
            self.device_manager.set_device(
                self.config.get('transcription.device', 'auto')
            )

            # Collect the engine settings from config and the device manager.
            engine_options = {
                "model_size": self.config.get('transcription.model', 'base'),
                "language": self.config.get('transcription.language', 'en'),
                "device": self.device_manager.get_device_for_whisper(),
                "compute_type": self.device_manager.get_compute_type(),
                "min_confidence": self.config.get('processing.min_confidence', 0.5),
            }
            self.transcription_engine = TranscriptionEngine(**engine_options)

            # The model is loaded synchronously on purpose, to avoid X11
            # threading issues.
            if self.transcription_engine.load_model():
                status.configure(text="✓ Ready")
                return

            status.configure(text="❌ Model loading failed")
            messagebox.showerror("Error", "Failed to load transcription model")

        except Exception as err:
            print(f"Error initializing components: {err}")
            status.configure(text="❌ Initialization failed")
            messagebox.showerror("Error", f"Failed to initialize:\n{err}")

    def _update_status(self, status: str):
        """Update status label (thread-safe)."""
        self.after(0, lambda: self.status_label.configure(text=status))

    def _toggle_transcription(self):
        """Start or stop transcription."""
        if not self.is_transcribing:
            self._start_transcription()
        else:
            self._stop_transcription()

    def _start_transcription(self):
        """Create the audio pipeline and begin recording.

        Does nothing but show an error dialog when the transcription engine
        is missing or not loaded. On success the window switches to its
        "recording" state. Any exception raised along the way is shown in an
        error dialog and printed; the recording state is then left unchanged.
        """
        try:
            # Refuse to start without a loaded engine.
            engine = self.transcription_engine
            if not (engine and engine.is_loaded):
                messagebox.showerror("Error", "Transcription engine not ready")
                return

            # 'default' selects no explicit device; anything else is
            # converted to an integer device index.
            configured_input = self.config.get('audio.input_device', 'default')
            if configured_input == 'default':
                input_index = None
            else:
                input_index = int(configured_input)

            # Audio source.
            self.audio_capture = AudioCapture(
                sample_rate=self.config.get('audio.sample_rate', 16000),
                chunk_duration=self.config.get('audio.chunk_duration', 3.0),
                device=input_index,
            )

            # Noise suppression stage; the method is "none" when disabled.
            suppressor_rate = self.config.get('audio.sample_rate', 16000)
            if self.config.get('noise_suppression.enabled', True):
                suppression_method = "noisereduce"
            else:
                suppression_method = "none"
            self.noise_suppressor = NoiseSuppressor(
                sample_rate=suppressor_rate,
                method=suppression_method,
                strength=self.config.get('noise_suppression.strength', 0.7),
                use_vad=self.config.get('processing.use_vad', True),
            )

            # Each captured chunk is handed to _process_audio_chunk.
            self.audio_capture.start_recording(callback=self._process_audio_chunk)

            # Reflect the new state in the UI.
            self.is_transcribing = True
            self.start_button.configure(text="⏸ Stop Transcription", fg_color="red")
            self.status_label.configure(text="🔴 Recording...")

        except Exception as err:
            messagebox.showerror("Error", f"Failed to start transcription:\n{err}")
            print(f"Error starting transcription: {err}")

    def _stop_transcription(self):
        """Stop transcription."""
        try:
            # Stop recording
            if self.audio_capture:
                self.audio_capture.stop_recording()

            # Update UI
            self.is_transcribing = False
            self.start_button.configure(text="▶ Start Transcription", fg_color="green")
            self.status_label.configure(text="✓ Ready")

        except Exception as e:
            messagebox.showerror("Error", f"Failed to stop transcription:\n{e}")
            print(f"Error stopping transcription: {e}")

    def _process_audio_chunk(self, audio_chunk):
        """Process an audio chunk (noise suppression + transcription)."""
        def process():
            try:
                # Apply noise suppression
                processed_audio = self.noise_suppressor.process(audio_chunk, skip_silent=True)

                # Skip if silent (VAD filtered it out)
                if processed_audio is None:
                    return

                # Transcribe
                user_name = self.config.get('user.name', 'User')
                result = self.transcription_engine.transcribe(
                    processed_audio,
                    sample_rate=self.config.get('audio.sample_rate', 16000),
                    user_name=user_name
                )

                # Display result
                if result:
                    self.after(0, lambda: self.transcription_display.add_transcription(
                        text=result.text,
                        user_name=result.user_name,
                        timestamp=result.timestamp
                    ))

            except Exception as e:
                print(f"Error processing audio: {e}")

        # Run in background thread
        threading.Thread(target=process, daemon=True).start()

    def _clear_transcriptions(self):
        """Ask for confirmation and, if given, empty the transcript view."""
        confirmed = messagebox.askyesno(
            "Clear Transcriptions",
            "Are you sure you want to clear all transcriptions?",
        )
        if not confirmed:
            return
        self.transcription_display.clear()

    def _save_transcriptions(self):
        """Prompt for a destination and write the transcript there.

        Nothing happens when the file dialog returns an empty path. The
        outcome of the save is reported in an info or error dialog.
        """
        target = filedialog.asksaveasfilename(
            defaultextension=".txt",
            filetypes=[("Text files", "*.txt"), ("All files", "*.*")],
        )
        if not target:
            return

        saved = self.transcription_display.save_to_file(target)
        if not saved:
            messagebox.showerror("Error", "Failed to save transcriptions")
            return
        messagebox.showinfo("Saved", f"Transcriptions saved to:\n{target}")

    def _open_settings(self):
        """Open the settings dialog with the available audio/compute devices.

        The audio list falls back to a single ``(0, "Default")`` entry when
        no input devices are reported. The compute list always starts with
        an ``("auto", "Auto-detect")`` entry.
        """
        # Audio inputs, with a placeholder when none are reported.
        audio_devices = AudioCapture.get_input_devices()
        if not audio_devices:
            audio_devices = [(0, "Default")]

        # Build a new list instead of inserting into the one returned by the
        # device manager: if that list is shared or cached, an in-place
        # insert would add another "Auto-detect" row every time the dialog
        # is opened. An empty/None result is treated as "no devices".
        reported_devices = self.device_manager.get_device_info() or []
        compute_devices = [("auto", "Auto-detect"), *reported_devices]

        # The dialog calls _on_settings_saved via its on_save hook.
        SettingsDialog(
            self,
            self.config,
            audio_devices,
            compute_devices,
            on_save=self._on_settings_saved
        )

    def _on_settings_saved(self):
        """Apply the settings that can change without a restart.

        Refreshes the user-name label and the transcript view's line limit
        and timestamp option, then tells the user that the remaining
        settings need a restart.
        """
        settings = self.config

        # User name shown in the status bar.
        current_user = settings.get('user.name', 'User')
        self.user_label.configure(text=f"User: {current_user}")

        # Live display options.
        view = self.transcription_display
        view.set_max_lines(settings.get('display.max_lines', 100))
        view.set_show_timestamps(settings.get('display.show_timestamps', True))

        # Model and device changes are not applied here.
        messagebox.showinfo(
            "Settings Saved",
            "Some settings (model size, device) require restarting the application to take effect.",
        )

    def _on_closing(self):
        """Handle window closing."""
        # Stop transcription if running
        if self.is_transcribing:
            self._stop_transcription()

        # Unload model
        if self.transcription_engine:
            self.transcription_engine.unload_model()

        # Close window
        self.destroy()