Initial commit: Local Transcription App v1.0

Phase 1 Complete - Standalone Desktop Application

Features:
- Real-time speech-to-text with Whisper (faster-whisper)
- PySide6 desktop GUI with settings dialog
- Web server for OBS browser source integration
- Audio capture with automatic sample rate detection and resampling
- Noise suppression with Voice Activity Detection (VAD)
- Configurable display settings (font, timestamps, fade duration)
- Settings apply without restart (with automatic model reloading)
- Auto-fade for web display transcriptions
- CPU/GPU support with automatic device detection
- Standalone executable builds (PyInstaller)
- CUDA build support (works on systems without CUDA hardware)

Components:
- Audio capture with sounddevice
- Noise reduction with noisereduce + webrtcvad
- Transcription with faster-whisper
- GUI with PySide6
- Web server with FastAPI + WebSocket
- Configuration system with YAML

Build System:
- Standard builds (CPU-only): build.sh / build.bat
- CUDA builds (universal): build-cuda.sh / build-cuda.bat
- Comprehensive BUILD.md documentation
- Cross-platform support (Linux, Windows)

Documentation:
- README.md with project overview and quick start
- BUILD.md with detailed build instructions
- NEXT_STEPS.md with future enhancement roadmap
- INSTALL.md with setup instructions

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-12-25 18:48:23 -08:00
commit 472233aec4
31 changed files with 5116 additions and 0 deletions
--- a/gui/__init__.py
+++ b/gui/__init__.py
--- a/gui/main_window.py
+++ b/gui/main_window.py
@@ -0,0 +1,364 @@
+"""Main application window for the local transcription app."""
+
+import customtkinter as ctk
+from tkinter import filedialog, messagebox
+import threading
+from pathlib import Path
+import sys
+
+# Add parent directory to path for imports
+sys.path.append(str(Path(__file__).parent.parent))
+
+from client.config import Config
+from client.device_utils import DeviceManager
+from client.audio_capture import AudioCapture
+from client.noise_suppression import NoiseSuppressor
+from client.transcription_engine import TranscriptionEngine
+from gui.transcription_display import TranscriptionDisplay
+from gui.settings_dialog import SettingsDialog
+
+
+class MainWindow(ctk.CTk):
+    """Main application window."""
+
+    def __init__(self):
+        """Initialize the main window."""
+        super().__init__()
+
+        # Application state
+        self.is_transcribing = False
+        self.config = Config()
+        self.device_manager = DeviceManager()
+
+        # Components (initialized later)
+        self.audio_capture: AudioCapture = None
+        self.noise_suppressor: NoiseSuppressor = None
+        self.transcription_engine: TranscriptionEngine = None
+
+        # Configure window
+        self.title("Local Transcription")
+        self.geometry("900x700")
+
+        # Set theme
+        ctk.set_appearance_mode(self.config.get('display.theme', 'dark'))
+        ctk.set_default_color_theme("blue")
+
+        # Create UI
+        self._create_widgets()
+
+        # Handle window close
+        self.protocol("WM_DELETE_WINDOW", self._on_closing)
+
+        # Initialize components after GUI is ready (delay to avoid XCB threading issues)
+        self.after(100, self._initialize_components)
+
+    def _create_widgets(self):
+        """Create all UI widgets.
+
+        Builds, top to bottom: a header (title + settings button), a status
+        row (status, compute device, user name), the transcription display,
+        and a control row (start/stop, clear, save). Widgets that other
+        methods update later are stored as attributes on ``self``.
+        """
+        # Header frame
+        header_frame = ctk.CTkFrame(self, height=80)
+        header_frame.pack(fill="x", padx=10, pady=(10, 0))
+        # Keep the frame at its fixed height instead of shrinking to its children
+        header_frame.pack_propagate(False)
+
+        # Title
+        title_label = ctk.CTkLabel(
+            header_frame,
+            text="Local Transcription",
+            font=("", 24, "bold")
+        )
+        title_label.pack(side="left", padx=20, pady=20)
+
+        # Settings button
+        self.settings_button = ctk.CTkButton(
+            header_frame,
+            text="⚙ Settings",
+            command=self._open_settings,
+            width=120
+        )
+        self.settings_button.pack(side="right", padx=20, pady=20)
+
+        # Status frame
+        status_frame = ctk.CTkFrame(self, height=60)
+        status_frame.pack(fill="x", padx=10, pady=(10, 0))
+        status_frame.pack_propagate(False)
+
+        # Status label
+        self.status_label = ctk.CTkLabel(
+            status_frame,
+            text="⚫ Ready",
+            font=("", 14)
+        )
+        self.status_label.pack(side="left", padx=20)
+
+        # Device info: show the description of the first reported device,
+        # or a placeholder when the device manager reports none
+        device_info = self.device_manager.get_device_info()
+        device_text = device_info[0][1] if device_info else "No device"
+        self.device_label = ctk.CTkLabel(
+            status_frame,
+            text=f"Device: {device_text}",
+            font=("", 12)
+        )
+        self.device_label.pack(side="left", padx=20)
+
+        # User name display (refreshed in _on_settings_saved)
+        user_name = self.config.get('user.name', 'User')
+        self.user_label = ctk.CTkLabel(
+            status_frame,
+            text=f"User: {user_name}",
+            font=("", 12)
+        )
+        self.user_label.pack(side="left", padx=20)
+
+        # Transcription display frame (takes all remaining vertical space)
+        display_frame = ctk.CTkFrame(self)
+        display_frame.pack(fill="both", expand=True, padx=10, pady=10)
+
+        # Transcription display, configured from the display.* settings
+        self.transcription_display = TranscriptionDisplay(
+            display_frame,
+            max_lines=self.config.get('display.max_lines', 100),
+            show_timestamps=self.config.get('display.show_timestamps', True),
+            font=("Courier", self.config.get('display.font_size', 12))
+        )
+        self.transcription_display.pack(fill="both", expand=True, padx=10, pady=10)
+
+        # Control frame
+        control_frame = ctk.CTkFrame(self, height=80)
+        control_frame.pack(fill="x", padx=10, pady=(0, 10))
+        control_frame.pack_propagate(False)
+
+        # Start/Stop button (text and colour are swapped when recording starts/stops)
+        self.start_button = ctk.CTkButton(
+            control_frame,
+            text="▶ Start Transcription",
+            command=self._toggle_transcription,
+            width=200,
+            height=50,
+            font=("", 16, "bold"),
+            fg_color="green"
+        )
+        self.start_button.pack(side="left", padx=20, pady=15)
+
+        # Clear button
+        self.clear_button = ctk.CTkButton(
+            control_frame,
+            text="Clear",
+            command=self._clear_transcriptions,
+            width=120,
+            height=50
+        )
+        self.clear_button.pack(side="left", padx=10, pady=15)
+
+        # Save button
+        self.save_button = ctk.CTkButton(
+            control_frame,
+            text="💾 Save",
+            command=self._save_transcriptions,
+            width=120,
+            height=50
+        )
+        self.save_button.pack(side="left", padx=10, pady=15)
+
+    def _initialize_components(self):
+        """Initialize audio, noise suppression, and transcription components."""
+        # Update status
+        self.status_label.configure(text="⚙ Initializing...")
+        self.update()
+
+        try:
+            # Set device based on config
+            device_config = self.config.get('transcription.device', 'auto')
+            self.device_manager.set_device(device_config)
+
+            # Initialize transcription engine
+            model_size = self.config.get('transcription.model', 'base')
+            language = self.config.get('transcription.language', 'en')
+            device = self.device_manager.get_device_for_whisper()
+            compute_type = self.device_manager.get_compute_type()
+
+            self.transcription_engine = TranscriptionEngine(
+                model_size=model_size,
+                device=device,
+                compute_type=compute_type,
+                language=language,
+                min_confidence=self.config.get('processing.min_confidence', 0.5)
+            )
+
+            # Load model (synchronously to avoid X11 threading issues)
+            success = self.transcription_engine.load_model()
+
+            if success:
+                self.status_label.configure(text="✓ Ready")
+            else:
+                self.status_label.configure(text="❌ Model loading failed")
+                messagebox.showerror("Error", "Failed to load transcription model")
+
+        except Exception as e:
+            print(f"Error initializing components: {e}")
+            self.status_label.configure(text="❌ Initialization failed")
+            messagebox.showerror("Error", f"Failed to initialize:\n{e}")
+
+    def _update_status(self, status: str):
+        """Update status label (thread-safe)."""
+        self.after(0, lambda: self.status_label.configure(text=status))
+
+    def _toggle_transcription(self):
+        """Start or stop transcription."""
+        if not self.is_transcribing:
+            self._start_transcription()
+        else:
+            self._stop_transcription()
+
+    def _start_transcription(self):
+        """Start transcription."""
+        try:
+            # Check if engine is ready
+            if not self.transcription_engine or not self.transcription_engine.is_loaded:
+                messagebox.showerror("Error", "Transcription engine not ready")
+                return
+
+            # Get audio device
+            audio_device_str = self.config.get('audio.input_device', 'default')
+            audio_device = None if audio_device_str == 'default' else int(audio_device_str)
+
+            # Initialize audio capture
+            self.audio_capture = AudioCapture(
+                sample_rate=self.config.get('audio.sample_rate', 16000),
+                chunk_duration=self.config.get('audio.chunk_duration', 3.0),
+                device=audio_device
+            )
+
+            # Initialize noise suppressor
+            self.noise_suppressor = NoiseSuppressor(
+                sample_rate=self.config.get('audio.sample_rate', 16000),
+                method="noisereduce" if self.config.get('noise_suppression.enabled', True) else "none",
+                strength=self.config.get('noise_suppression.strength', 0.7),
+                use_vad=self.config.get('processing.use_vad', True)
+            )
+
+            # Start recording
+            self.audio_capture.start_recording(callback=self._process_audio_chunk)
+
+            # Update UI
+            self.is_transcribing = True
+            self.start_button.configure(text="⏸ Stop Transcription", fg_color="red")
+            self.status_label.configure(text="🔴 Recording...")
+
+        except Exception as e:
+            messagebox.showerror("Error", f"Failed to start transcription:\n{e}")
+            print(f"Error starting transcription: {e}")
+
+    def _stop_transcription(self):
+        """Stop transcription."""
+        try:
+            # Stop recording
+            if self.audio_capture:
+                self.audio_capture.stop_recording()
+
+            # Update UI
+            self.is_transcribing = False
+            self.start_button.configure(text="▶ Start Transcription", fg_color="green")
+            self.status_label.configure(text="✓ Ready")
+
+        except Exception as e:
+            messagebox.showerror("Error", f"Failed to stop transcription:\n{e}")
+            print(f"Error stopping transcription: {e}")
+
+    def _process_audio_chunk(self, audio_chunk):
+        """Process an audio chunk (noise suppression + transcription)."""
+        def process():
+            try:
+                # Apply noise suppression
+                processed_audio = self.noise_suppressor.process(audio_chunk, skip_silent=True)
+
+                # Skip if silent (VAD filtered it out)
+                if processed_audio is None:
+                    return
+
+                # Transcribe
+                user_name = self.config.get('user.name', 'User')
+                result = self.transcription_engine.transcribe(
+                    processed_audio,
+                    sample_rate=self.config.get('audio.sample_rate', 16000),
+                    user_name=user_name
+                )
+
+                # Display result
+                if result:
+                    self.after(0, lambda: self.transcription_display.add_transcription(
+                        text=result.text,
+                        user_name=result.user_name,
+                        timestamp=result.timestamp
+                    ))
+
+            except Exception as e:
+                print(f"Error processing audio: {e}")
+
+        # Run in background thread
+        threading.Thread(target=process, daemon=True).start()
+
+    def _clear_transcriptions(self):
+        """Clear all transcriptions."""
+        if messagebox.askyesno("Clear Transcriptions", "Are you sure you want to clear all transcriptions?"):
+            self.transcription_display.clear()
+
+    def _save_transcriptions(self):
+        """Save transcriptions to file."""
+        filepath = filedialog.asksaveasfilename(
+            defaultextension=".txt",
+            filetypes=[("Text files", "*.txt"), ("All files", "*.*")]
+        )
+
+        if filepath:
+            if self.transcription_display.save_to_file(filepath):
+                messagebox.showinfo("Saved", f"Transcriptions saved to:\n{filepath}")
+            else:
+                messagebox.showerror("Error", "Failed to save transcriptions")
+
+    def _open_settings(self):
+        """Open settings dialog."""
+        # Get audio devices
+        audio_devices = AudioCapture.get_input_devices()
+        if not audio_devices:
+            audio_devices = [(0, "Default")]
+
+        # Get compute devices
+        compute_devices = self.device_manager.get_device_info()
+        compute_devices.insert(0, ("auto", "Auto-detect"))
+
+        # Open settings dialog
+        SettingsDialog(
+            self,
+            self.config,
+            audio_devices,
+            compute_devices,
+            on_save=self._on_settings_saved
+        )
+
+    def _on_settings_saved(self):
+        """Handle settings being saved."""
+        # Update user label
+        user_name = self.config.get('user.name', 'User')
+        self.user_label.configure(text=f"User: {user_name}")
+
+        # Update display settings
+        self.transcription_display.set_max_lines(self.config.get('display.max_lines', 100))
+        self.transcription_display.set_show_timestamps(self.config.get('display.show_timestamps', True))
+
+        # Note: Model/device changes require restart
+        messagebox.showinfo(
+            "Settings Saved",
+            "Some settings (model size, device) require restarting the application to take effect."
+        )
+
+    def _on_closing(self):
+        """Handle window closing."""
+        # Stop transcription if running
+        if self.is_transcribing:
+            self._stop_transcription()
+
+        # Unload model
+        if self.transcription_engine:
+            self.transcription_engine.unload_model()
+
+        # Close window
+        self.destroy()
--- a/gui/main_window_qt.py
+++ b/gui/main_window_qt.py
@@ -0,0 +1,524 @@
+"""PySide6 main application window for the local transcription app."""
+
+from PySide6.QtWidgets import (
+    QMainWindow, QWidget, QVBoxLayout, QHBoxLayout,
+    QPushButton, QLabel, QFileDialog, QMessageBox
+)
+from PySide6.QtCore import Qt, QThread, Signal
+from PySide6.QtGui import QFont
+from pathlib import Path
+import sys
+
+# Add parent directory to path for imports
+sys.path.append(str(Path(__file__).parent.parent))
+
+from client.config import Config
+from client.device_utils import DeviceManager
+from client.audio_capture import AudioCapture
+from client.noise_suppression import NoiseSuppressor
+from client.transcription_engine import TranscriptionEngine
+from gui.transcription_display_qt import TranscriptionDisplay
+from gui.settings_dialog_qt import SettingsDialog
+from server.web_display import TranscriptionWebServer
+import asyncio
+from threading import Thread
+
+
+class WebServerThread(Thread):
+    """Thread for running the web server."""
+
+    def __init__(self, web_server):
+        super().__init__(daemon=True)
+        self.web_server = web_server
+        self.loop = None
+
+    def run(self):
+        """Run the web server in async event loop."""
+        self.loop = asyncio.new_event_loop()
+        asyncio.set_event_loop(self.loop)
+        self.loop.run_until_complete(self.web_server.start())
+
+
+class ModelLoaderThread(QThread):
+    """Thread for loading the Whisper model without blocking the GUI."""
+
+    finished = Signal(bool, str)  # success, message
+
+    def __init__(self, transcription_engine):
+        super().__init__()
+        self.transcription_engine = transcription_engine
+
+    def run(self):
+        """Load the model in background thread."""
+        try:
+            success = self.transcription_engine.load_model()
+            if success:
+                self.finished.emit(True, "Model loaded successfully")
+            else:
+                self.finished.emit(False, "Failed to load model")
+        except Exception as e:
+            self.finished.emit(False, f"Error loading model: {e}")
+
+
+class MainWindow(QMainWindow):
+    """Main application window using PySide6."""
+
+    def __init__(self):
+        """Initialize the main window."""
+        super().__init__()
+
+        # Application state
+        self.is_transcribing = False
+        self.config = Config()
+        self.device_manager = DeviceManager()
+
+        # Components (initialized later)
+        self.audio_capture: AudioCapture = None
+        self.noise_suppressor: NoiseSuppressor = None
+        self.transcription_engine: TranscriptionEngine = None
+        self.model_loader_thread: ModelLoaderThread = None
+
+        # Track current model settings
+        self.current_model_size: str = None
+        self.current_device_config: str = None
+
+        # Web server components
+        self.web_server: TranscriptionWebServer = None
+        self.web_server_thread: WebServerThread = None
+
+        # Configure window
+        self.setWindowTitle("Local Transcription")
+        self.resize(900, 700)
+
+        # Create UI
+        self._create_widgets()
+
+        # Initialize components (in background)
+        self._initialize_components()
+
+        # Start web server if enabled
+        self._start_web_server_if_enabled()
+
+    def _create_widgets(self):
+        """Create all UI widgets.
+
+        Builds a vertical layout containing, top to bottom: a header (title +
+        settings button), a status row (status, compute device, user name),
+        the transcription display, and a control row (start/stop, clear,
+        save). Widgets that other methods update later are stored on ``self``.
+        """
+        # Central widget
+        central_widget = QWidget()
+        self.setCentralWidget(central_widget)
+
+        main_layout = QVBoxLayout()
+        central_widget.setLayout(main_layout)
+
+        # Header
+        header_widget = QWidget()
+        header_widget.setFixedHeight(80)
+        header_layout = QHBoxLayout()
+        header_widget.setLayout(header_layout)
+
+        title_label = QLabel("Local Transcription")
+        title_font = QFont()
+        title_font.setPointSize(24)
+        title_font.setBold(True)
+        title_label.setFont(title_font)
+        header_layout.addWidget(title_label)
+
+        # Stretch pushes the settings button to the right edge
+        header_layout.addStretch()
+
+        self.settings_button = QPushButton("⚙ Settings")
+        self.settings_button.setFixedSize(120, 40)
+        self.settings_button.clicked.connect(self._open_settings)
+        header_layout.addWidget(self.settings_button)
+
+        main_layout.addWidget(header_widget)
+
+        # Status bar
+        status_widget = QWidget()
+        status_widget.setFixedHeight(60)
+        status_layout = QHBoxLayout()
+        status_widget.setLayout(status_layout)
+
+        self.status_label = QLabel("⚫ Initializing...")
+        status_font = QFont()
+        status_font.setPointSize(14)
+        self.status_label.setFont(status_font)
+        status_layout.addWidget(self.status_label)
+
+        # Show the description of the first reported compute device,
+        # or a placeholder when the device manager reports none
+        device_info = self.device_manager.get_device_info()
+        device_text = device_info[0][1] if device_info else "No device"
+        self.device_label = QLabel(f"Device: {device_text}")
+        status_layout.addWidget(self.device_label)
+
+        # User name (refreshed in _on_settings_saved)
+        user_name = self.config.get('user.name', 'User')
+        self.user_label = QLabel(f"User: {user_name}")
+        status_layout.addWidget(self.user_label)
+
+        # Trailing stretch keeps the status items grouped on the left
+        status_layout.addStretch()
+
+        main_layout.addWidget(status_widget)
+
+        # Transcription display, configured from the display.* settings
+        self.transcription_display = TranscriptionDisplay(
+            max_lines=self.config.get('display.max_lines', 100),
+            show_timestamps=self.config.get('display.show_timestamps', True),
+            font_family=self.config.get('display.font_family', 'Courier'),
+            font_size=self.config.get('display.font_size', 12)
+        )
+        main_layout.addWidget(self.transcription_display)
+
+        # Control buttons
+        control_widget = QWidget()
+        control_widget.setFixedHeight(80)
+        control_layout = QHBoxLayout()
+        control_widget.setLayout(control_layout)
+
+        # Start/Stop button (text and colour are swapped when recording starts/stops)
+        self.start_button = QPushButton("▶ Start Transcription")
+        self.start_button.setFixedSize(240, 50)
+        button_font = QFont()
+        button_font.setPointSize(14)
+        button_font.setBold(True)
+        self.start_button.setFont(button_font)
+        self.start_button.clicked.connect(self._toggle_transcription)
+        self.start_button.setStyleSheet("background-color: #2ecc71; color: white;")
+        control_layout.addWidget(self.start_button)
+
+        self.clear_button = QPushButton("Clear")
+        self.clear_button.setFixedSize(120, 50)
+        self.clear_button.clicked.connect(self._clear_transcriptions)
+        control_layout.addWidget(self.clear_button)
+
+        self.save_button = QPushButton("💾 Save")
+        self.save_button.setFixedSize(120, 50)
+        self.save_button.clicked.connect(self._save_transcriptions)
+        control_layout.addWidget(self.save_button)
+
+        # Trailing stretch keeps the buttons grouped on the left
+        control_layout.addStretch()
+
+        main_layout.addWidget(control_widget)
+
+    def _initialize_components(self):
+        """Initialize audio, noise suppression, and transcription components."""
+        # Update status
+        self.status_label.setText("⚙ Initializing...")
+
+        # Set device based on config
+        device_config = self.config.get('transcription.device', 'auto')
+        self.device_manager.set_device(device_config)
+
+        # Initialize transcription engine
+        model_size = self.config.get('transcription.model', 'base')
+        language = self.config.get('transcription.language', 'en')
+        device = self.device_manager.get_device_for_whisper()
+        compute_type = self.device_manager.get_compute_type()
+
+        # Track current settings
+        self.current_model_size = model_size
+        self.current_device_config = device_config
+
+        self.transcription_engine = TranscriptionEngine(
+            model_size=model_size,
+            device=device,
+            compute_type=compute_type,
+            language=language,
+            min_confidence=self.config.get('processing.min_confidence', 0.5)
+        )
+
+        # Load model in background thread
+        self.model_loader_thread = ModelLoaderThread(self.transcription_engine)
+        self.model_loader_thread.finished.connect(self._on_model_loaded)
+        self.model_loader_thread.start()
+
+    def _on_model_loaded(self, success: bool, message: str):
+        """Handle model loading completion."""
+        if success:
+            host = self.config.get('web_server.host', '127.0.0.1')
+            port = self.config.get('web_server.port', 8080)
+            self.status_label.setText(f"✓ Ready | Web: http://{host}:{port}")
+            self.start_button.setEnabled(True)
+        else:
+            self.status_label.setText("❌ Model loading failed")
+            QMessageBox.critical(self, "Error", message)
+            self.start_button.setEnabled(False)
+
+    def _start_web_server_if_enabled(self):
+        """Start web server."""
+        host = self.config.get('web_server.host', '127.0.0.1')
+        port = self.config.get('web_server.port', 8080)
+        show_timestamps = self.config.get('display.show_timestamps', True)
+        fade_after_seconds = self.config.get('display.fade_after_seconds', 10)
+
+        print(f"Starting web server at http://{host}:{port}")
+        self.web_server = TranscriptionWebServer(
+            host=host,
+            port=port,
+            show_timestamps=show_timestamps,
+            fade_after_seconds=fade_after_seconds
+        )
+        self.web_server_thread = WebServerThread(self.web_server)
+        self.web_server_thread.start()
+
+    def _toggle_transcription(self):
+        """Start or stop transcription."""
+        if not self.is_transcribing:
+            self._start_transcription()
+        else:
+            self._stop_transcription()
+
+    def _start_transcription(self):
+        """Start transcription."""
+        try:
+            # Check if engine is ready
+            if not self.transcription_engine or not self.transcription_engine.is_loaded:
+                QMessageBox.critical(self, "Error", "Transcription engine not ready")
+                return
+
+            # Get audio device
+            audio_device_str = self.config.get('audio.input_device', 'default')
+            audio_device = None if audio_device_str == 'default' else int(audio_device_str)
+
+            # Initialize audio capture
+            self.audio_capture = AudioCapture(
+                sample_rate=self.config.get('audio.sample_rate', 16000),
+                chunk_duration=self.config.get('audio.chunk_duration', 3.0),
+                device=audio_device
+            )
+
+            # Initialize noise suppressor
+            self.noise_suppressor = NoiseSuppressor(
+                sample_rate=self.config.get('audio.sample_rate', 16000),
+                method="noisereduce" if self.config.get('noise_suppression.enabled', True) else "none",
+                strength=self.config.get('noise_suppression.strength', 0.7),
+                use_vad=self.config.get('processing.use_vad', True)
+            )
+
+            # Start recording
+            self.audio_capture.start_recording(callback=self._process_audio_chunk)
+
+            # Update UI
+            self.is_transcribing = True
+            self.start_button.setText("⏸ Stop Transcription")
+            self.start_button.setStyleSheet("background-color: #e74c3c; color: white;")
+            self.status_label.setText("🔴 Recording...")
+
+        except Exception as e:
+            QMessageBox.critical(self, "Error", f"Failed to start transcription:\n{e}")
+            print(f"Error starting transcription: {e}")
+
+    def _stop_transcription(self):
+        """Stop transcription."""
+        try:
+            # Stop recording
+            if self.audio_capture:
+                self.audio_capture.stop_recording()
+
+            # Update UI
+            self.is_transcribing = False
+            self.start_button.setText("▶ Start Transcription")
+            self.start_button.setStyleSheet("background-color: #2ecc71; color: white;")
+            self.status_label.setText("✓ Ready")
+
+        except Exception as e:
+            QMessageBox.critical(self, "Error", f"Failed to stop transcription:\n{e}")
+            print(f"Error stopping transcription: {e}")
+
+    def _process_audio_chunk(self, audio_chunk):
+        """Process an audio chunk (noise suppression + transcription)."""
+        def process():
+            try:
+                # Apply noise suppression
+                processed_audio = self.noise_suppressor.process(audio_chunk, skip_silent=True)
+
+                # Skip if silent (VAD filtered it out)
+                if processed_audio is None:
+                    return
+
+                # Transcribe
+                user_name = self.config.get('user.name', 'User')
+                result = self.transcription_engine.transcribe(
+                    processed_audio,
+                    sample_rate=self.config.get('audio.sample_rate', 16000),
+                    user_name=user_name
+                )
+
+                # Display result (use Qt signal for thread safety)
+                if result:
+                    # We need to update UI from main thread
+                    # Note: We don't pass timestamp - let the display widget create it
+                    from PySide6.QtCore import QMetaObject, Q_ARG
+                    QMetaObject.invokeMethod(
+                        self.transcription_display,
+                        "add_transcription",
+                        Qt.QueuedConnection,
+                        Q_ARG(str, result.text),
+                        Q_ARG(str, result.user_name)
+                    )
+
+                    # Broadcast to web server if enabled
+                    if self.web_server and self.web_server_thread:
+                        asyncio.run_coroutine_threadsafe(
+                            self.web_server.broadcast_transcription(
+                                result.text,
+                                result.user_name,
+                                result.timestamp
+                            ),
+                            self.web_server_thread.loop
+                        )
+
+            except Exception as e:
+                print(f"Error processing audio: {e}")
+                import traceback
+                traceback.print_exc()
+
+        # Run in background thread
+        from threading import Thread
+        Thread(target=process, daemon=True).start()
+
+    def _clear_transcriptions(self):
+        """Clear all transcriptions."""
+        reply = QMessageBox.question(
+            self,
+            "Clear Transcriptions",
+            "Are you sure you want to clear all transcriptions?",
+            QMessageBox.Yes | QMessageBox.No
+        )
+
+        if reply == QMessageBox.Yes:
+            self.transcription_display.clear_all()
+
+    def _save_transcriptions(self):
+        """Save transcriptions to file."""
+        filepath, _ = QFileDialog.getSaveFileName(
+            self,
+            "Save Transcriptions",
+            "",
+            "Text files (*.txt);;All files (*.*)"
+        )
+
+        if filepath:
+            if self.transcription_display.save_to_file(filepath):
+                QMessageBox.information(self, "Saved", f"Transcriptions saved to:\n{filepath}")
+            else:
+                QMessageBox.critical(self, "Error", "Failed to save transcriptions")
+
+    def _open_settings(self):
+        """Open settings dialog."""
+        # Get audio devices
+        audio_devices = AudioCapture.get_input_devices()
+        if not audio_devices:
+            audio_devices = [(0, "Default")]
+
+        # Get compute devices
+        compute_devices = self.device_manager.get_device_info()
+        compute_devices.insert(0, ("auto", "Auto-detect"))
+
+        # Open settings dialog
+        dialog = SettingsDialog(
+            self,
+            self.config,
+            audio_devices,
+            compute_devices,
+            on_save=self._on_settings_saved
+        )
+        dialog.exec()
+
+    def _on_settings_saved(self):
+        """Handle settings being saved."""
+        # Update user label
+        user_name = self.config.get('user.name', 'User')
+        self.user_label.setText(f"User: {user_name}")
+
+        # Update display settings
+        show_timestamps = self.config.get('display.show_timestamps', True)
+        self.transcription_display.set_max_lines(self.config.get('display.max_lines', 100))
+        self.transcription_display.set_show_timestamps(show_timestamps)
+        self.transcription_display.set_font(
+            self.config.get('display.font_family', 'Courier'),
+            self.config.get('display.font_size', 12)
+        )
+
+        # Update web server settings
+        if self.web_server:
+            self.web_server.show_timestamps = show_timestamps
+            self.web_server.fade_after_seconds = self.config.get('display.fade_after_seconds', 10)
+
+        # Check if model/device settings changed - reload model if needed
+        new_model = self.config.get('transcription.model', 'base')
+        new_device_config = self.config.get('transcription.device', 'auto')
+
+        # Only reload if model size or device changed
+        if self.current_model_size != new_model or self.current_device_config != new_device_config:
+            self._reload_model()
+        else:
+            QMessageBox.information(self, "Settings Saved", "Settings have been applied successfully!")
+
+    def _reload_model(self):
+        """Reload the transcription model with new settings."""
+        # Stop transcription if running
+        was_transcribing = self.is_transcribing
+        if was_transcribing:
+            self._stop_transcription()
+
+        # Update status
+        self.status_label.setText("⚙ Reloading model...")
+        self.start_button.setEnabled(False)
+
+        # Unload current model
+        if self.transcription_engine:
+            self.transcription_engine.unload_model()
+
+        # Set device based on config
+        device_config = self.config.get('transcription.device', 'auto')
+        self.device_manager.set_device(device_config)
+
+        # Re-initialize transcription engine
+        model_size = self.config.get('transcription.model', 'base')
+        language = self.config.get('transcription.language', 'en')
+        device = self.device_manager.get_device_for_whisper()
+        compute_type = self.device_manager.get_compute_type()
+
+        # Update tracked settings
+        self.current_model_size = model_size
+        self.current_device_config = device_config
+
+        self.transcription_engine = TranscriptionEngine(
+            model_size=model_size,
+            device=device,
+            compute_type=compute_type,
+            language=language,
+            min_confidence=self.config.get('processing.min_confidence', 0.5)
+        )
+
+        # Load model in background thread
+        if self.model_loader_thread and self.model_loader_thread.isRunning():
+            self.model_loader_thread.wait()
+
+        self.model_loader_thread = ModelLoaderThread(self.transcription_engine)
+        self.model_loader_thread.finished.connect(self._on_model_reloaded)
+        self.model_loader_thread.start()
+
+    def _on_model_reloaded(self, success: bool, message: str):
+        """Handle model reloading completion."""
+        if success:
+            host = self.config.get('web_server.host', '127.0.0.1')
+            port = self.config.get('web_server.port', 8080)
+            self.status_label.setText(f"✓ Ready | Web: http://{host}:{port}")
+            self.start_button.setEnabled(True)
+            QMessageBox.information(self, "Settings Saved", "Model reloaded successfully with new settings!")
+        else:
+            self.status_label.setText("❌ Model loading failed")
+            QMessageBox.critical(self, "Error", f"Failed to reload model:\n{message}")
+            self.start_button.setEnabled(False)
+
+    def closeEvent(self, event):
+        """Handle window closing."""
+        # Stop transcription if running
+        if self.is_transcribing:
+            self._stop_transcription()
+
+        # Unload model
+        if self.transcription_engine:
+            self.transcription_engine.unload_model()
+
+        # Wait for model loader thread
+        if self.model_loader_thread and self.model_loader_thread.isRunning():
+            self.model_loader_thread.wait()
+
+        event.accept()
--- a/gui/settings_dialog.py
+++ b/gui/settings_dialog.py
@@ -0,0 +1,310 @@
+"""Settings dialog for configuring the application."""
+
+import customtkinter as ctk
+from tkinter import messagebox
+from typing import Callable, List, Tuple
+
+
+class SettingsDialog(ctk.CTkToplevel):
+    """
+    Modal customtkinter dialog for editing application settings.
+
+    The dialog reads its initial values from ``config`` and writes them back
+    through ``config.set`` when the user presses Save.
+    """
+
+    def __init__(
+        self,
+        parent,
+        config,
+        audio_devices: List[Tuple[int, str]],
+        compute_devices: List[Tuple[str, str]],
+        on_save: Callable = None
+    ):
+        """
+        Initialize settings dialog.
+
+        Args:
+            parent: Parent window
+            config: Configuration object
+            audio_devices: List of (device_index, device_name) tuples
+            compute_devices: List of (device_id, device_description) tuples
+            on_save: Callback function when settings are saved
+        """
+        super().__init__(parent)
+
+        self.config = config
+        self.audio_devices = audio_devices
+        self.compute_devices = compute_devices
+        self.on_save = on_save
+
+        # Window configuration
+        self.title("Settings")
+        self.geometry("600x700")
+        self.resizable(False, False)
+
+        # Make dialog modal
+        self.transient(parent)
+        self.grab_set()
+
+        self._create_widgets()
+        self._load_current_settings()
+
+    def _create_widgets(self):
+        """Create all settings widgets, grouped into one frame per section."""
+        # Main container with padding
+        main_frame = ctk.CTkFrame(self)
+        main_frame.pack(fill="both", expand=True, padx=20, pady=20)
+
+        # User Settings Section
+        user_frame = ctk.CTkFrame(main_frame)
+        user_frame.pack(fill="x", pady=(0, 15))
+
+        ctk.CTkLabel(user_frame, text="User Settings", font=("", 16, "bold")).pack(
+            anchor="w", padx=10, pady=(10, 5)
+        )
+
+        # User name
+        name_frame = ctk.CTkFrame(user_frame)
+        name_frame.pack(fill="x", padx=10, pady=5)
+        ctk.CTkLabel(name_frame, text="Display Name:", width=150).pack(side="left", padx=5)
+        self.name_entry = ctk.CTkEntry(name_frame, width=300)
+        self.name_entry.pack(side="left", padx=5)
+
+        # Audio Settings Section
+        audio_frame = ctk.CTkFrame(main_frame)
+        audio_frame.pack(fill="x", pady=(0, 15))
+
+        ctk.CTkLabel(audio_frame, text="Audio Settings", font=("", 16, "bold")).pack(
+            anchor="w", padx=10, pady=(10, 5)
+        )
+
+        # Audio device
+        device_frame = ctk.CTkFrame(audio_frame)
+        device_frame.pack(fill="x", padx=10, pady=5)
+        ctk.CTkLabel(device_frame, text="Input Device:", width=150).pack(side="left", padx=5)
+        device_names = [name for _, name in self.audio_devices]
+        self.audio_device_menu = ctk.CTkOptionMenu(device_frame, values=device_names, width=300)
+        self.audio_device_menu.pack(side="left", padx=5)
+
+        # Chunk duration
+        chunk_frame = ctk.CTkFrame(audio_frame)
+        chunk_frame.pack(fill="x", padx=10, pady=5)
+        ctk.CTkLabel(chunk_frame, text="Chunk Duration (s):", width=150).pack(side="left", padx=5)
+        self.chunk_entry = ctk.CTkEntry(chunk_frame, width=100)
+        self.chunk_entry.pack(side="left", padx=5)
+
+        # Transcription Settings Section
+        transcription_frame = ctk.CTkFrame(main_frame)
+        transcription_frame.pack(fill="x", pady=(0, 15))
+
+        ctk.CTkLabel(transcription_frame, text="Transcription Settings", font=("", 16, "bold")).pack(
+            anchor="w", padx=10, pady=(10, 5)
+        )
+
+        # Model size
+        model_frame = ctk.CTkFrame(transcription_frame)
+        model_frame.pack(fill="x", padx=10, pady=5)
+        ctk.CTkLabel(model_frame, text="Model Size:", width=150).pack(side="left", padx=5)
+        self.model_menu = ctk.CTkOptionMenu(
+            model_frame,
+            values=["tiny", "base", "small", "medium", "large"],
+            width=200
+        )
+        self.model_menu.pack(side="left", padx=5)
+
+        # Compute device
+        compute_frame = ctk.CTkFrame(transcription_frame)
+        compute_frame.pack(fill="x", padx=10, pady=5)
+        ctk.CTkLabel(compute_frame, text="Compute Device:", width=150).pack(side="left", padx=5)
+        device_descs = [desc for _, desc in self.compute_devices]
+        self.compute_device_menu = ctk.CTkOptionMenu(compute_frame, values=device_descs, width=300)
+        self.compute_device_menu.pack(side="left", padx=5)
+
+        # Language
+        lang_frame = ctk.CTkFrame(transcription_frame)
+        lang_frame.pack(fill="x", padx=10, pady=5)
+        ctk.CTkLabel(lang_frame, text="Language:", width=150).pack(side="left", padx=5)
+        self.lang_menu = ctk.CTkOptionMenu(
+            lang_frame,
+            values=["auto", "en", "es", "fr", "de", "it", "pt", "ru", "zh", "ja", "ko"],
+            width=200
+        )
+        self.lang_menu.pack(side="left", padx=5)
+
+        # Noise Suppression Section
+        noise_frame = ctk.CTkFrame(main_frame)
+        noise_frame.pack(fill="x", pady=(0, 15))
+
+        ctk.CTkLabel(noise_frame, text="Noise Suppression", font=("", 16, "bold")).pack(
+            anchor="w", padx=10, pady=(10, 5)
+        )
+
+        # Enable noise suppression
+        ns_enable_frame = ctk.CTkFrame(noise_frame)
+        ns_enable_frame.pack(fill="x", padx=10, pady=5)
+        self.noise_enabled_var = ctk.BooleanVar()
+        self.noise_enabled_check = ctk.CTkCheckBox(
+            ns_enable_frame,
+            text="Enable Noise Suppression",
+            variable=self.noise_enabled_var
+        )
+        self.noise_enabled_check.pack(side="left", padx=5)
+
+        # Noise suppression strength
+        strength_frame = ctk.CTkFrame(noise_frame)
+        strength_frame.pack(fill="x", padx=10, pady=5)
+        ctk.CTkLabel(strength_frame, text="Strength:", width=150).pack(side="left", padx=5)
+        self.noise_strength_slider = ctk.CTkSlider(
+            strength_frame,
+            from_=0.0,
+            to=1.0,
+            number_of_steps=20,
+            width=300
+        )
+        self.noise_strength_slider.pack(side="left", padx=5)
+        self.noise_strength_label = ctk.CTkLabel(strength_frame, text="0.7", width=40)
+        self.noise_strength_label.pack(side="left", padx=5)
+        # The label must exist before the slider callback is attached.
+        self.noise_strength_slider.configure(command=self._update_strength_label)
+
+        # VAD
+        vad_frame = ctk.CTkFrame(noise_frame)
+        vad_frame.pack(fill="x", padx=10, pady=5)
+        self.vad_enabled_var = ctk.BooleanVar()
+        self.vad_enabled_check = ctk.CTkCheckBox(
+            vad_frame,
+            text="Enable Voice Activity Detection",
+            variable=self.vad_enabled_var
+        )
+        self.vad_enabled_check.pack(side="left", padx=5)
+
+        # Display Settings Section
+        display_frame = ctk.CTkFrame(main_frame)
+        display_frame.pack(fill="x", pady=(0, 15))
+
+        ctk.CTkLabel(display_frame, text="Display Settings", font=("", 16, "bold")).pack(
+            anchor="w", padx=10, pady=(10, 5)
+        )
+
+        # Show timestamps
+        ts_frame = ctk.CTkFrame(display_frame)
+        ts_frame.pack(fill="x", padx=10, pady=5)
+        self.timestamps_var = ctk.BooleanVar()
+        self.timestamps_check = ctk.CTkCheckBox(
+            ts_frame,
+            text="Show Timestamps",
+            variable=self.timestamps_var
+        )
+        self.timestamps_check.pack(side="left", padx=5)
+
+        # Max lines
+        maxlines_frame = ctk.CTkFrame(display_frame)
+        maxlines_frame.pack(fill="x", padx=10, pady=5)
+        ctk.CTkLabel(maxlines_frame, text="Max Lines:", width=150).pack(side="left", padx=5)
+        self.maxlines_entry = ctk.CTkEntry(maxlines_frame, width=100)
+        self.maxlines_entry.pack(side="left", padx=5)
+
+        # Buttons
+        button_frame = ctk.CTkFrame(main_frame)
+        button_frame.pack(fill="x", pady=(10, 0))
+
+        self.save_button = ctk.CTkButton(
+            button_frame,
+            text="Save",
+            command=self._save_settings,
+            width=120
+        )
+        self.save_button.pack(side="right", padx=5)
+
+        self.cancel_button = ctk.CTkButton(
+            button_frame,
+            text="Cancel",
+            command=self.destroy,
+            width=120,
+            fg_color="gray"
+        )
+        self.cancel_button.pack(side="right", padx=5)
+
+    def _update_strength_label(self, value):
+        """
+        Update the noise strength label.
+
+        Args:
+            value: Current slider value, rendered with one decimal place
+        """
+        self.noise_strength_label.configure(text=f"{value:.1f}")
+
+    def _load_current_settings(self):
+        """Load current settings from config into the widgets."""
+        # User settings
+        self.name_entry.insert(0, self.config.get('user.name', 'User'))
+
+        # Audio settings
+        # Compare as strings: the dialog stores the index as a string, but a
+        # hand-edited config may hold it as a number.
+        current_device = str(self.config.get('audio.input_device', 'default'))
+        for idx, (dev_idx, dev_name) in enumerate(self.audio_devices):
+            if str(dev_idx) == current_device or (current_device == 'default' and idx == 0):
+                self.audio_device_menu.set(dev_name)
+                break
+
+        self.chunk_entry.insert(0, str(self.config.get('audio.chunk_duration', 3.0)))
+
+        # Transcription settings
+        self.model_menu.set(self.config.get('transcription.model', 'base'))
+
+        # 'auto' selects the first listed compute device.
+        current_compute = self.config.get('transcription.device', 'auto')
+        for dev_id, dev_desc in self.compute_devices:
+            if dev_id == current_compute or (current_compute == 'auto' and dev_id == self.compute_devices[0][0]):
+                self.compute_device_menu.set(dev_desc)
+                break
+
+        self.lang_menu.set(self.config.get('transcription.language', 'en'))
+
+        # Noise suppression
+        self.noise_enabled_var.set(self.config.get('noise_suppression.enabled', True))
+        strength = self.config.get('noise_suppression.strength', 0.7)
+        self.noise_strength_slider.set(strength)
+        self._update_strength_label(strength)
+        self.vad_enabled_var.set(self.config.get('processing.use_vad', True))
+
+        # Display settings
+        self.timestamps_var.set(self.config.get('display.show_timestamps', True))
+        self.maxlines_entry.insert(0, str(self.config.get('display.max_lines', 100)))
+
+    def _save_settings(self):
+        """
+        Validate the form and save every value to config.
+
+        Numeric fields are parsed and range-checked before the first
+        ``config.set`` call, so invalid input leaves the config untouched
+        instead of half-updated. On success the ``on_save`` callback (if any)
+        runs, a confirmation is shown and the dialog is destroyed. On failure
+        an error box is shown and the dialog stays open.
+        """
+        try:
+            # Validate first: float()/int() raise ValueError on bad text.
+            chunk_duration = float(self.chunk_entry.get())
+            max_lines = int(self.maxlines_entry.get())
+            # The chained comparison also rejects NaN and infinity.
+            if not 0 < chunk_duration < float("inf"):
+                raise ValueError("Chunk duration must be a positive number")
+            if max_lines < 1:
+                raise ValueError("Max lines must be at least 1")
+
+            # User settings
+            self.config.set('user.name', self.name_entry.get())
+
+            # Audio settings: map the selected name back to its device index.
+            selected_audio = self.audio_device_menu.get()
+            for dev_idx, dev_name in self.audio_devices:
+                if dev_name == selected_audio:
+                    self.config.set('audio.input_device', str(dev_idx))
+                    break
+
+            self.config.set('audio.chunk_duration', chunk_duration)
+
+            # Transcription settings
+            self.config.set('transcription.model', self.model_menu.get())
+
+            # Map the selected description back to its device id.
+            selected_compute = self.compute_device_menu.get()
+            for dev_id, dev_desc in self.compute_devices:
+                if dev_desc == selected_compute:
+                    self.config.set('transcription.device', dev_id)
+                    break
+
+            self.config.set('transcription.language', self.lang_menu.get())
+
+            # Noise suppression
+            self.config.set('noise_suppression.enabled', self.noise_enabled_var.get())
+            self.config.set('noise_suppression.strength', self.noise_strength_slider.get())
+            self.config.set('processing.use_vad', self.vad_enabled_var.get())
+
+            # Display settings
+            self.config.set('display.show_timestamps', self.timestamps_var.get())
+            self.config.set('display.max_lines', max_lines)
+
+            # Call save callback
+            if self.on_save:
+                self.on_save()
+
+            messagebox.showinfo("Settings Saved", "Settings have been saved successfully!")
+            self.destroy()
+
+        except ValueError as e:
+            messagebox.showerror("Invalid Input", f"Please check your input values:\n{e}")
+        except Exception as e:
+            messagebox.showerror("Error", f"Failed to save settings:\n{e}")
--- a/gui/settings_dialog_qt.py
+++ b/gui/settings_dialog_qt.py
@@ -0,0 +1,261 @@
+"""PySide6 settings dialog for configuring the application."""
+
+from PySide6.QtWidgets import (
+    QDialog, QVBoxLayout, QHBoxLayout, QFormLayout,
+    QLabel, QLineEdit, QComboBox, QCheckBox, QSlider,
+    QPushButton, QMessageBox, QGroupBox
+)
+from PySide6.QtCore import Qt
+from typing import Callable, List, Tuple
+
+
+class SettingsDialog(QDialog):
+    """
+    Modal dialog window for application settings using PySide6.
+
+    The dialog reads its initial values from ``config`` and writes them back
+    through ``config.set`` when the user presses Save.
+    """
+
+    def __init__(
+        self,
+        parent,
+        config,
+        audio_devices: List[Tuple[int, str]],
+        compute_devices: List[Tuple[str, str]],
+        on_save: Callable = None
+    ):
+        """
+        Initialize settings dialog.
+
+        Args:
+            parent: Parent window
+            config: Configuration object
+            audio_devices: List of (device_index, device_name) tuples
+            compute_devices: List of (device_id, device_description) tuples
+            on_save: Callback function when settings are saved
+        """
+        super().__init__(parent)
+
+        self.config = config
+        self.audio_devices = audio_devices
+        self.compute_devices = compute_devices
+        self.on_save = on_save
+
+        # Window configuration
+        self.setWindowTitle("Settings")
+        self.setMinimumSize(600, 700)
+        self.setModal(True)
+
+        self._create_widgets()
+        self._load_current_settings()
+
+    def _create_widgets(self):
+        """Create all settings widgets, one group box per settings section."""
+        main_layout = QVBoxLayout()
+        self.setLayout(main_layout)
+
+        # User Settings Group
+        user_group = QGroupBox("User Settings")
+        user_layout = QFormLayout()
+
+        self.name_input = QLineEdit()
+        user_layout.addRow("Display Name:", self.name_input)
+
+        user_group.setLayout(user_layout)
+        main_layout.addWidget(user_group)
+
+        # Audio Settings Group
+        audio_group = QGroupBox("Audio Settings")
+        audio_layout = QFormLayout()
+
+        # Combo rows are in the same order as self.audio_devices, so a combo
+        # index can be used directly as an index into that list.
+        self.audio_device_combo = QComboBox()
+        device_names = [name for _, name in self.audio_devices]
+        self.audio_device_combo.addItems(device_names)
+        audio_layout.addRow("Input Device:", self.audio_device_combo)
+
+        self.chunk_input = QLineEdit()
+        audio_layout.addRow("Chunk Duration (s):", self.chunk_input)
+
+        audio_group.setLayout(audio_layout)
+        main_layout.addWidget(audio_group)
+
+        # Transcription Settings Group
+        transcription_group = QGroupBox("Transcription Settings")
+        transcription_layout = QFormLayout()
+
+        self.model_combo = QComboBox()
+        self.model_combo.addItems(["tiny", "base", "small", "medium", "large"])
+        transcription_layout.addRow("Model Size:", self.model_combo)
+
+        # Same index correspondence as the audio combo, for self.compute_devices.
+        self.compute_device_combo = QComboBox()
+        device_descs = [desc for _, desc in self.compute_devices]
+        self.compute_device_combo.addItems(device_descs)
+        transcription_layout.addRow("Compute Device:", self.compute_device_combo)
+
+        self.lang_combo = QComboBox()
+        self.lang_combo.addItems(["auto", "en", "es", "fr", "de", "it", "pt", "ru", "zh", "ja", "ko"])
+        transcription_layout.addRow("Language:", self.lang_combo)
+
+        transcription_group.setLayout(transcription_layout)
+        main_layout.addWidget(transcription_group)
+
+        # Noise Suppression Group
+        noise_group = QGroupBox("Noise Suppression")
+        noise_layout = QVBoxLayout()
+
+        self.noise_enabled_check = QCheckBox("Enable Noise Suppression")
+        noise_layout.addWidget(self.noise_enabled_check)
+
+        # Strength slider: integer 0-100, stored in config as value / 100.
+        strength_layout = QHBoxLayout()
+        strength_layout.addWidget(QLabel("Strength:"))
+
+        self.noise_strength_slider = QSlider(Qt.Horizontal)
+        self.noise_strength_slider.setMinimum(0)
+        self.noise_strength_slider.setMaximum(100)
+        self.noise_strength_slider.setValue(70)
+        self.noise_strength_slider.valueChanged.connect(self._update_strength_label)
+        strength_layout.addWidget(self.noise_strength_slider)
+
+        self.noise_strength_label = QLabel("0.7")
+        strength_layout.addWidget(self.noise_strength_label)
+
+        noise_layout.addLayout(strength_layout)
+
+        self.vad_enabled_check = QCheckBox("Enable Voice Activity Detection")
+        noise_layout.addWidget(self.vad_enabled_check)
+
+        noise_group.setLayout(noise_layout)
+        main_layout.addWidget(noise_group)
+
+        # Display Settings Group
+        display_group = QGroupBox("Display Settings")
+        display_layout = QFormLayout()
+
+        self.timestamps_check = QCheckBox()
+        display_layout.addRow("Show Timestamps:", self.timestamps_check)
+
+        self.maxlines_input = QLineEdit()
+        display_layout.addRow("Max Lines:", self.maxlines_input)
+
+        self.font_family_combo = QComboBox()
+        self.font_family_combo.addItems(["Courier", "Arial", "Times New Roman", "Consolas", "Monaco", "Monospace"])
+        display_layout.addRow("Font Family:", self.font_family_combo)
+
+        self.font_size_input = QLineEdit()
+        display_layout.addRow("Font Size:", self.font_size_input)
+
+        self.fade_seconds_input = QLineEdit()
+        display_layout.addRow("Fade After (seconds):", self.fade_seconds_input)
+
+        display_group.setLayout(display_layout)
+        main_layout.addWidget(display_group)
+
+        # Buttons
+        button_layout = QHBoxLayout()
+        button_layout.addStretch()
+
+        self.cancel_button = QPushButton("Cancel")
+        self.cancel_button.clicked.connect(self.reject)
+        button_layout.addWidget(self.cancel_button)
+
+        self.save_button = QPushButton("Save")
+        self.save_button.clicked.connect(self._save_settings)
+        self.save_button.setDefault(True)
+        button_layout.addWidget(self.save_button)
+
+        main_layout.addLayout(button_layout)
+
+    def _update_strength_label(self, value):
+        """
+        Update the noise strength label.
+
+        Args:
+            value: Slider position (0-100); displayed as value / 100
+        """
+        self.noise_strength_label.setText(f"{value / 100:.1f}")
+
+    def _load_current_settings(self):
+        """Load current settings from config into the widgets."""
+        # User settings
+        self.name_input.setText(self.config.get('user.name', 'User'))
+
+        # Audio settings
+        # Compare as strings: the dialog stores the index as a string, but a
+        # hand-edited config may hold it as a number.
+        current_device = str(self.config.get('audio.input_device', 'default'))
+        for idx, (dev_idx, dev_name) in enumerate(self.audio_devices):
+            if str(dev_idx) == current_device or (current_device == 'default' and idx == 0):
+                self.audio_device_combo.setCurrentIndex(idx)
+                break
+
+        self.chunk_input.setText(str(self.config.get('audio.chunk_duration', 3.0)))
+
+        # Transcription settings
+        model = self.config.get('transcription.model', 'base')
+        self.model_combo.setCurrentText(model)
+
+        # 'auto' selects the first listed compute device.
+        current_compute = self.config.get('transcription.device', 'auto')
+        for idx, (dev_id, dev_desc) in enumerate(self.compute_devices):
+            if dev_id == current_compute or (current_compute == 'auto' and idx == 0):
+                self.compute_device_combo.setCurrentIndex(idx)
+                break
+
+        lang = self.config.get('transcription.language', 'en')
+        self.lang_combo.setCurrentText(lang)
+
+        # Noise suppression
+        self.noise_enabled_check.setChecked(self.config.get('noise_suppression.enabled', True))
+        strength = self.config.get('noise_suppression.strength', 0.7)
+        # round(), not int(): 0.29 * 100 is 28.999999999999996 in floating
+        # point, so truncation would lower the stored value on each save.
+        self.noise_strength_slider.setValue(round(strength * 100))
+        # Read the value back from the slider so the label matches it.
+        self._update_strength_label(self.noise_strength_slider.value())
+        self.vad_enabled_check.setChecked(self.config.get('processing.use_vad', True))
+
+        # Display settings
+        self.timestamps_check.setChecked(self.config.get('display.show_timestamps', True))
+        self.maxlines_input.setText(str(self.config.get('display.max_lines', 100)))
+
+        font_family = self.config.get('display.font_family', 'Courier')
+        self.font_family_combo.setCurrentText(font_family)
+
+        self.font_size_input.setText(str(self.config.get('display.font_size', 12)))
+        self.fade_seconds_input.setText(str(self.config.get('display.fade_after_seconds', 10)))
+
+    def _save_settings(self):
+        """
+        Validate the form and save every value to config.
+
+        All free-text fields are parsed and range-checked before the first
+        ``config.set`` call, so invalid input leaves the config untouched
+        instead of half-updated. On success the ``on_save`` callback (if any)
+        runs, a confirmation is shown and the dialog is accepted. On failure
+        an error box is shown and the dialog stays open.
+        """
+        try:
+            # Validate first: float()/int() raise ValueError on bad text.
+            chunk_duration = float(self.chunk_input.text())
+            max_lines = int(self.maxlines_input.text())
+            font_size = int(self.font_size_input.text())
+            fade_seconds = int(self.fade_seconds_input.text())
+            # The chained comparison also rejects NaN and infinity.
+            if not 0 < chunk_duration < float("inf"):
+                raise ValueError("Chunk duration must be a positive number")
+            if max_lines < 1:
+                raise ValueError("Max lines must be at least 1")
+            if font_size < 1:
+                raise ValueError("Font size must be at least 1")
+            if fade_seconds < 0:
+                raise ValueError("Fade time cannot be negative")
+
+            # User settings
+            self.config.set('user.name', self.name_input.text())
+
+            # Audio settings
+            # currentIndex() is -1 when the combo is empty; leave the stored
+            # device alone in that case instead of failing the whole save.
+            selected_audio_idx = self.audio_device_combo.currentIndex()
+            if 0 <= selected_audio_idx < len(self.audio_devices):
+                dev_idx, _ = self.audio_devices[selected_audio_idx]
+                self.config.set('audio.input_device', str(dev_idx))
+
+            self.config.set('audio.chunk_duration', chunk_duration)
+
+            # Transcription settings
+            self.config.set('transcription.model', self.model_combo.currentText())
+
+            # Same empty-combo guard as for the audio device.
+            selected_compute_idx = self.compute_device_combo.currentIndex()
+            if 0 <= selected_compute_idx < len(self.compute_devices):
+                dev_id, _ = self.compute_devices[selected_compute_idx]
+                self.config.set('transcription.device', dev_id)
+
+            self.config.set('transcription.language', self.lang_combo.currentText())
+
+            # Noise suppression
+            self.config.set('noise_suppression.enabled', self.noise_enabled_check.isChecked())
+            self.config.set('noise_suppression.strength', self.noise_strength_slider.value() / 100.0)
+            self.config.set('processing.use_vad', self.vad_enabled_check.isChecked())
+
+            # Display settings
+            self.config.set('display.show_timestamps', self.timestamps_check.isChecked())
+            self.config.set('display.max_lines', max_lines)
+            self.config.set('display.font_family', self.font_family_combo.currentText())
+            self.config.set('display.font_size', font_size)
+            self.config.set('display.fade_after_seconds', fade_seconds)
+
+            # Call save callback
+            if self.on_save:
+                self.on_save()
+
+            QMessageBox.information(self, "Settings Saved", "Settings have been saved successfully!")
+            self.accept()
+
+        except ValueError as e:
+            QMessageBox.critical(self, "Invalid Input", f"Please check your input values:\n{e}")
+        except Exception as e:
+            QMessageBox.critical(self, "Error", f"Failed to save settings:\n{e}")
--- a/gui/transcription_display.py
+++ b/gui/transcription_display.py
@@ -0,0 +1,127 @@
+"""Transcription display widget for showing real-time transcriptions."""
+
+import customtkinter as ctk
+from typing import List
+from datetime import datetime
+
+
+class TranscriptionDisplay(ctk.CTkTextbox):
+    """
+    Custom text widget for displaying transcriptions.
+
+    The widget is kept read-only; it is switched to an editable state only
+    for the duration of each programmatic insert or delete.
+    """
+
+    def __init__(self, master, max_lines: int = 100, show_timestamps: bool = True, **kwargs):
+        """
+        Initialize transcription display.
+
+        Args:
+            master: Parent widget
+            max_lines: Maximum number of lines to keep in display
+            show_timestamps: Whether to show timestamps
+            **kwargs: Additional arguments for CTkTextbox
+        """
+        super().__init__(master, **kwargs)
+
+        self.max_lines = max_lines
+        self.show_timestamps = show_timestamps
+        # Number of transcription entries currently shown.
+        self.line_count = 0
+
+        # Configure text widget
+        self.configure(state="disabled")  # Read-only by default
+
+    def add_transcription(self, text: str, user_name: str = "", timestamp: datetime = None):
+        """
+        Add a new transcription to the display.
+
+        The entry is rendered as ``[HH:MM:SS] user_name: text`` with the
+        timestamp and name parts omitted when disabled or empty.
+
+        Args:
+            text: Transcription text
+            user_name: User/speaker name
+            timestamp: Timestamp of transcription; defaults to the current time
+        """
+        if timestamp is None:
+            timestamp = datetime.now()
+
+        # Build the display line
+        line_parts = []
+
+        if self.show_timestamps:
+            time_str = timestamp.strftime("%H:%M:%S")
+            line_parts.append(f"[{time_str}]")
+
+        if user_name:
+            line_parts.append(f"{user_name}:")
+
+        line_parts.append(text)
+
+        line = " ".join(line_parts) + "\n"
+
+        # Add to display (temporarily editable)
+        self.configure(state="normal")
+        self.insert("end", line)
+        self.configure(state="disabled")
+
+        # Auto-scroll to bottom
+        self.see("end")
+
+        # Track line count
+        self.line_count += 1
+
+        # Remove old lines if exceeding max
+        if self.line_count > self.max_lines:
+            self._remove_oldest_lines(self.line_count - self.max_lines)
+
+    def _remove_oldest_lines(self, num_lines: int):
+        """
+        Remove oldest lines from the display.
+
+        Args:
+            num_lines: Number of lines to remove
+        """
+        self.configure(state="normal")
+        # Text indices are "line.column" with 1-based lines, so this deletes
+        # from the start of line 1 up to the start of line num_lines + 1.
+        self.delete("1.0", f"{num_lines + 1}.0")
+        self.configure(state="disabled")
+        self.line_count -= num_lines
+
+    def clear(self):
+        """Clear all transcriptions."""
+        self.configure(state="normal")
+        self.delete("1.0", "end")
+        self.configure(state="disabled")
+        self.line_count = 0
+
+    def get_all_text(self) -> str:
+        """
+        Get all transcription text.
+
+        Returns:
+            All text in the display
+        """
+        # "end-1c" stops one character before the end, leaving out the extra
+        # newline that a Tk text widget always keeps after the last line.
+        return self.get("1.0", "end-1c")
+
+    def set_max_lines(self, max_lines: int):
+        """
+        Update maximum number of lines to keep.
+
+        Args:
+            max_lines: New limit; excess lines are removed immediately
+        """
+        self.max_lines = max_lines
+
+        # Trim if necessary
+        if self.line_count > self.max_lines:
+            self._remove_oldest_lines(self.line_count - self.max_lines)
+
+    def set_show_timestamps(self, show: bool):
+        """
+        Update whether to show timestamps.
+
+        Only affects transcriptions added after the call.
+
+        Args:
+            show: True to prefix new entries with a timestamp
+        """
+        self.show_timestamps = show
+
+    def save_to_file(self, filepath: str) -> bool:
+        """
+        Save transcriptions to a UTF-8 text file.
+
+        Args:
+            filepath: Path to save file
+
+        Returns:
+            True if saved successfully, False if any error occurred
+        """
+        try:
+            # Transcriptions may contain non-ASCII text; force UTF-8 because
+            # the platform default encoding may be unable to encode it.
+            with open(filepath, 'w', encoding='utf-8') as f:
+                f.write(self.get_all_text())
+            return True
+        except Exception as e:
+            print(f"Error saving transcriptions: {e}")
+            return False
--- a/gui/transcription_display_qt.py
+++ b/gui/transcription_display_qt.py
@@ -0,0 +1,159 @@
+"""PySide6 transcription display widget for showing real-time transcriptions."""
+
+from PySide6.QtWidgets import QTextEdit
+from PySide6.QtGui import QFont, QTextCursor
+from PySide6.QtCore import Qt, Slot
+from datetime import datetime
+
+
+class TranscriptionDisplay(QTextEdit):
+    """Custom read-only text widget for displaying transcriptions using PySide6."""
+
+    def __init__(self, parent=None, max_lines=100, show_timestamps=True, font_family="Courier", font_size=12):
+        """
+        Initialize transcription display.
+
+        Args:
+            parent: Parent widget
+            max_lines: Maximum number of lines to keep in display
+            show_timestamps: Whether to show timestamps
+            font_family: Font family name
+            font_size: Font size in points
+        """
+        super().__init__(parent)
+
+        self.max_lines = max_lines
+        self.show_timestamps = show_timestamps
+        # Number of transcription entries currently shown.
+        self.line_count = 0
+        self.font_family = font_family
+        self.font_size = font_size
+
+        # Configure text widget
+        self.setReadOnly(True)
+        self.setFont(QFont(font_family, font_size))
+
+        # Set dark theme styling
+        self.setStyleSheet("""
+            QTextEdit {
+                background-color: #2b2b2b;
+                color: #ffffff;
+                border: 1px solid #3d3d3d;
+                border-radius: 5px;
+                padding: 10px;
+            }
+        """)
+
+    @Slot(str, str)
+    def add_transcription(self, text: str, user_name: str = "", timestamp: datetime = None):
+        """
+        Add a new transcription to the display.
+
+        The entry is rendered as ``[HH:MM:SS] user_name: text`` with the
+        timestamp and name parts omitted when disabled or empty.
+
+        Args:
+            text: Transcription text
+            user_name: User/speaker name
+            timestamp: Timestamp of transcription; defaults to the current time
+        """
+        if timestamp is None:
+            timestamp = datetime.now()
+
+        # Build the display line
+        line_parts = []
+
+        if self.show_timestamps:
+            time_str = timestamp.strftime("%H:%M:%S")
+            line_parts.append(f"[{time_str}]")
+
+        if user_name:
+            line_parts.append(f"{user_name}:")
+
+        line_parts.append(text)
+
+        line = " ".join(line_parts)
+
+        # Add to display
+        self.append(line)
+
+        # Auto-scroll to bottom
+        cursor = self.textCursor()
+        cursor.movePosition(QTextCursor.End)
+        self.setTextCursor(cursor)
+
+        # Track line count
+        self.line_count += 1
+
+        # Remove old lines if exceeding max
+        if self.line_count > self.max_lines:
+            self._remove_oldest_lines(self.line_count - self.max_lines)
+
+    def _remove_oldest_lines(self, num_lines: int):
+        """
+        Remove oldest lines from the display.
+
+        Args:
+            num_lines: Number of lines to remove
+        """
+        cursor = self.textCursor()
+        cursor.movePosition(QTextCursor.Start)
+
+        # Each pass deletes the text of the first block, then the separator
+        # after it, so the next block becomes the first one.
+        for _ in range(num_lines):
+            cursor.select(QTextCursor.BlockUnderCursor)
+            cursor.removeSelectedText()
+            cursor.deleteChar()  # Remove the newline
+
+        self.line_count -= num_lines
+
+    def clear_all(self):
+        """Clear all transcriptions and reset the line counter."""
+        self.clear()
+        self.line_count = 0
+
+    def get_all_text(self) -> str:
+        """
+        Get all transcription text.
+
+        Returns:
+            All text in the display
+        """
+        return self.toPlainText()
+
+    def set_max_lines(self, max_lines: int):
+        """
+        Update maximum number of lines to keep.
+
+        Args:
+            max_lines: New limit; excess lines are removed immediately
+        """
+        self.max_lines = max_lines
+
+        # Trim if necessary
+        if self.line_count > self.max_lines:
+            self._remove_oldest_lines(self.line_count - self.max_lines)
+
+    def set_show_timestamps(self, show: bool):
+        """
+        Update whether to show timestamps.
+
+        Only affects transcriptions added after the call.
+
+        Args:
+            show: True to prefix new entries with a timestamp
+        """
+        self.show_timestamps = show
+
+    def set_font(self, font_family: str, font_size: int):
+        """
+        Update font settings.
+
+        Args:
+            font_family: Font family name
+            font_size: Font size in points
+        """
+        self.font_family = font_family
+        self.font_size = font_size
+        super().setFont(QFont(font_family, font_size))
+
+    def save_to_file(self, filepath: str) -> bool:
+        """
+        Save transcriptions to a UTF-8 text file.
+
+        Args:
+            filepath: Path to save file
+
+        Returns:
+            True if saved successfully, False if any error occurred
+        """
+        try:
+            # Transcriptions may contain non-ASCII text; force UTF-8 because
+            # the platform default encoding may be unable to encode it.
+            with open(filepath, 'w', encoding='utf-8') as f:
+                f.write(self.toPlainText())
+            return True
+        except Exception as e:
+            print(f"Error saving transcriptions: {e}")
+            return False