"""PySide6 settings dialog for configuring the application.""" from PySide6.QtWidgets import ( QDialog, QVBoxLayout, QHBoxLayout, QFormLayout, QLabel, QLineEdit, QComboBox, QCheckBox, QSlider, QPushButton, QMessageBox, QGroupBox, QScrollArea, QWidget ) from PySide6.QtCore import Qt from PySide6.QtGui import QScreen from typing import Callable, List, Tuple class SettingsDialog(QDialog): """Dialog window for application settings using PySide6.""" def __init__( self, parent, config, audio_devices: List[Tuple[int, str]], compute_devices: List[Tuple[str, str]], on_save: Callable = None ): """ Initialize settings dialog. Args: parent: Parent window config: Configuration object audio_devices: List of (device_index, device_name) tuples compute_devices: List of (device_id, device_description) tuples on_save: Callback function when settings are saved """ super().__init__(parent) self.config = config self.audio_devices = audio_devices self.compute_devices = compute_devices self.on_save = on_save # Window configuration self.setWindowTitle("Settings") self.setModal(True) # Calculate size based on screen size (80% of screen height, max 900px) screen = QScreen.availableGeometry(parent.screen() if parent else None) max_height = min(int(screen.height() * 0.8), 900) self.setMinimumSize(700, 500) self.resize(700, max_height) self._create_widgets() self._load_current_settings() def _create_widgets(self): """Create all settings widgets.""" # Main layout for the dialog (contains scroll area + buttons) main_layout = QVBoxLayout() main_layout.setContentsMargins(0, 0, 0, 0) main_layout.setSpacing(0) self.setLayout(main_layout) # Create scroll area for settings content scroll_area = QScrollArea() scroll_area.setWidgetResizable(True) scroll_area.setHorizontalScrollBarPolicy(Qt.ScrollBarAlwaysOff) scroll_area.setVerticalScrollBarPolicy(Qt.ScrollBarAsNeeded) # Create content widget for scroll area content_widget = QWidget() content_layout = QVBoxLayout() content_layout.setSpacing(15) # Add spacing between groups content_layout.setContentsMargins(20, 20, 20, 20) # Add padding content_widget.setLayout(content_layout) scroll_area.setWidget(content_widget) # Add scroll area to main layout main_layout.addWidget(scroll_area) # User Settings Group user_group = QGroupBox("User Settings") user_layout = QFormLayout() user_layout.setSpacing(10) self.name_input = QLineEdit() self.name_input.setToolTip("Your display name shown in transcriptions and sent to multi-user server") user_layout.addRow("Display Name:", self.name_input) user_group.setLayout(user_layout) content_layout.addWidget(user_group) # Audio Settings Group audio_group = QGroupBox("Audio Settings") audio_layout = QFormLayout() audio_layout.setSpacing(10) self.audio_device_combo = QComboBox() self.audio_device_combo.setToolTip("Select your microphone or audio input device") device_names = [name for _, name in self.audio_devices] self.audio_device_combo.addItems(device_names) audio_layout.addRow("Input Device:", self.audio_device_combo) audio_group.setLayout(audio_layout) content_layout.addWidget(audio_group) # Transcription Settings Group transcription_group = QGroupBox("Transcription Settings") transcription_layout = QFormLayout() transcription_layout.setSpacing(10) self.model_combo = QComboBox() self.model_combo.setToolTip( "Whisper model size:\n" "• tiny/tiny.en - Fastest, lowest quality\n" "• base/base.en - Good balance for real-time\n" "• small/small.en - Better quality, slower\n" "• medium/medium.en - High quality, much slower\n" "• large-v1/v2/v3 - Best quality, 
very slow\n" "(.en models are English-only, faster)" ) self.model_combo.addItems([ "tiny", "tiny.en", "base", "base.en", "small", "small.en", "medium", "medium.en", "large-v1", "large-v2", "large-v3" ]) transcription_layout.addRow("Model Size:", self.model_combo) self.compute_device_combo = QComboBox() self.compute_device_combo.setToolTip("Hardware to use for transcription (GPU is 5-10x faster than CPU)") device_descs = [desc for _, desc in self.compute_devices] self.compute_device_combo.addItems(device_descs) transcription_layout.addRow("Compute Device:", self.compute_device_combo) self.compute_type_combo = QComboBox() self.compute_type_combo.setToolTip( "Precision for model calculations:\n" "• default - Automatic selection\n" "• int8 - Fastest, uses less memory\n" "• float16 - GPU only, good balance\n" "• float32 - Slowest, best quality" ) self.compute_type_combo.addItems(["default", "int8", "float16", "float32"]) transcription_layout.addRow("Compute Type:", self.compute_type_combo) self.lang_combo = QComboBox() self.lang_combo.setToolTip("Language to transcribe (auto-detect or specific language)") self.lang_combo.addItems(["auto", "en", "es", "fr", "de", "it", "pt", "ru", "zh", "ja", "ko"]) transcription_layout.addRow("Language:", self.lang_combo) self.beam_size_combo = QComboBox() self.beam_size_combo.setToolTip( "Beam search size for decoding:\n" "• Higher = Better quality but slower\n" "• 1 = Greedy (fastest)\n" "• 5 = Good balance (recommended)\n" "• 10 = Best quality (slowest)" ) self.beam_size_combo.addItems(["1", "2", "3", "5", "8", "10"]) transcription_layout.addRow("Beam Size:", self.beam_size_combo) transcription_group.setLayout(transcription_layout) content_layout.addWidget(transcription_group) # Realtime Preview Group realtime_group = QGroupBox("Realtime Preview (Optional)") realtime_layout = QFormLayout() realtime_layout.setSpacing(10) self.realtime_enabled_check = QCheckBox() self.realtime_enabled_check.setToolTip( "Enable live preview transcriptions using a faster model\n" "Shows instant results while processing final transcription in background" ) realtime_layout.addRow("Enable Preview:", self.realtime_enabled_check) self.realtime_model_combo = QComboBox() self.realtime_model_combo.setToolTip("Faster model for instant preview (tiny or base recommended)") self.realtime_model_combo.addItems(["tiny", "tiny.en", "base", "base.en"]) realtime_layout.addRow("Preview Model:", self.realtime_model_combo) realtime_group.setLayout(realtime_layout) content_layout.addWidget(realtime_group) # VAD (Voice Activity Detection) Group vad_group = QGroupBox("Voice Activity Detection") vad_layout = QFormLayout() vad_layout.setSpacing(10) # Silero VAD sensitivity slider silero_layout = QHBoxLayout() self.silero_slider = QSlider(Qt.Horizontal) self.silero_slider.setMinimum(0) self.silero_slider.setMaximum(100) self.silero_slider.setValue(40) self.silero_slider.valueChanged.connect(self._update_silero_label) self.silero_slider.setToolTip( "Silero VAD sensitivity (0.0-1.0):\n" "• Lower values = More sensitive (detects quieter speech)\n" "• Higher values = Less sensitive (requires louder speech)\n" "• 0.4 is recommended for most environments" ) silero_layout.addWidget(self.silero_slider) self.silero_label = QLabel("0.4") silero_layout.addWidget(self.silero_label) vad_layout.addRow("Silero Sensitivity:", silero_layout) # WebRTC VAD sensitivity self.webrtc_combo = QComboBox() self.webrtc_combo.setToolTip( "WebRTC VAD aggressiveness:\n" "• 0 = Least aggressive (detects more speech)\n" "• 3 = Most 
        # WebRTC VAD sensitivity
        self.webrtc_combo = QComboBox()
        self.webrtc_combo.setToolTip(
            "WebRTC VAD aggressiveness:\n"
            "• 0 = Least aggressive (detects more speech)\n"
            "• 3 = Most aggressive (filters more noise)\n"
            "• 3 is recommended for noisy environments"
        )
        self.webrtc_combo.addItems(["0 (most sensitive)", "1", "2", "3 (least sensitive)"])
        vad_layout.addRow("WebRTC Sensitivity:", self.webrtc_combo)

        self.silero_onnx_check = QCheckBox("Enable (2-3x faster)")
        self.silero_onnx_check.setToolTip(
            "Use ONNX runtime for Silero VAD:\n"
            "• 2-3x faster processing\n"
            "• 30% lower CPU usage\n"
            "• Same quality\n"
            "• Recommended: Enabled"
        )
        vad_layout.addRow("ONNX Acceleration:", self.silero_onnx_check)

        vad_group.setLayout(vad_layout)
        content_layout.addWidget(vad_group)

        # Advanced Timing Group
        timing_group = QGroupBox("Advanced Timing Settings")
        timing_layout = QFormLayout()
        timing_layout.setSpacing(10)

        self.post_silence_input = QLineEdit()
        self.post_silence_input.setToolTip(
            "Seconds of silence after speech before finalizing transcription:\n"
            "• Lower = Faster response but may cut off slow speech\n"
            "• Higher = More complete sentences but slower\n"
            "• 0.3s is recommended for real-time streaming"
        )
        timing_layout.addRow("Post-Speech Silence (s):", self.post_silence_input)

        self.min_recording_input = QLineEdit()
        self.min_recording_input.setToolTip(
            "Minimum length of audio to transcribe (in seconds):\n"
            "• Filters out very short sounds/noise\n"
            "• 0.5s is recommended"
        )
        timing_layout.addRow("Min Recording Length (s):", self.min_recording_input)

        self.pre_buffer_input = QLineEdit()
        self.pre_buffer_input.setToolTip(
            "Buffer before speech detection (in seconds):\n"
            "• Captures the start of words that triggered VAD\n"
            "• Prevents cutting off the first word\n"
            "• 0.2s is recommended"
        )
        timing_layout.addRow("Pre-Recording Buffer (s):", self.pre_buffer_input)

        timing_group.setLayout(timing_layout)
        content_layout.addWidget(timing_group)

        # Display Settings Group
        display_group = QGroupBox("Display Settings")
        display_layout = QFormLayout()
        display_layout.setSpacing(10)

        self.timestamps_check = QCheckBox()
        self.timestamps_check.setToolTip("Show timestamp before each transcription line")
        display_layout.addRow("Show Timestamps:", self.timestamps_check)

        self.maxlines_input = QLineEdit()
        self.maxlines_input.setToolTip(
            "Maximum number of transcription lines to display:\n"
            "• Older lines are automatically removed\n"
            "• Set to 50-100 for OBS to prevent scroll bars"
        )
        display_layout.addRow("Max Lines:", self.maxlines_input)

        self.font_family_combo = QComboBox()
        self.font_family_combo.setToolTip("Font family for transcription display")
        self.font_family_combo.addItems(["Courier", "Arial", "Times New Roman", "Consolas", "Monaco", "Monospace"])
        display_layout.addRow("Font Family:", self.font_family_combo)

        self.font_size_input = QLineEdit()
        self.font_size_input.setToolTip("Font size in pixels (12-20 recommended)")
        display_layout.addRow("Font Size:", self.font_size_input)

        self.fade_seconds_input = QLineEdit()
        self.fade_seconds_input.setToolTip(
            "Seconds before transcriptions fade out:\n"
            "• 0 = Never fade (all transcriptions stay visible)\n"
            "• 10-30 = Good for OBS overlays"
        )
        display_layout.addRow("Fade After (seconds):", self.fade_seconds_input)

        display_group.setLayout(display_layout)
        content_layout.addWidget(display_group)
        # Server Sync Group
        server_group = QGroupBox("Multi-User Server Sync (Optional)")
        server_layout = QFormLayout()
        server_layout.setSpacing(10)

        self.server_enabled_check = QCheckBox()
        self.server_enabled_check.setToolTip(
            "Enable multi-user server synchronization:\n"
            "• Share transcriptions with other users in real-time\n"
            "• Requires Node.js server (see server/nodejs/README.md)\n"
            "• All users in same room see combined transcriptions"
        )
        server_layout.addRow("Enable Server Sync:", self.server_enabled_check)

        self.server_url_input = QLineEdit()
        self.server_url_input.setPlaceholderText("http://your-server:3000/api/send")
        self.server_url_input.setToolTip("URL of your Node.js multi-user server's /api/send endpoint")
        server_layout.addRow("Server URL:", self.server_url_input)

        self.server_room_input = QLineEdit()
        self.server_room_input.setPlaceholderText("my-room-name")
        self.server_room_input.setToolTip(
            "Room name for multi-user sessions:\n"
            "• All users with same room name see each other's transcriptions\n"
            "• Use unique room names for different groups/streams"
        )
        server_layout.addRow("Room Name:", self.server_room_input)

        self.server_passphrase_input = QLineEdit()
        self.server_passphrase_input.setEchoMode(QLineEdit.Password)
        self.server_passphrase_input.setPlaceholderText("shared-secret")
        self.server_passphrase_input.setToolTip(
            "Shared secret passphrase for room access:\n"
            "• All users must use same passphrase to join room\n"
            "• Prevents unauthorized access to your transcriptions"
        )
        server_layout.addRow("Passphrase:", self.server_passphrase_input)

        server_group.setLayout(server_layout)
        content_layout.addWidget(server_group)

        # Add stretch to push everything to the top
        content_layout.addStretch()

        # Buttons (outside scroll area, always visible at bottom)
        button_container = QWidget()
        button_layout = QHBoxLayout()
        button_layout.setContentsMargins(20, 10, 20, 10)
        button_layout.addStretch()

        self.cancel_button = QPushButton("Cancel")
        self.cancel_button.clicked.connect(self.reject)
        button_layout.addWidget(self.cancel_button)

        self.save_button = QPushButton("Save")
        self.save_button.clicked.connect(self._save_settings)
        self.save_button.setDefault(True)
        button_layout.addWidget(self.save_button)

        button_container.setLayout(button_layout)
        main_layout.addWidget(button_container)

    def _update_silero_label(self, value):
        """Update the Silero sensitivity label."""
        self.silero_label.setText(f"{value / 100:.2f}")

    def _load_current_settings(self):
        """Load current settings from config."""
        # User settings
        self.name_input.setText(self.config.get('user.name', 'User'))

        # Audio settings
        current_device = self.config.get('audio.input_device', 'default')
        for idx, (dev_idx, dev_name) in enumerate(self.audio_devices):
            if str(dev_idx) == current_device or (current_device == 'default' and idx == 0):
                self.audio_device_combo.setCurrentIndex(idx)
                break

        # Transcription settings
        model = self.config.get('transcription.model', 'base.en')
        self.model_combo.setCurrentText(model)

        current_compute = self.config.get('transcription.device', 'auto')
        for idx, (dev_id, dev_desc) in enumerate(self.compute_devices):
            if dev_id == current_compute or (current_compute == 'auto' and idx == 0):
                self.compute_device_combo.setCurrentIndex(idx)
                break

        compute_type = self.config.get('transcription.compute_type', 'default')
        self.compute_type_combo.setCurrentText(compute_type)

        lang = self.config.get('transcription.language', 'en')
        self.lang_combo.setCurrentText(lang)

        beam_size = self.config.get('transcription.beam_size', 5)
        self.beam_size_combo.setCurrentText(str(beam_size))

        # Realtime preview
        self.realtime_enabled_check.setChecked(self.config.get('transcription.enable_realtime_transcription', False))
        realtime_model = self.config.get('transcription.realtime_model', 'tiny.en')
        self.realtime_model_combo.setCurrentText(realtime_model)

        # VAD settings
        silero_sens = self.config.get('transcription.silero_sensitivity', 0.4)
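        # Note: the config stores sensitivity as a float in [0.0, 1.0], while the
        # slider works on an integer 0-100 scale, so scale by 100 here and divide
        # by 100 again when saving in _save_settings.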
        self.silero_slider.setValue(int(silero_sens * 100))
        self._update_silero_label(int(silero_sens * 100))

        webrtc_sens = self.config.get('transcription.webrtc_sensitivity', 3)
        self.webrtc_combo.setCurrentIndex(webrtc_sens)

        self.silero_onnx_check.setChecked(self.config.get('transcription.silero_use_onnx', True))

        # Advanced timing
        self.post_silence_input.setText(str(self.config.get('transcription.post_speech_silence_duration', 0.3)))
        self.min_recording_input.setText(str(self.config.get('transcription.min_length_of_recording', 0.5)))
        self.pre_buffer_input.setText(str(self.config.get('transcription.pre_recording_buffer_duration', 0.2)))

        # Display settings
        self.timestamps_check.setChecked(self.config.get('display.show_timestamps', True))
        self.maxlines_input.setText(str(self.config.get('display.max_lines', 100)))
        font_family = self.config.get('display.font_family', 'Courier')
        self.font_family_combo.setCurrentText(font_family)
        self.font_size_input.setText(str(self.config.get('display.font_size', 12)))
        self.fade_seconds_input.setText(str(self.config.get('display.fade_after_seconds', 10)))

        # Server sync settings
        self.server_enabled_check.setChecked(self.config.get('server_sync.enabled', False))
        self.server_url_input.setText(self.config.get('server_sync.url', ''))
        self.server_room_input.setText(self.config.get('server_sync.room', 'default'))
        self.server_passphrase_input.setText(self.config.get('server_sync.passphrase', ''))

    def _save_settings(self):
        """Save settings to config."""
        try:
            # User settings
            self.config.set('user.name', self.name_input.text())

            # Audio settings
            selected_audio_idx = self.audio_device_combo.currentIndex()
            dev_idx, _ = self.audio_devices[selected_audio_idx]
            self.config.set('audio.input_device', str(dev_idx))

            # Transcription settings
            self.config.set('transcription.model', self.model_combo.currentText())

            selected_compute_idx = self.compute_device_combo.currentIndex()
            dev_id, _ = self.compute_devices[selected_compute_idx]
            self.config.set('transcription.device', dev_id)

            self.config.set('transcription.compute_type', self.compute_type_combo.currentText())
            self.config.set('transcription.language', self.lang_combo.currentText())
            self.config.set('transcription.beam_size', int(self.beam_size_combo.currentText()))

            # Realtime preview
            self.config.set('transcription.enable_realtime_transcription', self.realtime_enabled_check.isChecked())
            self.config.set('transcription.realtime_model', self.realtime_model_combo.currentText())

            # VAD settings
            self.config.set('transcription.silero_sensitivity', self.silero_slider.value() / 100.0)
            self.config.set('transcription.webrtc_sensitivity', self.webrtc_combo.currentIndex())
            self.config.set('transcription.silero_use_onnx', self.silero_onnx_check.isChecked())

            # Advanced timing
            self.config.set('transcription.post_speech_silence_duration', float(self.post_silence_input.text()))
            self.config.set('transcription.min_length_of_recording', float(self.min_recording_input.text()))
            self.config.set('transcription.pre_recording_buffer_duration', float(self.pre_buffer_input.text()))

            # Display settings
            self.config.set('display.show_timestamps', self.timestamps_check.isChecked())
            max_lines = int(self.maxlines_input.text())
            self.config.set('display.max_lines', max_lines)
            self.config.set('display.font_family', self.font_family_combo.currentText())
            font_size = int(self.font_size_input.text())
            self.config.set('display.font_size', font_size)
            fade_seconds = int(self.fade_seconds_input.text())
            self.config.set('display.fade_after_seconds', fade_seconds)

            # Server sync settings
            self.config.set('server_sync.enabled', self.server_enabled_check.isChecked())
            self.config.set('server_sync.url', self.server_url_input.text())
            self.config.set('server_sync.room', self.server_room_input.text())
            self.config.set('server_sync.passphrase', self.server_passphrase_input.text())

            # Call save callback (which will show the success message)
            if self.on_save:
                self.on_save()
            else:
                # Only show message if no callback
                QMessageBox.information(self, "Settings Saved", "Settings have been saved successfully!")

            self.accept()

        except ValueError as e:
            QMessageBox.critical(self, "Invalid Input", f"Please check your input values:\n{e}")
        except Exception as e:
            QMessageBox.critical(self, "Error", f"Failed to save settings:\n{e}")
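

# ---------------------------------------------------------------------------
# Minimal usage sketch (not part of the dialog itself): opens the dialog
# standalone so the widgets can be exercised without the rest of the
# application. The _DemoConfig class and the hard-coded device lists below are
# hypothetical stand-ins that only mimic the get/set interface and the
# (index, name) / (id, description) tuples the dialog expects; the real
# application supplies its own config object and device enumeration.
if __name__ == "__main__":
    import sys
    from PySide6.QtWidgets import QApplication

    class _DemoConfig:
        """Hypothetical dict-backed config exposing the get/set API used above."""

        def __init__(self):
            self._values = {}

        def get(self, key, default=None):
            return self._values.get(key, default)

        def set(self, key, value):
            self._values[key] = value

    app = QApplication(sys.argv)
    dialog = SettingsDialog(
        parent=None,
        config=_DemoConfig(),
        audio_devices=[(0, "Default Microphone")],
        compute_devices=[("cpu", "CPU"), ("cuda", "GPU (CUDA)")],
        on_save=lambda: print("Settings saved"),
    )
    dialog.exec()  # Blocks until Save or Cancel is clicked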