Initial commit: Local Transcription App v1.0
Phase 1 Complete - Standalone Desktop Application

Features:
- Real-time speech-to-text with Whisper (faster-whisper)
- PySide6 desktop GUI with settings dialog
- Web server for OBS browser source integration
- Audio capture with automatic sample rate detection and resampling
- Noise suppression with Voice Activity Detection (VAD)
- Configurable display settings (font, timestamps, fade duration)
- Settings apply without restart (with automatic model reloading)
- Auto-fade for web display transcriptions
- CPU/GPU support with automatic device detection
- Standalone executable builds (PyInstaller)
- CUDA build support (works on systems without CUDA hardware)

Components:
- Audio capture with sounddevice
- Noise reduction with noisereduce + webrtcvad
- Transcription with faster-whisper
- GUI with PySide6
- Web server with FastAPI + WebSocket
- Configuration system with YAML

Build System:
- Standard builds (CPU-only): build.sh / build.bat
- CUDA builds (universal): build-cuda.sh / build-cuda.bat
- Comprehensive BUILD.md documentation
- Cross-platform support (Linux, Windows)

Documentation:
- README.md with project overview and quick start
- BUILD.md with detailed build instructions
- NEXT_STEPS.md with future enhancement roadmap
- INSTALL.md with setup instructions

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
261
gui/settings_dialog_qt.py
Normal file
261
gui/settings_dialog_qt.py
Normal file
@@ -0,0 +1,261 @@
|
||||
"""PySide6 settings dialog for configuring the application."""
|
||||
|
||||
from PySide6.QtWidgets import (
|
||||
QDialog, QVBoxLayout, QHBoxLayout, QFormLayout,
|
||||
QLabel, QLineEdit, QComboBox, QCheckBox, QSlider,
|
||||
QPushButton, QMessageBox, QGroupBox
|
||||
)
|
||||
from PySide6.QtCore import Qt
|
||||
from typing import Callable, List, Tuple
|
||||
|
||||
|
||||
class SettingsDialog(QDialog):
    """Modal PySide6 dialog for viewing and editing application settings.

    Initial values are read with ``config.get(key, default)`` and, when the
    user presses Save, written back with ``config.set(key, value)``.  All
    user input is validated *before* anything is written, so a rejected
    value never leaves the configuration half-updated.
    """

    def __init__(
        self,
        parent,
        config,
        audio_devices: List[Tuple[int, str]],
        compute_devices: List[Tuple[str, str]],
        on_save: Callable = None
    ):
        """
        Initialize settings dialog.

        Args:
            parent: Parent window
            config: Configuration object exposing ``get(key, default)`` and
                ``set(key, value)``
            audio_devices: List of (device_index, device_name) tuples
            compute_devices: List of (device_id, device_description) tuples
            on_save: Optional callback, invoked with no arguments after the
                new values have been written to ``config``
        """
        super().__init__(parent)

        self.config = config
        self.audio_devices = audio_devices
        self.compute_devices = compute_devices
        self.on_save = on_save

        # Window configuration
        self.setWindowTitle("Settings")
        self.setMinimumSize(600, 700)
        self.setModal(True)

        self._create_widgets()
        self._load_current_settings()

    # ------------------------------------------------------------------
    # Widget construction
    # ------------------------------------------------------------------

    def _create_widgets(self) -> None:
        """Create all settings widgets and lay them out top to bottom."""
        main_layout = QVBoxLayout()
        self.setLayout(main_layout)

        main_layout.addWidget(self._build_user_group())
        main_layout.addWidget(self._build_audio_group())
        main_layout.addWidget(self._build_transcription_group())
        main_layout.addWidget(self._build_noise_group())
        main_layout.addWidget(self._build_display_group())
        main_layout.addLayout(self._build_button_row())

    def _build_user_group(self) -> QGroupBox:
        """Build the "User Settings" group (display name)."""
        group = QGroupBox("User Settings")
        form = QFormLayout()

        self.name_input = QLineEdit()
        form.addRow("Display Name:", self.name_input)

        group.setLayout(form)
        return group

    def _build_audio_group(self) -> QGroupBox:
        """Build the "Audio Settings" group (input device, chunk duration)."""
        group = QGroupBox("Audio Settings")
        form = QFormLayout()

        # Combo rows are in the same order as self.audio_devices, so the
        # combo index can be used to look the device tuple back up on save.
        self.audio_device_combo = QComboBox()
        self.audio_device_combo.addItems([name for _, name in self.audio_devices])
        form.addRow("Input Device:", self.audio_device_combo)

        self.chunk_input = QLineEdit()
        form.addRow("Chunk Duration (s):", self.chunk_input)

        group.setLayout(form)
        return group

    def _build_transcription_group(self) -> QGroupBox:
        """Build the "Transcription Settings" group (model, device, language)."""
        group = QGroupBox("Transcription Settings")
        form = QFormLayout()

        self.model_combo = QComboBox()
        self.model_combo.addItems(["tiny", "base", "small", "medium", "large"])
        form.addRow("Model Size:", self.model_combo)

        # Same index-to-tuple correspondence as the audio device combo.
        self.compute_device_combo = QComboBox()
        self.compute_device_combo.addItems([desc for _, desc in self.compute_devices])
        form.addRow("Compute Device:", self.compute_device_combo)

        self.lang_combo = QComboBox()
        self.lang_combo.addItems(["auto", "en", "es", "fr", "de", "it", "pt", "ru", "zh", "ja", "ko"])
        form.addRow("Language:", self.lang_combo)

        group.setLayout(form)
        return group

    def _build_noise_group(self) -> QGroupBox:
        """Build the "Noise Suppression" group (enable, strength, VAD)."""
        group = QGroupBox("Noise Suppression")
        layout = QVBoxLayout()

        self.noise_enabled_check = QCheckBox("Enable Noise Suppression")
        layout.addWidget(self.noise_enabled_check)

        # Strength slider: integer 0-100, stored in the config as 0.00-1.00.
        strength_row = QHBoxLayout()
        strength_row.addWidget(QLabel("Strength:"))

        self.noise_strength_slider = QSlider(Qt.Horizontal)
        self.noise_strength_slider.setMinimum(0)
        self.noise_strength_slider.setMaximum(100)
        self.noise_strength_slider.setValue(70)
        strength_row.addWidget(self.noise_strength_slider)

        self.noise_strength_label = QLabel("0.70")
        strength_row.addWidget(self.noise_strength_label)

        # Connect only once the label exists, so the slot can never fire
        # against a missing attribute.
        self.noise_strength_slider.valueChanged.connect(self._update_strength_label)

        layout.addLayout(strength_row)

        self.vad_enabled_check = QCheckBox("Enable Voice Activity Detection")
        layout.addWidget(self.vad_enabled_check)

        group.setLayout(layout)
        return group

    def _build_display_group(self) -> QGroupBox:
        """Build the "Display Settings" group (timestamps, lines, font, fade)."""
        group = QGroupBox("Display Settings")
        form = QFormLayout()

        self.timestamps_check = QCheckBox()
        form.addRow("Show Timestamps:", self.timestamps_check)

        self.maxlines_input = QLineEdit()
        form.addRow("Max Lines:", self.maxlines_input)

        self.font_family_combo = QComboBox()
        self.font_family_combo.addItems(["Courier", "Arial", "Times New Roman", "Consolas", "Monaco", "Monospace"])
        form.addRow("Font Family:", self.font_family_combo)

        self.font_size_input = QLineEdit()
        form.addRow("Font Size:", self.font_size_input)

        self.fade_seconds_input = QLineEdit()
        form.addRow("Fade After (seconds):", self.fade_seconds_input)

        group.setLayout(form)
        return group

    def _build_button_row(self) -> QHBoxLayout:
        """Build the right-aligned Cancel / Save button row."""
        row = QHBoxLayout()
        row.addStretch()

        self.cancel_button = QPushButton("Cancel")
        self.cancel_button.clicked.connect(self.reject)
        row.addWidget(self.cancel_button)

        self.save_button = QPushButton("Save")
        self.save_button.clicked.connect(self._save_settings)
        self.save_button.setDefault(True)
        row.addWidget(self.save_button)

        return row

    def _update_strength_label(self, value):
        """Show the slider position as the 0.00-1.00 value that will be saved.

        Two decimals are used because the slider has 101 steps; with a single
        decimal, ten different positions would share the same label.
        """
        self.noise_strength_label.setText(f"{value / 100:.2f}")

    # ------------------------------------------------------------------
    # Config -> widgets
    # ------------------------------------------------------------------

    @staticmethod
    def _find_device_row(devices, wanted) -> int:
        """Return the row whose device id equals ``wanted``, else row 0.

        Both sides are compared as strings so an id stored as an int in the
        config still matches.  Values that match no row (such as the
        'default' / 'auto' placeholders when no device carries that id)
        fall back to the first row.
        """
        wanted = str(wanted)
        for row, (device_id, _label) in enumerate(devices):
            if str(device_id) == wanted:
                return row
        return 0

    @staticmethod
    def _select_combo_text(combo, text) -> None:
        """Select ``text`` in ``combo``, adding it first if it is not listed.

        Without this, a configured value outside the built-in choices would
        be ignored on load and then silently replaced by the first item the
        next time the user saves.
        """
        if text is None or text == "":
            return  # nothing configured: keep the combo's default selection
        text = str(text)
        if combo.findText(text) < 0:
            combo.addItem(text)
        combo.setCurrentText(text)

    def _load_current_settings(self) -> None:
        """Populate every widget from the current values in ``self.config``."""
        cfg = self.config

        # User settings
        self.name_input.setText(str(cfg.get('user.name', 'User')))

        # Audio settings
        if self.audio_devices:
            self.audio_device_combo.setCurrentIndex(
                self._find_device_row(self.audio_devices, cfg.get('audio.input_device', 'default'))
            )
        self.chunk_input.setText(str(cfg.get('audio.chunk_duration', 3.0)))

        # Transcription settings
        self._select_combo_text(self.model_combo, cfg.get('transcription.model', 'base'))
        if self.compute_devices:
            self.compute_device_combo.setCurrentIndex(
                self._find_device_row(self.compute_devices, cfg.get('transcription.device', 'auto'))
            )
        self._select_combo_text(self.lang_combo, cfg.get('transcription.language', 'en'))

        # Noise suppression
        self.noise_enabled_check.setChecked(bool(cfg.get('noise_suppression.enabled', True)))
        strength = float(cfg.get('noise_suppression.strength', 0.7))
        # round(), not int(): 0.29 * 100 == 28.999999999999996, which int()
        # would truncate to 28 and make the value drift on every save.
        self.noise_strength_slider.setValue(round(strength * 100))
        # Read the value back from the slider so the label reflects any
        # clamping to the 0-100 range; also covers the case where setValue
        # did not change the value and therefore emitted no signal.
        self._update_strength_label(self.noise_strength_slider.value())
        self.vad_enabled_check.setChecked(bool(cfg.get('processing.use_vad', True)))

        # Display settings
        self.timestamps_check.setChecked(bool(cfg.get('display.show_timestamps', True)))
        self.maxlines_input.setText(str(cfg.get('display.max_lines', 100)))
        self._select_combo_text(self.font_family_combo, cfg.get('display.font_family', 'Courier'))
        self.font_size_input.setText(str(cfg.get('display.font_size', 12)))
        self.fade_seconds_input.setText(str(cfg.get('display.fade_after_seconds', 10)))

    # ------------------------------------------------------------------
    # Widgets -> config
    # ------------------------------------------------------------------

    @staticmethod
    def _parse_number(line_edit, cast, label, allow_zero):
        """Parse a numeric text field, raising ValueError with a clear message.

        Args:
            line_edit: The QLineEdit holding the user's text.
            cast: ``int`` or ``float``; the conversion to apply.
            label: Human-readable field name used in error messages.
            allow_zero: Whether 0 is accepted; negative values never are.

        Returns:
            The parsed number.

        Raises:
            ValueError: If the text is not a finite number of the requested
                type or is outside the accepted range.
        """
        import math  # local import: only needed for the finiteness check

        raw = line_edit.text().strip()
        try:
            value = cast(raw)
        except ValueError:
            kind = "a whole number" if cast is int else "a number"
            raise ValueError(f"{label} must be {kind} (got {raw!r})") from None
        if not math.isfinite(value):
            raise ValueError(f"{label} must be a finite number (got {raw!r})")
        if value < 0:
            raise ValueError(f"{label} cannot be negative (got {raw!r})")
        if value == 0 and not allow_zero:
            raise ValueError(f"{label} must be greater than 0")
        return value

    def _collect_settings(self) -> dict:
        """Read and validate every widget, returning ``{config_key: value}``.

        Nothing is written to ``self.config`` here, so a validation failure
        leaves the configuration untouched.

        Raises:
            ValueError: If any numeric field fails validation.
        """
        settings = {}

        # User settings
        settings['user.name'] = self.name_input.text()

        # Audio settings
        audio_row = self.audio_device_combo.currentIndex()
        if 0 <= audio_row < len(self.audio_devices):
            dev_idx, _ = self.audio_devices[audio_row]
            settings['audio.input_device'] = str(dev_idx)
        # else: no device is available/selected; keep whatever is configured.

        settings['audio.chunk_duration'] = self._parse_number(
            self.chunk_input, float, "Chunk duration", allow_zero=False
        )

        # Transcription settings
        settings['transcription.model'] = self.model_combo.currentText()

        compute_row = self.compute_device_combo.currentIndex()
        if 0 <= compute_row < len(self.compute_devices):
            dev_id, _ = self.compute_devices[compute_row]
            settings['transcription.device'] = dev_id

        settings['transcription.language'] = self.lang_combo.currentText()

        # Noise suppression
        settings['noise_suppression.enabled'] = self.noise_enabled_check.isChecked()
        settings['noise_suppression.strength'] = self.noise_strength_slider.value() / 100.0
        settings['processing.use_vad'] = self.vad_enabled_check.isChecked()

        # Display settings
        settings['display.show_timestamps'] = self.timestamps_check.isChecked()
        # NOTE(review): 0 is still accepted for max lines and fade time in
        # case the consumers treat it as "unlimited" / "never" - confirm.
        settings['display.max_lines'] = self._parse_number(
            self.maxlines_input, int, "Max lines", allow_zero=True
        )
        settings['display.font_family'] = self.font_family_combo.currentText()
        settings['display.font_size'] = self._parse_number(
            self.font_size_input, int, "Font size", allow_zero=False
        )
        settings['display.fade_after_seconds'] = self._parse_number(
            self.fade_seconds_input, int, "Fade time", allow_zero=True
        )

        return settings

    def _save_settings(self) -> None:
        """Validate the form, write it to config, notify, and close.

        Invalid input shows an "Invalid Input" message and leaves both the
        dialog open and the config unchanged.  Any failure while writing the
        config or running the ``on_save`` callback shows an "Error" message
        and also leaves the dialog open.
        """
        # Phase 1: validate everything before touching the config.
        try:
            settings = self._collect_settings()
        except ValueError as e:
            QMessageBox.critical(self, "Invalid Input", f"Please check your input values:\n{e}")
            return

        # Phase 2: apply. Broad catch is intentional at this UI boundary so
        # a failing config backend or callback is reported, not propagated
        # into the Qt event loop.
        try:
            for key, value in settings.items():
                self.config.set(key, value)

            # Call save callback
            if self.on_save:
                self.on_save()
        except Exception as e:
            QMessageBox.critical(self, "Error", f"Failed to save settings:\n{e}")
            return

        QMessageBox.information(self, "Settings Saved", "Settings have been saved successfully!")
        self.accept()
|
||||
Reference in New Issue
Block a user