Files
local-transcription/gui/settings_dialog.py
Josh Knapp 472233aec4 Initial commit: Local Transcription App v1.0
Phase 1 Complete - Standalone Desktop Application

Features:
- Real-time speech-to-text with Whisper (faster-whisper)
- PySide6 desktop GUI with settings dialog
- Web server for OBS browser source integration
- Audio capture with automatic sample rate detection and resampling
- Noise suppression with Voice Activity Detection (VAD)
- Configurable display settings (font, timestamps, fade duration)
- Settings apply without restart (with automatic model reloading)
- Auto-fade for web display transcriptions
- CPU/GPU support with automatic device detection
- Standalone executable builds (PyInstaller)
- CUDA build support (works on systems without CUDA hardware)

Components:
- Audio capture with sounddevice
- Noise reduction with noisereduce + webrtcvad
- Transcription with faster-whisper
- GUI with PySide6
- Web server with FastAPI + WebSocket
- Configuration system with YAML

Build System:
- Standard builds (CPU-only): build.sh / build.bat
- CUDA builds (universal): build-cuda.sh / build-cuda.bat
- Comprehensive BUILD.md documentation
- Cross-platform support (Linux, Windows)

Documentation:
- README.md with project overview and quick start
- BUILD.md with detailed build instructions
- NEXT_STEPS.md with future enhancement roadmap
- INSTALL.md with setup instructions

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-12-25 18:48:23 -08:00

311 lines
12 KiB
Python

"""Settings dialog for configuring the application."""
from tkinter import messagebox
from typing import Callable, List, Optional, Tuple

import customtkinter as ctk
class SettingsDialog(ctk.CTkToplevel):
    """Modal dialog window for editing application settings.

    The dialog reads its initial values from ``config`` (via
    ``config.get(key, default)``) and, when the user presses *Save*, writes
    them back with ``config.set(key, value)`` and then invokes ``on_save``.

    Widgets exposed as attributes (names are part of the class surface):
        name_entry, audio_device_menu, chunk_entry, model_menu,
        compute_device_menu, lang_menu, noise_enabled_var,
        noise_enabled_check, noise_strength_slider, noise_strength_label,
        vad_enabled_var, vad_enabled_check, timestamps_var, timestamps_check,
        maxlines_entry, save_button, cancel_button.
    """

    # Shared layout/choice constants so every section is built the same way.
    _LABEL_WIDTH = 150
    _SECTION_FONT = ("", 16, "bold")
    _MODEL_SIZES = ["tiny", "base", "small", "medium", "large"]
    _LANGUAGES = ["auto", "en", "es", "fr", "de", "it", "pt", "ru", "zh", "ja", "ko"]

    def __init__(
        self,
        parent,
        config,
        audio_devices: List[Tuple[int, str]],
        compute_devices: List[Tuple[str, str]],
        on_save: Optional[Callable[[], None]] = None,
    ):
        """
        Initialize settings dialog.

        Args:
            parent: Parent window
            config: Configuration object exposing ``get(key, default)`` and
                ``set(key, value)`` with dotted keys
            audio_devices: List of (device_index, device_name) tuples
            compute_devices: List of (device_id, device_description) tuples
            on_save: Optional zero-argument callback invoked after the
                settings have been written to ``config``
        """
        super().__init__(parent)

        # NOTE(review): this instance attribute shadows the ``config`` alias
        # for ``configure`` that tkinter widgets inherit. The name is kept
        # because code outside this class may read ``dialog.config``; use
        # ``configure()`` when changing widget options on the dialog itself.
        self.config = config
        self.audio_devices = audio_devices
        self.compute_devices = compute_devices
        self.on_save = on_save

        # Window configuration
        self.title("Settings")
        self.geometry("600x700")
        self.resizable(False, False)

        # Make dialog modal
        self.transient(parent)
        self.grab_set()

        self._create_widgets()
        self._load_current_settings()

    # ------------------------------------------------------------------
    # Widget construction
    # ------------------------------------------------------------------

    def _create_widgets(self):
        """Create all settings widgets, one section at a time, top to bottom."""
        # Main container with padding
        main_frame = ctk.CTkFrame(self)
        main_frame.pack(fill="both", expand=True, padx=20, pady=20)

        self._build_user_section(main_frame)
        self._build_audio_section(main_frame)
        self._build_transcription_section(main_frame)
        self._build_noise_section(main_frame)
        self._build_display_section(main_frame)
        self._build_buttons(main_frame)

    def _add_section(self, parent, title):
        """Create a titled section frame inside *parent* and return it."""
        section = ctk.CTkFrame(parent)
        section.pack(fill="x", pady=(0, 15))
        ctk.CTkLabel(section, text=title, font=self._SECTION_FONT).pack(
            anchor="w", padx=10, pady=(10, 5)
        )
        return section

    def _add_row(self, section, label=None):
        """Create one settings row in *section*; add a left label if given."""
        row = ctk.CTkFrame(section)
        row.pack(fill="x", padx=10, pady=5)
        if label is not None:
            ctk.CTkLabel(row, text=label, width=self._LABEL_WIDTH).pack(
                side="left", padx=5
            )
        return row

    def _build_user_section(self, parent):
        """Build the "User Settings" section (display name)."""
        section = self._add_section(parent, "User Settings")

        name_row = self._add_row(section, "Display Name:")
        self.name_entry = ctk.CTkEntry(name_row, width=300)
        self.name_entry.pack(side="left", padx=5)

    def _build_audio_section(self, parent):
        """Build the "Audio Settings" section (input device, chunk duration)."""
        section = self._add_section(parent, "Audio Settings")

        device_row = self._add_row(section, "Input Device:")
        device_names = [name for _, name in self.audio_devices]
        self.audio_device_menu = ctk.CTkOptionMenu(
            device_row, values=device_names, width=300
        )
        self.audio_device_menu.pack(side="left", padx=5)

        chunk_row = self._add_row(section, "Chunk Duration (s):")
        self.chunk_entry = ctk.CTkEntry(chunk_row, width=100)
        self.chunk_entry.pack(side="left", padx=5)

    def _build_transcription_section(self, parent):
        """Build the "Transcription Settings" section (model, device, language)."""
        section = self._add_section(parent, "Transcription Settings")

        model_row = self._add_row(section, "Model Size:")
        self.model_menu = ctk.CTkOptionMenu(
            model_row, values=list(self._MODEL_SIZES), width=200
        )
        self.model_menu.pack(side="left", padx=5)

        compute_row = self._add_row(section, "Compute Device:")
        device_descs = [desc for _, desc in self.compute_devices]
        self.compute_device_menu = ctk.CTkOptionMenu(
            compute_row, values=device_descs, width=300
        )
        self.compute_device_menu.pack(side="left", padx=5)

        lang_row = self._add_row(section, "Language:")
        self.lang_menu = ctk.CTkOptionMenu(
            lang_row, values=list(self._LANGUAGES), width=200
        )
        self.lang_menu.pack(side="left", padx=5)

    def _build_noise_section(self, parent):
        """Build the "Noise Suppression" section (enable, strength, VAD)."""
        section = self._add_section(parent, "Noise Suppression")

        enable_row = self._add_row(section)
        self.noise_enabled_var = ctk.BooleanVar()
        self.noise_enabled_check = ctk.CTkCheckBox(
            enable_row,
            text="Enable Noise Suppression",
            variable=self.noise_enabled_var,
        )
        self.noise_enabled_check.pack(side="left", padx=5)

        strength_row = self._add_row(section, "Strength:")
        self.noise_strength_slider = ctk.CTkSlider(
            strength_row,
            from_=0.0,
            to=1.0,
            number_of_steps=20,
            width=300,
        )
        self.noise_strength_slider.pack(side="left", padx=5)
        # Placeholder text; replaced by the configured value when settings load.
        self.noise_strength_label = ctk.CTkLabel(strength_row, text="0.70", width=40)
        self.noise_strength_label.pack(side="left", padx=5)
        # Wire the callback only after the label it updates exists.
        self.noise_strength_slider.configure(command=self._update_strength_label)

        vad_row = self._add_row(section)
        self.vad_enabled_var = ctk.BooleanVar()
        self.vad_enabled_check = ctk.CTkCheckBox(
            vad_row,
            text="Enable Voice Activity Detection",
            variable=self.vad_enabled_var,
        )
        self.vad_enabled_check.pack(side="left", padx=5)

    def _build_display_section(self, parent):
        """Build the "Display Settings" section (timestamps, max lines)."""
        section = self._add_section(parent, "Display Settings")

        ts_row = self._add_row(section)
        self.timestamps_var = ctk.BooleanVar()
        self.timestamps_check = ctk.CTkCheckBox(
            ts_row,
            text="Show Timestamps",
            variable=self.timestamps_var,
        )
        self.timestamps_check.pack(side="left", padx=5)

        maxlines_row = self._add_row(section, "Max Lines:")
        self.maxlines_entry = ctk.CTkEntry(maxlines_row, width=100)
        self.maxlines_entry.pack(side="left", padx=5)

    def _build_buttons(self, parent):
        """Build the Save / Cancel button row at the bottom of the dialog."""
        button_frame = ctk.CTkFrame(parent)
        button_frame.pack(fill="x", pady=(10, 0))

        # Both buttons pack to the right, so Save (packed first) ends up
        # right-most with Cancel to its left.
        self.save_button = ctk.CTkButton(
            button_frame,
            text="Save",
            command=self._save_settings,
            width=120,
        )
        self.save_button.pack(side="right", padx=5)

        self.cancel_button = ctk.CTkButton(
            button_frame,
            text="Cancel",
            command=self.destroy,
            width=120,
            fg_color="gray",
        )
        self.cancel_button.pack(side="right", padx=5)

    # ------------------------------------------------------------------
    # Helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _label_for_saved(devices, saved_value, fallback_value):
        """Return the menu label to preselect for a stored device value.

        An exact match on the device key always wins. Only when there is no
        exact match and the stored value equals *fallback_value* (e.g.
        ``'default'`` / ``'auto'``) is the first listed device used. Returns
        ``None`` when nothing should be preselected.
        """
        # Compare as strings so a key stored as an int still matches.
        saved = str(saved_value)
        for key, label in devices:
            if str(key) == saved:
                return label
        if saved == fallback_value and devices:
            return devices[0][1]
        return None

    @staticmethod
    def _find_by_label(devices, label):
        """Return the first ``(key, label)`` pair with this label, else None."""
        for entry in devices:
            if entry[1] == label:
                return entry
        return None

    @staticmethod
    def _parse_chunk_duration(text):
        """Parse the chunk-duration field into a positive, finite float.

        Raises:
            ValueError: if *text* is not a number, or is zero, negative,
                NaN or infinite.
        """
        try:
            seconds = float(text)
        except ValueError:
            raise ValueError(
                f"Chunk duration must be a number, got {text!r}."
            ) from None
        # The chained comparison is False for NaN, so NaN is rejected too.
        if not 0 < seconds < float("inf"):
            raise ValueError("Chunk duration must be a positive number of seconds.")
        return seconds

    @staticmethod
    def _parse_max_lines(text):
        """Parse the max-lines field into a non-negative int.

        Raises:
            ValueError: if *text* is not a whole number or is negative.
        """
        try:
            count = int(text)
        except ValueError:
            raise ValueError(
                f"Max lines must be a whole number, got {text!r}."
            ) from None
        # NOTE(review): 0 is passed through because how the display treats it
        # is not visible from this dialog; only negatives are rejected.
        if count < 0:
            raise ValueError("Max lines cannot be negative.")
        return count

    def _update_strength_label(self, value):
        """Show the current noise-suppression strength next to the slider.

        Two decimals are used because the slider moves in 0.05 steps
        (20 steps across 0.0-1.0); one decimal would misreport half of them.
        """
        self.noise_strength_label.configure(text=f"{value:.2f}")

    # ------------------------------------------------------------------
    # Load / save
    # ------------------------------------------------------------------

    def _load_current_settings(self):
        """Populate every widget from the current values in ``self.config``."""
        # User settings
        self.name_entry.insert(0, self.config.get('user.name', 'User'))

        # Audio settings
        audio_label = self._label_for_saved(
            self.audio_devices,
            self.config.get('audio.input_device', 'default'),
            'default',
        )
        if audio_label is not None:
            self.audio_device_menu.set(audio_label)
        self.chunk_entry.insert(0, str(self.config.get('audio.chunk_duration', 3.0)))

        # Transcription settings
        self.model_menu.set(self.config.get('transcription.model', 'base'))
        compute_label = self._label_for_saved(
            self.compute_devices,
            self.config.get('transcription.device', 'auto'),
            'auto',
        )
        if compute_label is not None:
            self.compute_device_menu.set(compute_label)
        self.lang_menu.set(self.config.get('transcription.language', 'en'))

        # Noise suppression
        self.noise_enabled_var.set(self.config.get('noise_suppression.enabled', True))
        self.noise_strength_slider.set(
            self.config.get('noise_suppression.strength', 0.7)
        )
        # Read the value back from the slider so the label shows what the
        # slider actually holds (and what Save will write).
        self._update_strength_label(self.noise_strength_slider.get())
        self.vad_enabled_var.set(self.config.get('processing.use_vad', True))

        # Display settings
        self.timestamps_var.set(self.config.get('display.show_timestamps', True))
        self.maxlines_entry.insert(0, str(self.config.get('display.max_lines', 100)))

    def _save_settings(self):
        """Validate the form, write it to ``self.config`` and close the dialog.

        All free-text fields are parsed before anything is written, so
        invalid input never leaves the config half-updated. On invalid input
        or on a failure while saving, an error box is shown and the dialog
        stays open.
        """
        # Step 1: validate everything that can fail on user input.
        try:
            chunk_duration = self._parse_chunk_duration(self.chunk_entry.get())
            max_lines = self._parse_max_lines(self.maxlines_entry.get())
        except ValueError as e:
            messagebox.showerror(
                "Invalid Input",
                f"Please check your input values:\n{e}",
                parent=self,
            )
            return

        # Step 2: apply the validated values and notify the owner.
        try:
            # User settings
            self.config.set('user.name', self.name_entry.get())

            # Audio settings
            audio_match = self._find_by_label(
                self.audio_devices, self.audio_device_menu.get()
            )
            if audio_match is not None:
                self.config.set('audio.input_device', str(audio_match[0]))
            self.config.set('audio.chunk_duration', chunk_duration)

            # Transcription settings
            self.config.set('transcription.model', self.model_menu.get())
            compute_match = self._find_by_label(
                self.compute_devices, self.compute_device_menu.get()
            )
            if compute_match is not None:
                self.config.set('transcription.device', compute_match[0])
            self.config.set('transcription.language', self.lang_menu.get())

            # Noise suppression
            self.config.set('noise_suppression.enabled', self.noise_enabled_var.get())
            self.config.set(
                'noise_suppression.strength', self.noise_strength_slider.get()
            )
            self.config.set('processing.use_vad', self.vad_enabled_var.get())

            # Display settings
            self.config.set('display.show_timestamps', self.timestamps_var.get())
            self.config.set('display.max_lines', max_lines)

            # Call save callback
            if self.on_save is not None:
                self.on_save()
        except Exception as e:
            # UI boundary: report the failure instead of letting it vanish
            # into the Tk callback machinery.
            messagebox.showerror(
                "Error", f"Failed to save settings:\n{e}", parent=self
            )
            return

        # parent=self keeps the message box on top of this modal dialog.
        messagebox.showinfo(
            "Settings Saved", "Settings have been saved successfully!", parent=self
        )
        self.destroy()