Phase 1 Complete - Standalone Desktop Application Features: - Real-time speech-to-text with Whisper (faster-whisper) - PySide6 desktop GUI with settings dialog - Web server for OBS browser source integration - Audio capture with automatic sample rate detection and resampling - Noise suppression with Voice Activity Detection (VAD) - Configurable display settings (font, timestamps, fade duration) - Settings apply without restart (with automatic model reloading) - Auto-fade for web display transcriptions - CPU/GPU support with automatic device detection - Standalone executable builds (PyInstaller) - CUDA build support (works on systems without CUDA hardware) Components: - Audio capture with sounddevice - Noise reduction with noisereduce + webrtcvad - Transcription with faster-whisper - GUI with PySide6 - Web server with FastAPI + WebSocket - Configuration system with YAML Build System: - Standard builds (CPU-only): build.sh / build.bat - CUDA builds (universal): build-cuda.sh / build-cuda.bat - Comprehensive BUILD.md documentation - Cross-platform support (Linux, Windows) Documentation: - README.md with project overview and quick start - BUILD.md with detailed build instructions - NEXT_STEPS.md with future enhancement roadmap - INSTALL.md with setup instructions 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
311 lines
12 KiB
Python
311 lines
12 KiB
Python
"""Settings dialog for configuring the application."""
|
|
|
|
import customtkinter as ctk
|
|
from tkinter import messagebox
|
|
from typing import Callable, List, Tuple
|
|
|
|
|
|
class SettingsDialog(ctk.CTkToplevel):
    """Modal dialog window for editing application settings.

    The dialog reads its initial values from ``config`` via
    ``config.get(key, default)`` and writes them back with
    ``config.set(key, value)`` when the user presses *Save*.
    """

    def __init__(
        self,
        parent,
        config,
        audio_devices: List[Tuple[int, str]],
        compute_devices: List[Tuple[str, str]],
        on_save: Callable = None
    ):
        """
        Initialize settings dialog.

        Args:
            parent: Parent window
            config: Configuration object exposing ``get(key, default)`` and
                ``set(key, value)`` with dotted keys (e.g. ``'user.name'``)
            audio_devices: List of (device_index, device_name) tuples
            compute_devices: List of (device_id, device_description) tuples
            on_save: Optional zero-argument callback invoked after all
                settings have been written to ``config``
        """
        super().__init__(parent)

        # NOTE(review): tkinter widgets expose ``config`` as an alias of
        # ``configure``; this attribute shadows that alias on the instance.
        # The name is kept so existing callers reading ``dialog.config`` keep
        # working -- confirm nothing calls ``dialog.config(...)`` as a method.
        self.config = config
        self.audio_devices = audio_devices
        self.compute_devices = compute_devices
        self.on_save = on_save

        # Window configuration
        self.title("Settings")
        self.geometry("600x700")
        self.resizable(False, False)

        # Make dialog modal
        self.transient(parent)
        self.grab_set()

        self._create_widgets()
        self._load_current_settings()

    # ------------------------------------------------------------------
    # Widget construction
    # ------------------------------------------------------------------

    def _add_section(self, parent, title):
        """Create a full-width section frame with a bold heading and return it."""
        section = ctk.CTkFrame(parent)
        section.pack(fill="x", pady=(0, 15))
        ctk.CTkLabel(section, text=title, font=("", 16, "bold")).pack(
            anchor="w", padx=10, pady=(10, 5)
        )
        return section

    def _add_row(self, section, label=None):
        """Create a row frame inside ``section``; add a left caption if ``label`` is given."""
        row = ctk.CTkFrame(section)
        row.pack(fill="x", padx=10, pady=5)
        if label is not None:
            ctk.CTkLabel(row, text=label, width=150).pack(side="left", padx=5)
        return row

    def _create_widgets(self):
        """Create all settings widgets."""
        # Main container with padding
        main_frame = ctk.CTkFrame(self)
        main_frame.pack(fill="both", expand=True, padx=20, pady=20)

        # Sections are packed top-to-bottom in this order.
        self._build_user_section(main_frame)
        self._build_audio_section(main_frame)
        self._build_transcription_section(main_frame)
        self._build_noise_section(main_frame)
        self._build_display_section(main_frame)
        self._build_buttons(main_frame)

    def _build_user_section(self, parent):
        """Build the "User Settings" section (display name entry)."""
        section = self._add_section(parent, "User Settings")

        row = self._add_row(section, "Display Name:")
        self.name_entry = ctk.CTkEntry(row, width=300)
        self.name_entry.pack(side="left", padx=5)

    def _build_audio_section(self, parent):
        """Build the "Audio Settings" section (input device, chunk duration)."""
        section = self._add_section(parent, "Audio Settings")

        # Audio device: the menu lists device names only; the index is
        # recovered from self.audio_devices when saving.
        row = self._add_row(section, "Input Device:")
        device_names = [name for _, name in self.audio_devices]
        self.audio_device_menu = ctk.CTkOptionMenu(row, values=device_names, width=300)
        self.audio_device_menu.pack(side="left", padx=5)

        # Chunk duration
        row = self._add_row(section, "Chunk Duration (s):")
        self.chunk_entry = ctk.CTkEntry(row, width=100)
        self.chunk_entry.pack(side="left", padx=5)

    def _build_transcription_section(self, parent):
        """Build the "Transcription Settings" section (model, device, language)."""
        section = self._add_section(parent, "Transcription Settings")

        # Model size
        row = self._add_row(section, "Model Size:")
        self.model_menu = ctk.CTkOptionMenu(
            row,
            values=["tiny", "base", "small", "medium", "large"],
            width=200
        )
        self.model_menu.pack(side="left", padx=5)

        # Compute device: the menu lists descriptions; the id is recovered
        # from self.compute_devices when saving.
        row = self._add_row(section, "Compute Device:")
        device_descs = [desc for _, desc in self.compute_devices]
        self.compute_device_menu = ctk.CTkOptionMenu(row, values=device_descs, width=300)
        self.compute_device_menu.pack(side="left", padx=5)

        # Language
        row = self._add_row(section, "Language:")
        self.lang_menu = ctk.CTkOptionMenu(
            row,
            values=["auto", "en", "es", "fr", "de", "it", "pt", "ru", "zh", "ja", "ko"],
            width=200
        )
        self.lang_menu.pack(side="left", padx=5)

    def _build_noise_section(self, parent):
        """Build the "Noise Suppression" section (enable, strength, VAD)."""
        section = self._add_section(parent, "Noise Suppression")

        # Enable noise suppression
        row = self._add_row(section)
        self.noise_enabled_var = ctk.BooleanVar()
        self.noise_enabled_check = ctk.CTkCheckBox(
            row,
            text="Enable Noise Suppression",
            variable=self.noise_enabled_var
        )
        self.noise_enabled_check.pack(side="left", padx=5)

        # Noise suppression strength: 20 steps over 0.0..1.0, i.e. 0.05 each
        row = self._add_row(section, "Strength:")
        self.noise_strength_slider = ctk.CTkSlider(
            row,
            from_=0.0,
            to=1.0,
            number_of_steps=20,
            width=300
        )
        self.noise_strength_slider.pack(side="left", padx=5)
        self.noise_strength_label = ctk.CTkLabel(row, text="0.7", width=40)
        self.noise_strength_label.pack(side="left", padx=5)
        # The command is attached only after the label exists, because the
        # callback writes to the label.
        self.noise_strength_slider.configure(command=self._update_strength_label)

        # VAD
        row = self._add_row(section)
        self.vad_enabled_var = ctk.BooleanVar()
        self.vad_enabled_check = ctk.CTkCheckBox(
            row,
            text="Enable Voice Activity Detection",
            variable=self.vad_enabled_var
        )
        self.vad_enabled_check.pack(side="left", padx=5)

    def _build_display_section(self, parent):
        """Build the "Display Settings" section (timestamps, max lines)."""
        section = self._add_section(parent, "Display Settings")

        # Show timestamps
        row = self._add_row(section)
        self.timestamps_var = ctk.BooleanVar()
        self.timestamps_check = ctk.CTkCheckBox(
            row,
            text="Show Timestamps",
            variable=self.timestamps_var
        )
        self.timestamps_check.pack(side="left", padx=5)

        # Max lines
        row = self._add_row(section, "Max Lines:")
        self.maxlines_entry = ctk.CTkEntry(row, width=100)
        self.maxlines_entry.pack(side="left", padx=5)

    def _build_buttons(self, parent):
        """Build the Save / Cancel button bar."""
        button_frame = ctk.CTkFrame(parent)
        button_frame.pack(fill="x", pady=(10, 0))

        # Both buttons are packed side="right"; Save is packed first so it
        # ends up right-most, with Cancel to its left.
        self.save_button = ctk.CTkButton(
            button_frame,
            text="Save",
            command=self._save_settings,
            width=120
        )
        self.save_button.pack(side="right", padx=5)

        self.cancel_button = ctk.CTkButton(
            button_frame,
            text="Cancel",
            command=self.destroy,
            width=120,
            fg_color="gray"
        )
        self.cancel_button.pack(side="right", padx=5)

    def _update_strength_label(self, value):
        """Update the noise strength label.

        Two decimals are shown because the slider moves in 0.05 increments;
        a single decimal would misreport every other step.
        """
        self.noise_strength_label.configure(text=f"{value:.2f}")

    # ------------------------------------------------------------------
    # Config -> widgets
    # ------------------------------------------------------------------

    def _load_current_settings(self):
        """Load current settings from config into the widgets."""
        # User settings
        self.name_entry.insert(0, self.config.get('user.name', 'User'))

        # Audio settings. Both sides are compared as strings so a config
        # that stores the device index as an int still matches.
        current_device = str(self.config.get('audio.input_device', 'default'))
        for position, (dev_idx, dev_name) in enumerate(self.audio_devices):
            if str(dev_idx) == current_device or (
                current_device == 'default' and position == 0
            ):
                self.audio_device_menu.set(dev_name)
                break

        self.chunk_entry.insert(0, str(self.config.get('audio.chunk_duration', 3.0)))

        # Transcription settings
        self.model_menu.set(self.config.get('transcription.model', 'base'))

        # 'auto' falls back to the first listed compute device when no
        # device id equals 'auto' before it.
        current_compute = self.config.get('transcription.device', 'auto')
        for position, (dev_id, dev_desc) in enumerate(self.compute_devices):
            if dev_id == current_compute or (
                current_compute == 'auto' and position == 0
            ):
                self.compute_device_menu.set(dev_desc)
                break

        self.lang_menu.set(self.config.get('transcription.language', 'en'))

        # Noise suppression
        self.noise_enabled_var.set(self.config.get('noise_suppression.enabled', True))
        strength = self.config.get('noise_suppression.strength', 0.7)
        self.noise_strength_slider.set(strength)
        # slider.set() is followed by an explicit label refresh so the label
        # always reflects the loaded value.
        self._update_strength_label(strength)
        self.vad_enabled_var.set(self.config.get('processing.use_vad', True))

        # Display settings
        self.timestamps_var.set(self.config.get('display.show_timestamps', True))
        self.maxlines_entry.insert(0, str(self.config.get('display.max_lines', 100)))

    # ------------------------------------------------------------------
    # Widgets -> config
    # ------------------------------------------------------------------

    @staticmethod
    def _parse_chunk_duration(text):
        """Parse the chunk duration entry.

        Returns:
            The duration as a float.

        Raises:
            ValueError: If ``text`` is not a number, or is not a positive
                finite value.
        """
        try:
            value = float(text)
        except ValueError:
            raise ValueError(
                f"Chunk duration must be a number (got {text!r})"
            ) from None
        # NaN fails both comparisons, so this rejects NaN, +/-inf and <= 0.
        if not 0 < value < float("inf"):
            raise ValueError(
                f"Chunk duration must be a positive number (got {text!r})"
            )
        return value

    @staticmethod
    def _parse_max_lines(text):
        """Parse the max-lines entry.

        Returns:
            The line count as an int.

        Raises:
            ValueError: If ``text`` is not a whole number or is negative.
        """
        try:
            value = int(text)
        except ValueError:
            raise ValueError(
                f"Max lines must be a whole number (got {text!r})"
            ) from None
        # Zero is not rejected: how the display treats 0 is not visible here.
        if value < 0:
            raise ValueError(f"Max lines cannot be negative (got {text!r})")
        return value

    def _save_settings(self):
        """Validate the form, write every setting to config, then close.

        Free-text fields are validated before anything is written, so an
        invalid entry leaves the config untouched. On success the ``on_save``
        callback (if any) runs, a confirmation is shown, and the dialog is
        destroyed. On any failure an error box is shown and the dialog stays
        open.
        """
        # Phase 1: validate user-typed values without touching config.
        try:
            chunk_duration = self._parse_chunk_duration(self.chunk_entry.get())
            max_lines = self._parse_max_lines(self.maxlines_entry.get())
        except ValueError as e:
            messagebox.showerror(
                "Invalid Input",
                f"Please check your input values:\n{e}",
                parent=self,
            )
            return

        # Phase 2: apply. Errors here (including a ValueError raised by
        # config.set or the callback) are save failures, not input errors.
        try:
            # User settings
            self.config.set('user.name', self.name_entry.get())

            # Audio settings: map the selected name back to its device index.
            selected_audio = self.audio_device_menu.get()
            for dev_idx, dev_name in self.audio_devices:
                if dev_name == selected_audio:
                    self.config.set('audio.input_device', str(dev_idx))
                    break

            self.config.set('audio.chunk_duration', chunk_duration)

            # Transcription settings
            self.config.set('transcription.model', self.model_menu.get())

            # Map the selected description back to its device id.
            selected_compute = self.compute_device_menu.get()
            for dev_id, dev_desc in self.compute_devices:
                if dev_desc == selected_compute:
                    self.config.set('transcription.device', dev_id)
                    break

            self.config.set('transcription.language', self.lang_menu.get())

            # Noise suppression. Rounded to the slider's 0.05 granularity
            # precision so float noise (e.g. 0.35000000000000003) is not
            # persisted.
            self.config.set('noise_suppression.enabled', self.noise_enabled_var.get())
            self.config.set(
                'noise_suppression.strength',
                round(self.noise_strength_slider.get(), 2),
            )
            self.config.set('processing.use_vad', self.vad_enabled_var.get())

            # Display settings
            self.config.set('display.show_timestamps', self.timestamps_var.get())
            self.config.set('display.max_lines', max_lines)

            # Call save callback
            if self.on_save:
                self.on_save()
        except Exception as e:
            # Top-level UI boundary: report instead of letting the Tk
            # callback swallow the traceback silently.
            messagebox.showerror(
                "Error", f"Failed to save settings:\n{e}", parent=self
            )
            return

        # parent=self ties the message box to this dialog, which holds the grab.
        messagebox.showinfo(
            "Settings Saved", "Settings have been saved successfully!", parent=self
        )
        self.destroy()
|