Initial commit: Local Transcription App v1.0
Phase 1 Complete - Standalone Desktop Application

Features:
- Real-time speech-to-text with Whisper (faster-whisper)
- PySide6 desktop GUI with settings dialog
- Web server for OBS browser source integration
- Audio capture with automatic sample rate detection and resampling
- Noise suppression with Voice Activity Detection (VAD)
- Configurable display settings (font, timestamps, fade duration)
- Settings apply without restart (with automatic model reloading)
- Auto-fade for web display transcriptions
- CPU/GPU support with automatic device detection
- Standalone executable builds (PyInstaller)
- CUDA build support (works on systems without CUDA hardware)

Components:
- Audio capture with sounddevice
- Noise reduction with noisereduce + webrtcvad
- Transcription with faster-whisper
- GUI with PySide6
- Web server with FastAPI + WebSocket
- Configuration system with YAML

Build System:
- Standard builds (CPU-only): build.sh / build.bat
- CUDA builds (universal): build-cuda.sh / build-cuda.bat
- Comprehensive BUILD.md documentation
- Cross-platform support (Linux, Windows)

Documentation:
- README.md with project overview and quick start
- BUILD.md with detailed build instructions
- NEXT_STEPS.md with future enhancement roadmap
- INSTALL.md with setup instructions

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
310
gui/settings_dialog.py
Normal file
310
gui/settings_dialog.py
Normal file
@@ -0,0 +1,310 @@
|
||||
"""Settings dialog for configuring the application."""
|
||||
|
||||
import math
from tkinter import messagebox
from typing import Callable, List, Optional, Tuple

import customtkinter as ctk
|
||||
|
||||
|
||||
class SettingsDialog(ctk.CTkToplevel):
    """Modal dialog window for viewing and editing application settings.

    Initial values are read with ``config.get(key, default)``. When the user
    presses *Save*, the numeric fields are validated first and only then are
    all values written back with ``config.set(key, value)``, so an invalid
    entry never leaves the configuration half-updated.
    """

    def __init__(
        self,
        parent,
        config,
        audio_devices: List[Tuple[int, str]],
        compute_devices: List[Tuple[str, str]],
        on_save: Optional[Callable[[], None]] = None,
    ):
        """
        Initialize settings dialog.

        Args:
            parent: Parent window
            config: Configuration object exposing ``get(key, default)`` and
                ``set(key, value)``
            audio_devices: List of (device_index, device_name) tuples
            compute_devices: List of (device_id, device_description) tuples
            on_save: Optional zero-argument callback invoked after the
                settings have been written to ``config``
        """
        super().__init__(parent)

        # NOTE(review): tkinter widgets expose ``config`` as an alias of
        # ``configure``; assigning the settings object here shadows that alias
        # on this instance. The attribute name is kept for compatibility --
        # confirm nothing calls ``dialog.config(...)`` as a widget method.
        self.config = config
        self.audio_devices = audio_devices
        self.compute_devices = compute_devices
        self.on_save = on_save

        # Menu labels, parallel to the device lists. Names reported by the
        # system are not guaranteed to be unique, so duplicates get their id
        # appended; otherwise a selection could not be mapped back to an id.
        self._audio_labels = self._unique_labels(audio_devices)
        self._compute_labels = self._unique_labels(compute_devices)

        # Window configuration
        self.title("Settings")
        self.geometry("600x700")
        self.resizable(False, False)

        # Make dialog modal
        self.transient(parent)
        self.grab_set()

        self._create_widgets()
        self._load_current_settings()

    # ------------------------------------------------------------------
    # Small pure helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _unique_labels(devices) -> List[str]:
        """Return one menu label per ``(id, name)`` pair, unique per name.

        A name that occurs once is used unchanged; a name that occurs several
        times is suffixed with ``" [<id>]"``.
        """
        names = [name for _, name in devices]
        return [
            name if names.count(name) == 1 else f"{name} [{dev_id}]"
            for dev_id, name in devices
        ]

    @staticmethod
    def _label_for_id(devices, labels, wanted):
        """Return the label of the device whose id equals ``wanted``.

        Ids are compared as strings so that an index stored as ``2`` or
        ``"2"`` both match. Falls back to the first label (covers the
        ``'default'`` / ``'auto'`` settings and devices that are no longer
        present), or ``None`` when there are no devices at all.
        """
        for (dev_id, _), label in zip(devices, labels):
            if str(dev_id) == str(wanted):
                return label
        return labels[0] if labels else None

    @staticmethod
    def _id_for_label(devices, labels, selected):
        """Return the id of the device shown as ``selected``, else ``None``."""
        for (dev_id, _), label in zip(devices, labels):
            if label == selected:
                return dev_id
        return None

    @staticmethod
    def _parse_chunk_duration(text: str) -> float:
        """Parse the chunk duration field.

        Raises:
            ValueError: If ``text`` is not a number, or is not a finite value
                greater than zero.
        """
        value = float(text)
        if not math.isfinite(value) or value <= 0:
            raise ValueError("Chunk duration must be a positive number of seconds.")
        return value

    @staticmethod
    def _parse_max_lines(text: str) -> int:
        """Parse the max lines field.

        Zero is passed through unchanged; its meaning is left to whatever
        consumes ``display.max_lines``.

        Raises:
            ValueError: If ``text`` is not an integer or is negative.
        """
        value = int(text)
        if value < 0:
            raise ValueError("Max lines must not be negative.")
        return value

    # ------------------------------------------------------------------
    # Widget construction
    # ------------------------------------------------------------------

    @staticmethod
    def _add_section(parent, title: str):
        """Create, pack and return a titled section frame inside ``parent``."""
        section = ctk.CTkFrame(parent)
        section.pack(fill="x", pady=(0, 15))
        ctk.CTkLabel(section, text=title, font=("", 16, "bold")).pack(
            anchor="w", padx=10, pady=(10, 5)
        )
        return section

    @staticmethod
    def _add_row(section, label: Optional[str] = None):
        """Create, pack and return a row frame, optionally with a left label."""
        row = ctk.CTkFrame(section)
        row.pack(fill="x", padx=10, pady=5)
        if label is not None:
            ctk.CTkLabel(row, text=label, width=150).pack(side="left", padx=5)
        return row

    def _create_widgets(self):
        """Create all settings widgets."""
        # Main container with padding
        main_frame = ctk.CTkFrame(self)
        main_frame.pack(fill="both", expand=True, padx=20, pady=20)

        # Sections are packed top-to-bottom in this order.
        self._create_user_section(main_frame)
        self._create_audio_section(main_frame)
        self._create_transcription_section(main_frame)
        self._create_noise_section(main_frame)
        self._create_display_section(main_frame)
        self._create_buttons(main_frame)

    def _create_user_section(self, parent):
        """Build the "User Settings" section (display name)."""
        section = self._add_section(parent, "User Settings")

        row = self._add_row(section, "Display Name:")
        self.name_entry = ctk.CTkEntry(row, width=300)
        self.name_entry.pack(side="left", padx=5)

    def _create_audio_section(self, parent):
        """Build the "Audio Settings" section (input device, chunk duration)."""
        section = self._add_section(parent, "Audio Settings")

        row = self._add_row(section, "Input Device:")
        self.audio_device_menu = ctk.CTkOptionMenu(
            row, values=list(self._audio_labels), width=300
        )
        self.audio_device_menu.pack(side="left", padx=5)

        row = self._add_row(section, "Chunk Duration (s):")
        self.chunk_entry = ctk.CTkEntry(row, width=100)
        self.chunk_entry.pack(side="left", padx=5)

    def _create_transcription_section(self, parent):
        """Build the "Transcription Settings" section (model, device, language)."""
        section = self._add_section(parent, "Transcription Settings")

        row = self._add_row(section, "Model Size:")
        self.model_menu = ctk.CTkOptionMenu(
            row,
            values=["tiny", "base", "small", "medium", "large"],
            width=200,
        )
        self.model_menu.pack(side="left", padx=5)

        row = self._add_row(section, "Compute Device:")
        self.compute_device_menu = ctk.CTkOptionMenu(
            row, values=list(self._compute_labels), width=300
        )
        self.compute_device_menu.pack(side="left", padx=5)

        row = self._add_row(section, "Language:")
        self.lang_menu = ctk.CTkOptionMenu(
            row,
            values=["auto", "en", "es", "fr", "de", "it", "pt", "ru", "zh", "ja", "ko"],
            width=200,
        )
        self.lang_menu.pack(side="left", padx=5)

    def _create_noise_section(self, parent):
        """Build the "Noise Suppression" section (enable, strength, VAD)."""
        section = self._add_section(parent, "Noise Suppression")

        # Enable noise suppression
        row = self._add_row(section)
        self.noise_enabled_var = ctk.BooleanVar()
        self.noise_enabled_check = ctk.CTkCheckBox(
            row,
            text="Enable Noise Suppression",
            variable=self.noise_enabled_var,
        )
        self.noise_enabled_check.pack(side="left", padx=5)

        # Noise suppression strength: 20 steps over 0.0-1.0, i.e. 0.05 apart.
        row = self._add_row(section, "Strength:")
        self.noise_strength_slider = ctk.CTkSlider(
            row,
            from_=0.0,
            to=1.0,
            number_of_steps=20,
            width=300,
        )
        self.noise_strength_slider.pack(side="left", padx=5)
        self.noise_strength_label = ctk.CTkLabel(row, text="0.70", width=40)
        self.noise_strength_label.pack(side="left", padx=5)
        # Hook the callback only once the label it updates exists.
        self.noise_strength_slider.configure(command=self._update_strength_label)

        # VAD
        row = self._add_row(section)
        self.vad_enabled_var = ctk.BooleanVar()
        self.vad_enabled_check = ctk.CTkCheckBox(
            row,
            text="Enable Voice Activity Detection",
            variable=self.vad_enabled_var,
        )
        self.vad_enabled_check.pack(side="left", padx=5)

    def _create_display_section(self, parent):
        """Build the "Display Settings" section (timestamps, max lines)."""
        section = self._add_section(parent, "Display Settings")

        # Show timestamps
        row = self._add_row(section)
        self.timestamps_var = ctk.BooleanVar()
        self.timestamps_check = ctk.CTkCheckBox(
            row,
            text="Show Timestamps",
            variable=self.timestamps_var,
        )
        self.timestamps_check.pack(side="left", padx=5)

        # Max lines
        row = self._add_row(section, "Max Lines:")
        self.maxlines_entry = ctk.CTkEntry(row, width=100)
        self.maxlines_entry.pack(side="left", padx=5)

    def _create_buttons(self, parent):
        """Build the Save / Cancel button row."""
        button_frame = ctk.CTkFrame(parent)
        button_frame.pack(fill="x", pady=(10, 0))

        # Both buttons are packed to the right, so Save ends up rightmost.
        self.save_button = ctk.CTkButton(
            button_frame,
            text="Save",
            command=self._save_settings,
            width=120,
        )
        self.save_button.pack(side="right", padx=5)

        self.cancel_button = ctk.CTkButton(
            button_frame,
            text="Cancel",
            command=self.destroy,
            width=120,
            fg_color="gray",
        )
        self.cancel_button.pack(side="right", padx=5)

    def _update_strength_label(self, value):
        """Update the noise strength label.

        Two decimals are shown because the slider moves in 0.05 increments;
        one decimal would display a rounded value for every other position.
        """
        self.noise_strength_label.configure(text=f"{value:.2f}")

    # ------------------------------------------------------------------
    # Config <-> widgets
    # ------------------------------------------------------------------

    def _load_current_settings(self):
        """Load current settings from config."""
        # User settings
        self.name_entry.insert(0, self.config.get('user.name', 'User'))

        # Audio settings
        audio_label = self._label_for_id(
            self.audio_devices,
            self._audio_labels,
            self.config.get('audio.input_device', 'default'),
        )
        if audio_label is not None:
            self.audio_device_menu.set(audio_label)

        self.chunk_entry.insert(0, str(self.config.get('audio.chunk_duration', 3.0)))

        # Transcription settings
        self.model_menu.set(self.config.get('transcription.model', 'base'))

        compute_label = self._label_for_id(
            self.compute_devices,
            self._compute_labels,
            self.config.get('transcription.device', 'auto'),
        )
        if compute_label is not None:
            self.compute_device_menu.set(compute_label)

        self.lang_menu.set(self.config.get('transcription.language', 'en'))

        # Noise suppression
        self.noise_enabled_var.set(self.config.get('noise_suppression.enabled', True))
        strength = self.config.get('noise_suppression.strength', 0.7)
        self.noise_strength_slider.set(strength)
        self._update_strength_label(strength)
        self.vad_enabled_var.set(self.config.get('processing.use_vad', True))

        # Display settings
        self.timestamps_var.set(self.config.get('display.show_timestamps', True))
        self.maxlines_entry.insert(0, str(self.config.get('display.max_lines', 100)))

    def _apply_to_config(self, chunk_duration: float, max_lines: int):
        """Write every widget value (plus the validated numbers) to config."""
        # User settings
        self.config.set('user.name', self.name_entry.get())

        # Audio settings
        audio_id = self._id_for_label(
            self.audio_devices, self._audio_labels, self.audio_device_menu.get()
        )
        if audio_id is not None:
            self.config.set('audio.input_device', str(audio_id))
        self.config.set('audio.chunk_duration', chunk_duration)

        # Transcription settings
        self.config.set('transcription.model', self.model_menu.get())
        compute_id = self._id_for_label(
            self.compute_devices, self._compute_labels, self.compute_device_menu.get()
        )
        if compute_id is not None:
            self.config.set('transcription.device', compute_id)
        self.config.set('transcription.language', self.lang_menu.get())

        # Noise suppression
        self.config.set('noise_suppression.enabled', self.noise_enabled_var.get())
        # Round away float noise (e.g. 0.35000000000000003) before storing.
        self.config.set(
            'noise_suppression.strength',
            round(self.noise_strength_slider.get(), 2),
        )
        self.config.set('processing.use_vad', self.vad_enabled_var.get())

        # Display settings
        self.config.set('display.show_timestamps', self.timestamps_var.get())
        self.config.set('display.max_lines', max_lines)

    def _save_settings(self):
        """Validate the form, save settings to config and close the dialog.

        Nothing is written to ``config`` unless the numeric fields are valid.
        Errors are reported in a message box and leave the dialog open.
        """
        # Validate first so a bad entry cannot leave config half-updated.
        try:
            chunk_duration = self._parse_chunk_duration(self.chunk_entry.get())
            max_lines = self._parse_max_lines(self.maxlines_entry.get())
        except ValueError as e:
            messagebox.showerror(
                "Invalid Input",
                f"Please check your input values:\n{e}",
                parent=self,
            )
            return

        # UI boundary: report any failure from config or the callback to the
        # user instead of letting it escape into the Tk event loop.
        try:
            self._apply_to_config(chunk_duration, max_lines)

            # Call save callback
            if self.on_save:
                self.on_save()
        except Exception as e:
            messagebox.showerror(
                "Error",
                f"Failed to save settings:\n{e}",
                parent=self,
            )
            return

        messagebox.showinfo(
            "Settings Saved",
            "Settings have been saved successfully!",
            parent=self,
        )
        self.destroy()
|
||||
Reference in New Issue
Block a user